def test_levenshteinDistance_feidanchaoren0043_feidanchaoren0011(self):
    """The local parts of the two email addresses should fuzzy-match.

    The Levenshtein Distance between the ``local`` parts of both addresses
    must be less than or equal to the threshold used below, which stands
    in for an EMAIL_FUZZY_MATCH parameter.
    """
    # Largest Levenshtein Distance which still counts as a fuzzy match:
    threshold = 4

    first = Address('*****@*****.**')
    second = Address('*****@*****.**')

    self.assertLessEqual(
        util.levenshteinDistance(first.local, second.local), threshold)
def runChecks(self, client):
    """Run checks on the incoming message, and only reply if they pass.

    1. Check if the client's address is whitelisted or blacklisted.
       Blacklisted addresses are rejected immediately.
    2. If it's neither, check that the domain name taken from the email
       ``'From:'`` header can be
       :func:`canonicalized <addr.canonicalizeEmailDomain>`.
    3. Compare that canonical domain with the one taken from the SMTP
       ``MAIL FROM:`` command. A mismatch is logged, but does not cause
       the message to be rejected.
    4. Run :func:`bridgedb.email.dkim.checkDKIM` with the rules found for
       the canonical domain.
    5. If fuzzy matching is enabled, reject addresses whose Levenshtein
       Distance to any blacklisted address is within the configured
       ``fuzzyMatch`` limit.

    .. note:: Calling this method may set the ``canonicalFromEmail`` and
        ``domainRules`` attributes of the :data:`incoming` message.

    :param client: An :api:`twisted.mail.smtp.Address`, which contains
        the client's email address, extracted from the ``'From:'`` header
        from the incoming email.
    :rtype: bool
    :returns: ``False`` if the checks didn't pass, ``True`` otherwise.
    """
    incoming = self.incoming
    context = incoming.context
    clientAddress = str(client)

    # If the SMTP ``MAIL FROM:`` domain name couldn't be canonicalized, then
    # we *should* have bailed at the SMTP layer, but we'll reject this
    # email again nonetheless:
    if not incoming.canonicalFromSMTP:
        logging.warning(("SMTP 'MAIL FROM' wasn't from a canonical domain "
                         "for email from %s") % clientAddress)
        return False

    # Allow whitelisted addresses through the canonicalization check:
    if clientAddress in context.whitelist:
        incoming.canonicalFromEmail = client.domain
        logging.info("'From:' header contained whitelisted address: %s"
                     % clientAddress)
    # Straight up reject addresses in the EMAIL_BLACKLIST config option:
    elif clientAddress in context.blacklist:
        logging.info("'From:' header contained blacklisted address: %s"
                     % clientAddress)
        return False
    else:
        logging.debug("Canonicalizing client email domain...")
        try:
            # The client's address was already checked to see if it came
            # from a supported domain and is a valid email address in
            # :meth:`getMailTo`, so we should just be able to re-extract
            # the canonical domain safely here:
            incoming.canonicalFromEmail = canonicalizeEmailDomain(
                client.domain, incoming.canon)
            logging.debug("Canonical email domain: %s"
                          % incoming.canonicalFromEmail)
        except addr.UnsupportedDomain:
            logging.info("Domain couldn't be canonicalized: %s"
                         % safelog.logSafely(client.domain))
            return False

    # The canonical domains from the SMTP ``MAIL FROM:`` and the email
    # ``From:`` header should match.  The rejection which used to follow
    # this check is disabled, so a mismatch is only logged:
    if incoming.canonicalFromSMTP != incoming.canonicalFromEmail:
        logging.error("SMTP/Email canonical domain mismatch!")
        logging.debug("Canonical domain mismatch: %s != %s"
                      % (incoming.canonicalFromSMTP,
                         incoming.canonicalFromEmail))

    incoming.domainRules = context.domainRules.get(
        incoming.canonicalFromEmail, list())

    # If the domain's ``domainRules`` say to check DKIM verification
    # results, and those results look bad, reject this email:
    if not dkim.checkDKIM(incoming.message, incoming.domainRules):
        return False

    # If fuzzy matching is enabled via the EMAIL_FUZZY_MATCH setting, then
    # calculate the Levenshtein String Distance (see
    # :func:`~bridgedb.util.levenshteinDistance`):
    if context.fuzzyMatch != 0:
        for blacklistedAddress in context.blacklist:
            distance = levenshteinDistance(clientAddress, blacklistedAddress)
            if distance <= context.fuzzyMatch:
                # NOTE(review): this logs the canonical domain rather than
                # the client's full address -- confirm that is intended.
                logging.info("Fuzzy-matched %s to blacklisted address %s!"
                             % (incoming.canonicalFromEmail,
                                blacklistedAddress))
                return False

    return True
def runChecks(self, client):
    """Run checks on the incoming message, and only reply if they pass.

    1. Check if the client's address is whitelisted or blacklisted.
       Blacklisted addresses are rejected immediately.
    2. If it's neither, check that the domain name taken from the email
       ``'From:'`` header can be
       :func:`canonicalized <addr.canonicalizeEmailDomain>`.
    3. Compare that canonical domain with the one taken from the SMTP
       ``MAIL FROM:`` command. A mismatch is logged, but does not cause
       the message to be rejected.
    4. Run :func:`bridgedb.email.dkim.checkDKIM` with the rules found for
       the canonical domain.
    5. If fuzzy matching is enabled, reject addresses whose Levenshtein
       Distance to any blacklisted address is within the configured
       ``fuzzyMatch`` limit.

    .. note:: Calling this method may set the ``canonicalFromEmail`` and
        ``domainRules`` attributes of the :data:`incoming` message.

    :param client: An :api:`twisted.mail.smtp.Address`, which contains
        the client's email address, extracted from the ``'From:'`` header
        from the incoming email.
    :rtype: bool
    :returns: ``False`` if the checks didn't pass, ``True`` otherwise.
    """
    incoming = self.incoming
    context = incoming.context
    clientAddress = str(client)

    # If the SMTP ``MAIL FROM:`` domain name couldn't be canonicalized, then
    # we *should* have bailed at the SMTP layer, but we'll reject this
    # email again nonetheless:
    if not incoming.canonicalFromSMTP:
        logging.warning(("SMTP 'MAIL FROM' wasn't from a canonical domain "
                         "for email from %s") % clientAddress)
        return False

    # Allow whitelisted addresses through the canonicalization check:
    if clientAddress in context.whitelist:
        incoming.canonicalFromEmail = client.domain
        logging.info("'From:' header contained whitelisted address: %s"
                     % clientAddress)
    # Straight up reject addresses in the EMAIL_BLACKLIST config option:
    elif clientAddress in context.blacklist:
        logging.info("'From:' header contained blacklisted address: %s"
                     % clientAddress)
        return False
    else:
        logging.debug("Canonicalizing client email domain...")
        try:
            # The client's address was already checked to see if it came
            # from a supported domain and is a valid email address in
            # :meth:`getMailTo`, so we should just be able to re-extract
            # the canonical domain safely here:
            incoming.canonicalFromEmail = canonicalizeEmailDomain(
                client.domain, incoming.canon)
            logging.debug("Canonical email domain: %s"
                          % incoming.canonicalFromEmail)
        except addr.UnsupportedDomain:
            logging.info("Domain couldn't be canonicalized: %s"
                         % safelog.logSafely(client.domain))
            return False

    # The canonical domains from the SMTP ``MAIL FROM:`` and the email
    # ``From:`` header should match.  The rejection which used to follow
    # this check is disabled, so a mismatch is only logged:
    if incoming.canonicalFromSMTP != incoming.canonicalFromEmail:
        logging.error("SMTP/Email canonical domain mismatch!")
        logging.debug("Canonical domain mismatch: %s != %s"
                      % (incoming.canonicalFromSMTP,
                         incoming.canonicalFromEmail))

    incoming.domainRules = context.domainRules.get(
        incoming.canonicalFromEmail, list())

    # If the domain's ``domainRules`` say to check DKIM verification
    # results, and those results look bad, reject this email:
    if not dkim.checkDKIM(incoming.message, incoming.domainRules):
        return False

    # If fuzzy matching is enabled via the EMAIL_FUZZY_MATCH setting, then
    # calculate the Levenshtein String Distance (see
    # :func:`~bridgedb.util.levenshteinDistance`):
    if context.fuzzyMatch != 0:
        for blacklistedAddress in context.blacklist:
            distance = levenshteinDistance(clientAddress, blacklistedAddress)
            if distance <= context.fuzzyMatch:
                # NOTE(review): this logs the canonical domain rather than
                # the client's full address -- confirm that is intended.
                logging.info("Fuzzy-matched %s to blacklisted address %s!"
                             % (incoming.canonicalFromEmail,
                                blacklistedAddress))
                return False

    return True
def test_levenshteinDistance_bridgedb_doge(self):
    """levenshteinDistance('bridgedb', 'doge') should return 6."""
    expected = 6
    self.assertEqual(util.levenshteinDistance('bridgedb', 'doge'), expected)
def test_levenshteinDistance_bar_cat(self):
    """levenshteinDistance('bar', 'cat') should return 2."""
    expected = 2
    self.assertEqual(util.levenshteinDistance('bar', 'cat'), expected)
def test_levenshteinDistance_cat_cat(self):
    """levenshteinDistance() of 'cat' against itself should return 0."""
    expected = 0
    self.assertEqual(util.levenshteinDistance('cat', 'cat'), expected)
def test_levenshteinDistance_blank_blank(self):
    """levenshteinDistance() of two empty strings should return 0."""
    expected = 0
    self.assertEqual(util.levenshteinDistance('', ''), expected)