def is_acronym(token, exclude=None):
    """
    Decide whether a single token qualifies as a valid acronym.

    Args:
        token (str): one word to be tested
        exclude (Set[str]): tokens known in advance to be poor acronyms even
            though they would otherwise pass; any token found in this set
            is rejected

    Returns:
        bool: True if every check passes, False as soon as one fails
    """
    # tokens the caller has explicitly ruled out are rejected first
    if exclude and token in exclude:
        return False
    # reject empty strings and anything containing a space
    if not token or ' ' in token:
        return False
    # a 2-character token may not contain lower-case letters
    if len(token) == 2 and not token.isupper():
        return False
    # a token made only of digits is a number, not an acronym
    if token.isdigit():
        return False
    # need an upper-case letter somewhere, or a digit at either end
    has_upper = any(ch.isupper() for ch in token)
    digit_at_edge = token[0].isdigit() or token[-1].isdigit()
    if not (has_upper or digit_at_edge):
        return False
    # the number of alphanumeric characters must fall within 2..10
    alnum_count = sum(ch.isalnum() for ch in token)
    if alnum_count < 2 or alnum_count > 10:
        return False
    # finally, the letter / digit / '&/.-' layout must satisfy ACRONYM_REGEX
    return bool(ACRONYM_REGEX.match(token))
def test_bad_acronym_regex():
    """Check that ACRONYM_REGEX finds no match in any BAD_ACRONYMS entry."""
    for bad_token in BAD_ACRONYMS:
        found = ACRONYM_REGEX.search(bad_token)
        assert found is None
def test_good_acronym_regex():
    """Check that ACRONYM_REGEX matches every GOOD_ACRONYMS entry in full."""
    for item in GOOD_ACRONYMS:
        match = ACRONYM_REGEX.search(item)
        # search() returns None when nothing matches; check that explicitly so
        # a regression fails as an assertion naming the offending item rather
        # than as an AttributeError raised by calling .group() on None
        assert match is not None, "no acronym match found in {!r}".format(item)
        # the matched text must be the whole item, not just a part of it
        assert item == match.group()
def test_bad_acronym_regex(self):
    # ACRONYM_REGEX must find no match in any BAD_ACRONYMS entry
    for bad_token in BAD_ACRONYMS:
        found = ACRONYM_REGEX.search(bad_token)
        self.assertIsNone(found)
def test_good_acronym_regex(self):
    # ACRONYM_REGEX must match every GOOD_ACRONYMS entry in full
    for item in GOOD_ACRONYMS:
        match = ACRONYM_REGEX.search(item)
        # search() returns None when nothing matches; assert on that first so
        # a regression is reported as a test failure naming the item instead
        # of an AttributeError raised by calling .group() on None
        self.assertIsNotNone(
            match, msg="no acronym match found in {!r}".format(item)
        )
        # the matched text must be the whole item, not just a part of it
        self.assertEqual(item, match.group())