Пример #1
0
def is_acronym(token, exclude=None):
    """
    Decide whether a single token qualifies as a valid acronym.

    The token is run through a series of cheap rejection checks, in order,
    and finally tested against the module-level ``ACRONYM_REGEX`` pattern.

    Args:
        token (str): single word to test
        exclude (Set[str]): tokens that should never be treated as acronyms,
            even if they would otherwise pass every check; a token found in
            this collection yields False

    Returns:
        bool: True only if no check rejects the token and ``ACRONYM_REGEX``
        matches at the start of it; False otherwise
    """
    # tokens the caller has explicitly ruled out are never acronyms
    if exclude and token in exclude:
        return False
    # empty tokens and tokens containing a space are rejected outright
    if not token or ' ' in token:
        return False
    # 2-character tokens are rejected unless str.isupper() holds
    if len(token) == 2 and not token.isupper():
        return False
    # a token consisting only of digits is not an acronym
    if token.isdigit():
        return False
    # need an upper-case character somewhere, or a digit at either end
    if not (any(ch.isupper() for ch in token)
            or token[0].isdigit()
            or token[-1].isdigit()):
        return False
    # the number of alphanumeric characters must lie in [2, 10]
    n_alnum = sum(ch.isalnum() for ch in token)
    if n_alnum < 2 or n_alnum > 10:
        return False
    # last gate: the token has to match the acronym pattern
    return bool(ACRONYM_REGEX.match(token))
Пример #2
0
def is_acronym(token, exclude=None):
    """
    Report whether ``token`` is an acceptable acronym.

    Args:
        token (str): the single word under test
        exclude (Set[str]): optional collection of tokens to reject up front,
            for cases where a token would pass the checks below but is known
            not to be a useful acronym

    Returns:
        bool: False as soon as any check below fails; otherwise True exactly
        when ``ACRONYM_REGEX.match(token)`` finds a match
    """
    # caller-supplied exclusions take priority over every other rule
    if exclude and token in exclude:
        return False
    # nothing to examine in an empty token
    if not token:
        return False
    # a space means this is not a single word
    if ' ' in token:
        return False
    # length-2 tokens must satisfy str.isupper()
    if len(token) == 2 and not token.isupper():
        return False
    # purely numeric tokens are rejected
    if token.isdigit():
        return False
    # without any upper-case character, a digit must sit at one of the ends
    if not any(symbol.isupper() for symbol in token):
        if not token[0].isdigit() and not token[-1].isdigit():
            return False
    # between 2 and 10 alphanumeric characters, inclusive
    alnum_total = len([symbol for symbol in token if symbol.isalnum()])
    if alnum_total < 2 or alnum_total > 10:
        return False
    # the acronym pattern decides the remaining cases
    return True if ACRONYM_REGEX.match(token) else False
Пример #3
0
def test_bad_acronym_regex():
    """Check that ACRONYM_REGEX finds no match in any BAD_ACRONYMS entry."""
    for candidate in BAD_ACRONYMS:
        found = ACRONYM_REGEX.search(candidate)
        assert found is None
Пример #4
0
def test_good_acronym_regex():
    """
    Check that ACRONYM_REGEX matches every GOOD_ACRONYMS entry in full.

    For each entry, the text returned by the first regex match must equal
    the entry itself.
    """
    for item in GOOD_ACRONYMS:
        match = ACRONYM_REGEX.search(item)
        # search() returns None when nothing matches; assert that explicitly
        # so the failure names the offending item instead of surfacing as an
        # AttributeError on ``None.group()``
        assert match is not None, 'no acronym match found in {!r}'.format(item)
        assert item == match.group()
Пример #5
0
 def test_bad_acronym_regex(self):
     """Check that ACRONYM_REGEX finds no match in any BAD_ACRONYMS entry."""
     for candidate in BAD_ACRONYMS:
         result = ACRONYM_REGEX.search(candidate)
         self.assertIsNone(result)
Пример #6
0
 def test_good_acronym_regex(self):
     """
     Check that ACRONYM_REGEX matches every GOOD_ACRONYMS entry in full.

     For each entry, the text returned by the first regex match must equal
     the entry itself.
     """
     for item in GOOD_ACRONYMS:
         match = ACRONYM_REGEX.search(item)
         # search() returns None when nothing matches; check that first so
         # the failure names the offending item rather than raising
         # AttributeError on ``None.group()``
         self.assertIsNotNone(
             match, msg='no acronym match found in {!r}'.format(item))
         self.assertEqual(item, match.group())
Пример #7
0
 def test_bad_acronym_regex(self):
     """Every BAD_ACRONYMS entry must yield no ACRONYM_REGEX match."""
     for bad_token in BAD_ACRONYMS:
         outcome = ACRONYM_REGEX.search(bad_token)
         self.assertIsNone(outcome)
Пример #8
0
 def test_good_acronym_regex(self):
     """
     Every GOOD_ACRONYMS entry must be matched by ACRONYM_REGEX in full.

     The matched text of the first regex hit is compared against the entry
     itself.
     """
     for item in GOOD_ACRONYMS:
         match = ACRONYM_REGEX.search(item)
         # guard against a None result before calling .group(), so a missed
         # acronym is reported as a test failure that identifies the item
         # instead of an AttributeError
         self.assertIsNotNone(
             match, msg='no acronym match found in {!r}'.format(item))
         self.assertEqual(item, match.group())