def parse_enums(code: str, trie: TrieRegEx) -> None:
    """
    Collects enum member names from source text into the regex trie.

    Each ``enum`` body found in ``code`` is scanned (``enum struct``
    declarations are excluded by the pattern) and the leading word of each
    line inside the braces is taken as a candidate name. Candidates made up
    only of uppercase letters, digits and underscores are ignored, as are
    names the trie already contains.

    Args:
        code (str): Text to search through.
        trie (TrieRegEx): Trie to add results to.
    """
    # whole enum declaration; group 1 is everything between the braces
    enum_pattern = r'enum(?!\s+struct)(?:.|\n)*?{((?:.|\n)*?)}'
    # first word that follows the start of the body or a newline
    member_pattern = r'(?:^|\n)\s*?(\w+)\b'
    # names consisting solely of A-Z, digits and underscores
    all_caps_pattern = r'\b[A-Z_\d]+\b'

    for enum_match in re.finditer(enum_pattern, code):
        whole_enum = enum_match.group(0)
        Debug.log('-- Enum match: --\n{}\n-------'.format(whole_enum))

        body: str = enum_match.group(1)
        for member_match in re.finditer(member_pattern, body):
            name: str = member_match.group(1)

            # all-uppercase names are not collected
            if re.match(all_caps_pattern, name):
                continue

            if not trie.has(name):
                trie.add(name)
                Debug.log('Enum added: {}'.format(name))
            else:
                Debug.log('Skipping enum, already added: {}'.format(name))
def parse_defines(code: str, trie: TrieRegEx) -> None:
    """
    Finds defines and adds them to the regex trie

    A define is recognised when a line starts with ``#define``, followed by
    a name and then at least one space or tab. Names made up only of
    uppercase letters, digits and underscores are skipped, as are names the
    trie already contains.

    Args:
        code (str): Text to search through.
        trie (TrieRegEx): Trie to add results to.
    """
    # match all defines.
    # re.MULTILINE is required so that '^' anchors at the start of every
    # line; without it only a define at the very beginning of `code` could
    # ever match.
    for define_match in re.finditer(r'^#define[ \t]*(\w+)\b[ \t]', code,
                                    re.MULTILINE):
        define: str = define_match.group(1)

        # if all uppercase, ignore. Typically they should be uppercase but
        # maybe there's an exception
        if re.match(r'\b[A-Z_\d]+\b', define):
            continue

        # skip if already contains
        if trie.has(define):
            Debug.log('Skipping define, already added: {}'.format(define))
            continue

        trie.add(define)
        Debug.log('Define added: {}'.format(define))
def parse_publicconstants(code: str, trie: TrieRegEx) -> None:
    """
    Collects the names of ``public const`` variables into the regex trie.

    The pattern looks for ``public const <type> <name>`` separated by
    spaces or tabs and captures ``<name>``. Names made up only of uppercase
    letters, digits and underscores are ignored, as are names the trie
    already contains.

    Args:
        code (str): Text to search through.
        trie (TrieRegEx): Trie to add results to.
    """
    # public constants aka magic variables; group 1 is the variable name
    constant_pattern = r'public[ \t]+const[ \t]+\w+[ \t]+(\w+)\b'
    # names consisting solely of A-Z, digits and underscores
    all_caps_pattern = r'\b[A-Z_\d]+\b'

    for found in re.finditer(constant_pattern, code):
        name: str = found.group(1)

        # all-uppercase names are not collected
        if re.match(all_caps_pattern, name):
            continue

        if not trie.has(name):
            trie.add(name)
            Debug.log('Const added: {}'.format(name))
        else:
            Debug.log('Skipping const, already added: {}'.format(name))
class TestHas(unittest.TestCase):
    """Tests for TrieRegEx.has()"""

    def setUp(self):
        # every word shares the prefix 'heal', so the shorter entries are
        # prefixes of the longer ones
        self.words = ['heal', 'health', 'healthy', 'healthier', 'healthiest']
        self.tre = TRE(*self.words)

    def test_existing_longest_word(self):
        longest = 'healthiest'
        self.assertTrue(self.tre.has(longest))

    def test_existing_substring_word(self):
        # stored words that are also prefixes of other stored words
        for stored_prefix in ('health', 'heal'):
            self.assertTrue(self.tre.has(stored_prefix))

    def test_nonexisting(self):
        unknown = 'wound'
        self.assertFalse(self.tre.has(unknown))

    def test_nonword_substring_of_existing_word(self):
        # 'he' is a prefix of every stored word but was never added itself
        partial = 'he'
        self.assertFalse(self.tre.has(partial))

    def test_nonexisting_after_removed(self):
        """Also exercises the TrieRegEx.remove() function
        """
        target = 'healthy'
        self.assertTrue(self.tre.has(target),
                        "'healthy' must first exist in trie")
        self.tre.remove(target)
        self.assertFalse(self.tre.has(target))

    def test_existing_after_added(self):
        """Also exercises the TrieRegEx.add() function
        """
        newcomer = 'settled'
        self.assertFalse(self.tre.has(newcomer),
                         "'settled' must first NOT exist in trie")
        self.tre.add(newcomer)
        self.assertTrue(self.tre.has(newcomer))

    def test_empty_string(self):
        self.assertFalse(self.tre.has(''))
class TestRegex(unittest.TestCase):
    """Tests for TrieRegEx.regex()"""

    def setUp(self):
        # incremental words: each one is a prefix of the next
        self.words = ['p', 'pe', 'pea', 'pear']
        # a larger, unrelated vocabulary for the bulk-matching tests
        self.more_words = [
            'orange', 'kumquat', 'tangerine', 'mandarin', 'pomelo', 'yuzu',
            'grapefruit', 'lemon', 'lime', 'bergamot', 'citron', 'clementine',
            'satsuma', 'tangelo', 'mikan', 'tangor', 'mint', 'peppermint',
            'spearmint', 'basil', 'cilantro', 'coriander', 'chives',
            'parsley', 'oregano', 'rosemary', 'thyme', 'scallion', 'ginger',
            'garlic', 'onion', 'galangal'
        ]
        self.tre = TRE()

    def test_match_all_incrementals(self):
        self.tre.add(*self.words)
        pattern = f'\\b{self.tre.regex()}\\b'
        text = ' '.join(self.words)
        matches = re.findall(pattern, text)
        self.assertEqual(sorted(matches), sorted(self.words))

    def test_does_not_match_larger_string(self):
        self.tre.add('p')
        pattern = f'\\b{self.tre.regex()}\\b'
        matches = re.findall(pattern, 'pe')
        self.assertEqual(matches, [])

    def test_does_not_match_substring(self):
        # everything except the leading 'p'
        my_words = self.words[1:]
        self.tre.add(*my_words)
        pattern = f'\\b{self.tre.regex()}\\b'
        text = ' '.join(self.words)
        matches = re.findall(pattern, text)
        self.assertEqual(matches, sorted(my_words),
                         "'p' should not be captured")

    def test_empty_trie_returns_empty_string_regex(self):
        produced = self.tre.regex()
        self.assertEqual(produced, '')

    def test_match_all_words(self):
        self.tre.add(*self.more_words)
        pattern = f'\\b{self.tre.regex()}\\b'
        text = ' '.join(self.more_words)
        matches = re.findall(pattern, text)
        self.assertEqual(sorted(matches), sorted(self.more_words))

    def test_match_all_words_surrounded_by_spaces(self):
        words = sorted(self.more_words)
        self.tre.add(*words)
        # the lookarounds demand a space on both sides, which the first and
        # last word of the joined text do not have
        pattern = f"(?<= ){self.tre.regex()}(?= )"
        matches = re.findall(pattern, ' '.join(words))
        self.assertEqual(
            matches, words[1:-1],
            'First and last item in sorted words list should not be matched.')

    def test_added_word_reflected_in_new_regex_call(self):
        self.tre.add(*self.words)
        before = self.tre.regex()
        self.assertEqual(before, 'p(?:e(?:ar?)?)?',
                         'Setup for the real test in the next assertEqual')
        self.tre.add('peak')
        after = self.tre.regex()
        self.assertEqual(after, 'p(?:e(?:a[kr]?)?)?')

    def test_removed_word_reflected_in_new_regex_call(self):
        expanded = self.words + ['peak']
        self.tre.add(*expanded)
        before = self.tre.regex()
        self.assertEqual(before, 'p(?:e(?:a[kr]?)?)?',
                         'Setup for the real test in the next assertEqual')
        self.tre.remove('peak')
        after = self.tre.regex()
        self.assertEqual(after, 'p(?:e(?:ar?)?)?')

    def test_multiple_adding_removing_reflected(self):
        """Interleaves .add(), .remove() and .regex() calls; per the original
        author this also checks that the memoizer cache clearing is called
        in the right places
        """
        add = self.tre.add
        remove = self.tre.remove

        add(*self.words)
        self.assertEqual(self.tre.regex(), 'p(?:e(?:ar?)?)?',
                         'Setup for the real test in the next assertEqual')

        # first round of edits, applied strictly in this order
        first_round = (
            (add, 'peak'),
            (remove, 'pe'),
            (add, 'river'),
            (add, 'rich'),
            (remove, 'pea'),
            (remove, 'peak'),
        )
        for operation, word in first_round:
            operation(word)
        self.assertEqual(self.tre.regex(), '(?:ri(?:ver|ch)|p(?:ear)?)')

        # second round of edits, finishing by re-adding the original words
        second_round = (
            (add, 'peak'),
            (remove, 'peak'),
            (remove, 'pear'),
        )
        for operation, word in second_round:
            operation(word)
        add(*self.words)
        self.assertEqual(self.tre.regex(),
                         '(?:p(?:e(?:ar?)?)?|ri(?:ver|ch))')
class TestAdd(unittest.TestCase):
    """Tests for TrieRegEx.add()

    The expected values below compare ``TRE._trie`` against nested dicts
    keyed by single characters, with a ``'**': {}`` entry at every position
    where an added word ends.
    """

    def setUp(self):
        self.tre = TRE()

    def test_one_word(self):
        self.tre.add('pear')
        self.assertEqual(
            self.tre._trie,
            {'p': {'e': {'a': {'r': {'**': {}}}}}})

    def test_two_words_together(self):
        self.tre.add('pear', 'peach')
        self.assertEqual(
            self.tre._trie,
            {'p': {'e': {'a': {'c': {'h': {'**': {}}},
                               'r': {'**': {}}}}}})

    def test_two_words_added_separately(self):
        self.tre.add('pear')
        self.tre.add('peach')
        self.assertEqual(
            self.tre._trie,
            {'p': {'e': {'a': {'c': {'h': {'**': {}}},
                               'r': {'**': {}}}}}})

    def test_two_words_different_initials(self):
        self.tre.add('pear', 'heart')
        self.assertEqual(
            self.tre._trie,
            {
                'p': {'e': {'a': {'r': {'**': {}}}}},
                'h': {'e': {'a': {'r': {'t': {'**': {}}}}}},
            })

    def test_three_words_different_prefix_length_matching(self):
        self.tre.add('pear', 'peach', 'perth')
        self.assertEqual(
            self.tre._trie,
            {
                'p': {
                    'e': {
                        'r': {'t': {'h': {'**': {}}}},
                        'a': {'r': {'**': {}},
                              'c': {'h': {'**': {}}}},
                    }
                }
            })

    def test_add_empty_string_changes_nothing(self):
        self.tre.add('')
        self.assertEqual(
            self.tre._trie, {},
            'Add empty string to empty trie should yield empty trie')

        self.tre.add('pear')
        # Compare against an independent literal rather than a reference to
        # self.tre._trie: a reference would alias the very object under
        # test, so an in-place mutation by add('') could never be detected.
        pear_trie = {'p': {'e': {'a': {'r': {'**': {}}}}}}
        self.tre.add('')
        self.assertEqual(
            pear_trie, self.tre._trie,
            'Add empty string to populated trie should yield same trie')

    def test_add_nonword_chars(self):
        self.tre.add('!wow', 'ask?')
        self.assertEqual(
            self.tre._trie,
            {
                '!': {'w': {'o': {'w': {'**': {}}}}},
                'a': {'s': {'k': {'?': {'**': {}}}}},
            })

    def test_add_special_chars(self):
        self.tre.add('\nline', '\ttab', ' space')
        self.assertEqual(
            self.tre._trie,
            {
                '\n': {'l': {'i': {'n': {'e': {'**': {}}}}}},
                '\t': {'t': {'a': {'b': {'**': {}}}}},
                ' ': {'s': {'p': {'a': {'c': {'e': {'**': {}}}}}}},
            })

    def test_add_incremental_words(self):
        # every word is a prefix of the next, so each level carries its own
        # '**' entry
        self.tre.add('a', 'an', 'ana', 'anat', 'anath', 'anathe', 'anathem',
                     'anathema')
        self.assertEqual(
            self.tre._trie,
            {
                'a': {
                    '**': {},
                    'n': {
                        '**': {},
                        'a': {
                            '**': {},
                            't': {
                                '**': {},
                                'h': {
                                    '**': {},
                                    'e': {
                                        '**': {},
                                        'm': {
                                            '**': {},
                                            'a': {
                                                '**': {}
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            })
class TestInitials(unittest.TestCase):
    """Tests for TrieRegEx.initials() and TrieRegEx._initials"""

    # Expected first-character counts for the words loaded in setUp();
    # individual tests overlay their own changes on a copy of this mapping.
    _BASE_COUNTS = {
        'a': 1,
        't': 1,
        's': 3,
        'w': 2,
        'f': 1,
        'n': 2,
        'b': 1,
        'e': 1
    }

    def setUp(self):
        self.words = [
            'all', 'the', 'stars', 'we', 'steal', 'from', 'night', 'sky',
            'will', 'never', 'be', 'enough'
        ]
        self.tre = TRE(*self.words)

    def test_initials_variable(self):
        # "hard" check against the fixed table
        self.assertEqual(self.tre._initials, dict(self._BASE_COUNTS))

        # "soft" check: recount the first characters from the word list
        tally = defaultdict(int)
        for word in self.words:
            tally[word[0]] += 1
        self.assertEqual(self.tre._initials, tally)

    def test_initials(self):
        # "hard" check
        self.assertEqual(self.tre.initials(),
                         ['a', 'b', 'e', 'f', 'n', 's', 't', 'w'])

        # "soft" check: distinct first characters, sorted
        distinct_first_chars = {word[0] for word in self.words}
        self.assertEqual(self.tre.initials(), sorted(distinct_first_chars))

    def test_add_existing_word_will_not_change_counts(self):
        self.tre.add('the')
        expected = dict(self._BASE_COUNTS)
        self.assertEqual(self.tre._initials, expected,
                         "key-value pairs should remain the same")

    def test_add_new_word_increase_frequency(self):
        self.tre.add('spotlights')
        expected = {**self._BASE_COUNTS, 's': 4}
        self.assertEqual(self.tre._initials, expected,
                         "'s' should be set to 4 (up from 3)")

    def test_add_new_initial(self):
        self.tre.add('dream')
        expected = {**self._BASE_COUNTS, 'd': 1}
        self.assertEqual(self.tre._initials, expected,
                         "new key 'd' should have a value of 1")

    def test_add_new_escaped_char(self):
        self.tre.add('\nnewline')
        expected = {**self._BASE_COUNTS, '\n': 1}
        self.assertEqual(self.tre._initials, expected,
                         "new key '\n' should have a value of 1")

    def test_add_new_special_char(self):
        self.tre.add('åll')
        expected = {**self._BASE_COUNTS, 'å': 1}
        self.assertEqual(self.tre._initials, expected,
                         "new key 'å' should have a value of 1")

    def test_remove_word_lower_frequency(self):
        self.tre.remove('the')
        expected = {**self._BASE_COUNTS, 't': 0}
        self.assertEqual(self.tre._initials, expected,
                         "'t' should have a value of 0 (down from 1)")

    def test_zero_frequency_should_not_appear(self):
        self.tre.remove('the')
        self.assertEqual(self.tre.initials(),
                         ['a', 'b', 'e', 'f', 'n', 's', 'w'],
                         "'t' should not appear in the list")

    def test_remove_nonexisting_initial_with_zero_frequency(self):
        self.tre.remove('the')  # set 't': 1 -> 't': 0
        self.tre.remove('table')  # attempt removal of nonexisting word
        expected = {**self._BASE_COUNTS, 't': 0}
        self.assertEqual(self.tre._initials, expected,
                         "'t' should still have a value of 0")

    def test_remove_all(self):
        self.tre.remove(*self.words)
        # every key is expected to remain, with its count at zero
        expected = dict.fromkeys(self._BASE_COUNTS, 0)
        self.assertEqual(self.tre._initials, expected,
                         "All keys should be set to a value of 0")
class TestRemove(unittest.TestCase):
    """Tests for TrieRegEx.remove()

    Most tests make two assertions: a "hard" check against a literal nested
    dict and a "soft" check against the ``_trie`` of a fresh TRE built from
    the words expected to remain.
    """

    def setUp(self):
        self.words = ['heart', 'healthy', 'pear', 'peach',
                      'lark', 'look', 'change']
        # each entry is a prefix of the next one
        self.incrementals = ['p', 'pe', 'pea', 'pear']
        self.tre = TRE(*self.words)
        self.tre_incr = TRE(*self.incrementals)

    def test_remove_one(self):
        self.tre.remove('healthy')

        # "hard" check
        expected = {
            'c': {'h': {'a': {'n': {'g': {'e': {'**': {}}}}}}},
            'l': {'a': {'r': {'k': {'**': {}}}},
                  'o': {'o': {'k': {'**': {}}}}},
            'h': {'e': {'a': {'r': {'t': {'**': {}}}}}},
            'p': {'e': {'a': {'c': {'h': {'**': {}}},
                              'r': {'**': {}}}}}
        }
        self.assertEqual(
            self.tre._trie, expected,
            "'healthy' should have been removed (hard check)")

        # "soft" check
        survivors = [word for word in self.words if word != 'healthy']
        self.assertEqual(
            self.tre._trie, TRE(*survivors)._trie,
            "'healthy' should have been removed (soft check)")

    def test_remove_two(self):
        self.tre.remove('healthy', 'change')

        # "hard" check
        expected = {
            'l': {'a': {'r': {'k': {'**': {}}}},
                  'o': {'o': {'k': {'**': {}}}}},
            'h': {'e': {'a': {'r': {'t': {'**': {}}}}}},
            'p': {'e': {'a': {'c': {'h': {'**': {}}},
                              'r': {'**': {}}}}}
        }
        self.assertEqual(
            self.tre._trie, expected,
            "'healthy' and 'change' should have been removed (hard check)")

        # "soft" check
        rebuilt = TRE('lark', 'look', 'heart', 'peach', 'pear')
        self.assertEqual(
            self.tre._trie, rebuilt._trie,
            "'healthy' and 'change' should have been removed (soft check)")

    def test_remove_all(self):
        self.tre.remove(*self.words)
        self.assertEqual(self.tre._trie, {}, 'Trie should be empty')

    def test_remove_second_time(self):
        # empty the trie, refill it, then remove everything except 'pear'
        self.tre.remove(*self.words)
        self.tre.add(*self.words)
        all_but_pear = [word for word in self.words if word != 'pear']
        self.tre.remove(*all_but_pear)

        # "hard" check
        expected = {'p': {'e': {'a': {'r': {'**': {}}}}}}
        self.assertEqual(
            self.tre._trie, expected,
            "Only 'pear' should be in trie (hard check)")

        # "soft" check
        self.assertEqual(
            self.tre._trie, TRE('pear')._trie,
            "Only 'pear' should be in trie (soft check)")

    def test_remove_first_in_incremental_words(self):
        self.tre_incr.remove('p')

        # "hard" check
        expected = {'p': {'e': {'**': {},
                                'a': {'**': {},
                                      'r': {'**': {}}}}}}
        self.assertEqual(
            self.tre_incr._trie, expected,
            "'p' should have been removed (hard check)")

        # "soft" check
        rebuilt = TRE('pe', 'pea', 'pear')
        self.assertEqual(
            self.tre_incr._trie, rebuilt._trie,
            "'p' should have been removed (soft check)")

    def test_remove_middle_in_incremental_words(self):
        self.tre_incr.remove('pea')

        # "hard" check
        expected = {'p': {'**': {},
                          'e': {'**': {},
                                'a': {'r': {'**': {}}}}}}
        self.assertEqual(
            self.tre_incr._trie, expected,
            "'pea' should have been removed (hard check)")

        # "soft" check
        rebuilt = TRE('p', 'pe', 'pear')
        self.assertEqual(
            self.tre_incr._trie, rebuilt._trie,
            "'pea' should have been removed (soft check)")

    def test_remove_last_in_incremental_words(self):
        self.tre_incr.remove('pear')

        # "hard" check
        expected = {'p': {'**': {},
                          'e': {'**': {},
                                'a': {'**': {}}}}}
        self.assertEqual(
            self.tre_incr._trie, expected,
            "'pear' should have been removed (hard check)")

        # "soft" check
        rebuilt = TRE('p', 'pe', 'pea')
        self.assertEqual(
            self.tre_incr._trie, rebuilt._trie,
            "'pear' should have been removed (soft check)")

    def test_remove_one_in_multiple_shared(self):
        tre = TRE('brander', 'brandy', 'brandless')
        tre.remove('brandless')

        # "hard" check
        expected = {'b': {'r': {'a': {'n': {'d': {
            'y': {'**': {}},
            'e': {'r': {'**': {}}}
        }}}}}}
        self.assertEqual(
            tre._trie, expected,
            "'brandless' should have been removed (hard check)")

        # "soft" check
        rebuilt = TRE('brander', 'brandy')
        self.assertEqual(
            tre._trie, rebuilt._trie,
            "'brandless' should have been removed (soft check)")

    def test_remove_nonexisting_word(self):
        self.tre_incr.remove('riffraff')

        # "hard" check
        expected = {'p': {'**': {},
                          'e': {'**': {},
                                'a': {'**': {},
                                      'r': {'**': {}}}}}}
        self.assertEqual(
            self.tre_incr._trie, expected,
            "Trie should remain the same (hard check)")

        # "soft" check
        rebuilt = TRE(*self.incrementals)
        self.assertEqual(
            self.tre_incr._trie, rebuilt._trie,
            "Trie should remain the same (soft check)")