Python TrieRegEx.add примеры, trieregex.TrieRegEx.add Python примеры использования

Пример #1

0

Показать файл

def parse_enums(code: str, trie: TrieRegEx) -> None:
    """
    Collects enum member names from source text into the regex trie.

    Each ``enum`` declaration body is scanned (``enum struct`` declarations
    are excluded) and the leading word of every line inside the braces is
    taken as a candidate. Candidates made up only of ``A-Z``, digits and
    underscores are ignored, as are names the trie already contains.

    Args:
        code (str): Text to search through.
        trie (TrieRegEx): Trie to add results to.
    """
    # body of every enum that is not an "enum struct"
    enum_pattern = r'enum(?!\s+struct)(?:.|\n)*?{((?:.|\n)*?)}'
    # first word on each line of the enum body
    member_pattern = r'(?:^|\n)\s*?(\w+)\b'
    # names written entirely in upper case are not collected
    all_caps_pattern = r'\b[A-Z_\d]+\b'

    for enum_match in re.finditer(enum_pattern, code):
        whole_enum = enum_match.group(0)
        Debug.log('-- Enum match: --\n{}\n-------'.format(whole_enum))
        body: str = enum_match.group(1)

        for member_match in re.finditer(member_pattern, body):
            name: str = member_match.group(1)

            if re.match(all_caps_pattern, name):
                continue

            if not trie.has(name):
                trie.add(name)
                Debug.log('Enum added: {}'.format(name))
            else:
                Debug.log('Skipping enum, already added: {}'.format(name))

Пример #2

0

Показать файл

def parse_defines(code: str, trie: TrieRegEx) -> None:
    """
    Finds defines and adds them to the regex trie

    A define is recognised when a line starts with ``#define`` and the
    defined name is followed by a space or tab on the same line. Names made
    up only of ``A-Z``, digits and underscores are skipped, as are names the
    trie already contains.

    Args:
        code (str): Text to search through.
        trie (TrieRegEx): Trie to add results to.
    """
    # match all defines; re.MULTILINE makes ``^`` anchor at the start of every
    # line -- without it only a define at the very start of ``code`` can match
    for define_match in re.finditer(r'^#define[ \t]*(\w+)\b[ \t]', code,
                                    re.MULTILINE):
        define: str = define_match.group(1)

        # if all uppercase, ignore. Typically they should be uppercase but
        # maybe there's an exception
        if re.match(r'\b[A-Z_\d]+\b', define):
            continue

        # skip if already contains
        if trie.has(define):
            Debug.log('Skipping define, already added: {}'.format(define))
            continue

        trie.add(define)
        Debug.log('Define added: {}'.format(define))

Пример #3

0

Показать файл

def parse_publicconstants(code: str, trie: TrieRegEx) -> None:
    """
    Collects names of ``public const`` variables into the regex trie.

    A declaration is recognised as ``public const <type> <name>`` with the
    parts separated by spaces or tabs. Names made up only of ``A-Z``, digits
    and underscores are ignored, as are names the trie already contains.

    Args:
        code (str): Text to search through.
        trie (TrieRegEx): Trie to add results to.
    """
    # public constants aka magic variables
    declaration_pattern = r'public[ \t]+const[ \t]+\w+[ \t]+(\w+)\b'
    all_caps_pattern = r'\b[A-Z_\d]+\b'

    for declaration in re.finditer(declaration_pattern, code):
        name: str = declaration.group(1)

        # fully upper-case names are not collected
        if re.match(all_caps_pattern, name):
            continue

        if not trie.has(name):
            trie.add(name)
            Debug.log('Const added: {}'.format(name))
        else:
            Debug.log('Skipping const, already added: {}'.format(name))

Пример #4

0

Показать файл

Файл: test_has.py Проект: ermanh/trieregex

class TestHas(unittest.TestCase):
    """Membership checks through TrieRegEx.has()"""

    def setUp(self):
        # Words sharing the prefix 'heal', each one extending a shorter one.
        self.words = ['heal', 'health', 'healthy', 'healthier', 'healthiest']
        self.tre = TRE(*self.words)

    def test_existing_longest_word(self):
        longest_present = self.tre.has('healthiest')
        self.assertTrue(longest_present)

    def test_existing_substring_word(self):
        # Stored words that are prefixes of other stored words.
        for word in ('health', 'heal'):
            self.assertTrue(self.tre.has(word))

    def test_nonexisting(self):
        unknown_present = self.tre.has('wound')
        self.assertFalse(unknown_present)

    def test_nonword_substring_of_existing_word(self):
        # 'he' is a prefix of every stored word but was never added itself.
        prefix_present = self.tre.has('he')
        self.assertFalse(prefix_present)

    def test_nonexisting_after_removed(self):
        """Doubles as a check of TrieRegEx.remove()"""
        word = 'healthy'
        self.assertTrue(self.tre.has(word),
                        "'healthy' must first exist in trie")
        self.tre.remove(word)
        self.assertFalse(self.tre.has(word))

    def test_existing_after_added(self):
        """Doubles as a check of TrieRegEx.add()"""
        word = 'settled'
        self.assertFalse(self.tre.has(word),
                         "'settled' must first NOT exist in trie")
        self.tre.add(word)
        self.assertTrue(self.tre.has(word))

    def test_empty_string(self):
        empty_present = self.tre.has('')
        self.assertFalse(empty_present)

Пример #5

0

Показать файл

class TestRegex(unittest.TestCase):
    """Checks of the pattern produced by TrieRegEx.regex()"""

    def setUp(self):
        self.tre = TRE()
        # Each word extends the previous one by a single character.
        self.words = ['p', 'pe', 'pea', 'pear']
        self.more_words = [
            'orange', 'kumquat', 'tangerine', 'mandarin', 'pomelo', 'yuzu',
            'grapefruit', 'lemon', 'lime', 'bergamot', 'citron', 'clementine',
            'satsuma', 'tangelo', 'mikan', 'tangor', 'mint', 'peppermint',
            'spearmint', 'basil', 'cilantro', 'coriander', 'chives', 'parsley',
            'oregano', 'rosemary', 'thyme', 'scallion', 'ginger', 'garlic',
            'onion', 'galangal'
        ]

    def test_match_all_incrementals(self):
        self.tre.add(*self.words)
        pattern = f'\\b{self.tre.regex()}\\b'
        text = ' '.join(self.words)
        found = re.findall(pattern, text)

        self.assertEqual(sorted(found), sorted(self.words))

    def test_does_not_match_larger_string(self):
        self.tre.add('p')
        pattern = f'\\b{self.tre.regex()}\\b'
        found = re.findall(pattern, 'pe')
        self.assertEqual(found, [])

    def test_does_not_match_substring(self):
        # Everything except the shortest word 'p' goes into the trie.
        my_words = self.words[1:]
        self.tre.add(*my_words)
        pattern = f'\\b{self.tre.regex()}\\b'
        text = ' '.join(self.words)
        found = re.findall(pattern, text)
        self.assertEqual(found, sorted(my_words), "'p' should not be captured")

    def test_empty_trie_returns_empty_string_regex(self):
        empty_pattern = self.tre.regex()
        self.assertEqual(empty_pattern, '')

    def test_match_all_words(self):
        self.tre.add(*self.more_words)
        pattern = f'\\b{self.tre.regex()}\\b'
        text = ' '.join(self.more_words)
        found = re.findall(pattern, text)
        self.assertEqual(sorted(found), sorted(self.more_words))

    def test_match_all_words_surrounded_by_spaces(self):
        words = sorted(self.more_words)
        self.tre.add(*words)
        # The look-arounds demand a space on both sides, which the first and
        # last word of the joined text do not have.
        pattern = f"(?<= ){self.tre.regex()}(?= )"
        found = re.findall(pattern, ' '.join(words))
        self.assertEqual(
            found, words[1:-1],
            'First and last item in sorted words list should not be matched.')

    def test_added_word_reflected_in_new_regex_call(self):
        self.tre.add(*self.words)
        before = self.tre.regex()
        self.assertEqual(before, 'p(?:e(?:ar?)?)?',
                         'Setup for the real test in the next assertEqual')
        self.tre.add('peak')
        after = self.tre.regex()
        self.assertEqual(after, 'p(?:e(?:a[kr]?)?)?')

    def test_removed_word_reflected_in_new_regex_call(self):
        expanded = self.words + ['peak']
        self.tre.add(*expanded)
        before = self.tre.regex()
        self.assertEqual(before, 'p(?:e(?:a[kr]?)?)?',
                         'Setup for the real test in the next assertEqual')
        self.tre.remove('peak')
        after = self.tre.regex()
        self.assertEqual(after, 'p(?:e(?:ar?)?)?')

    def test_multiple_adding_removing_reflected(self):
        """Interleaves add(), remove() and regex() calls.

        This also checks that the memoizer cache clearing is called in the
        right places, so that each regex() call reflects the current words.
        """
        self.tre.add(*self.words)
        initial = self.tre.regex()
        self.assertEqual(initial, 'p(?:e(?:ar?)?)?',
                         'Setup for the real test in the next assertEqual')

        self.tre.add('peak')
        self.tre.remove('pe')
        self.tre.add('river')
        self.tre.add('rich')
        self.tre.remove('pea')
        self.tre.remove('peak')
        after_first_round = self.tre.regex()
        self.assertEqual(after_first_round, '(?:ri(?:ver|ch)|p(?:ear)?)')

        self.tre.add('peak')
        self.tre.remove('peak')
        self.tre.remove('pear')
        self.tre.add(*self.words)
        after_second_round = self.tre.regex()
        self.assertEqual(after_second_round,
                         '(?:p(?:e(?:ar?)?)?|ri(?:ver|ch))')

Пример #6

0

Показать файл

class TestAdd(unittest.TestCase):
    """Tests for TrieRegEx.add()

    Every test compares the private ``_trie`` attribute with a literal nested
    dict. In these expected structures each character is a key and the key
    ``'**'`` (mapped to an empty dict) appears at the point where an added
    word ends.
    """

    def setUp(self):
        self.tre = TRE()

    def test_one_word(self):
        self.tre.add('pear')
        self.assertEqual(
            self.tre._trie,
            {'p': {'e': {'a': {'r': {'**': {}}}}}})

    def test_two_words_together(self):
        self.tre.add('pear', 'peach')
        self.assertEqual(
            self.tre._trie,
            {'p': {'e': {'a': {'c': {'h': {'**': {}}},
                               'r': {'**': {}}}}}})

    def test_two_words_added_separately(self):
        self.tre.add('pear')
        self.tre.add('peach')
        self.assertEqual(
            self.tre._trie,
            {'p': {'e': {'a': {'c': {'h': {'**': {}}},
                               'r': {'**': {}}}}}})

    def test_two_words_different_initials(self):
        self.tre.add('pear', 'heart')
        self.assertEqual(
            self.tre._trie,
            {
                'p': {'e': {'a': {'r': {'**': {}}}}},
                'h': {'e': {'a': {'r': {'t': {'**': {}}}}}},
            })

    def test_three_words_different_prefix_length_matching(self):
        self.tre.add('pear', 'peach', 'perth')
        self.assertEqual(
            self.tre._trie,
            {'p': {'e': {'r': {'t': {'h': {'**': {}}}},
                         'a': {'r': {'**': {}},
                               'c': {'h': {'**': {}}}}}}})

    def test_add_empty_string_changes_nothing(self):
        # Function-scope import keeps this test self-contained.
        import copy

        self.tre.add('')
        self.assertEqual(
            self.tre._trie, {},
            'Add empty string to empty trie should yield empty trie')

        self.tre.add('pear')
        # Take a deep copy: binding the live ``_trie`` object itself would
        # make the comparison below compare the object with itself, so the
        # check could never fail if add('') mutated it in place.
        pear_trie = copy.deepcopy(self.tre._trie)
        self.tre.add('')
        self.assertEqual(
            pear_trie, self.tre._trie,
            'Add empty string to populated trie should yield same trie')

    def test_add_nonword_chars(self):
        self.tre.add('!wow', 'ask?')
        self.assertEqual(
            self.tre._trie,
            {
                '!': {'w': {'o': {'w': {'**': {}}}}},
                'a': {'s': {'k': {'?': {'**': {}}}}},
            })

    def test_add_special_chars(self):
        self.tre.add('\nline', '\ttab', ' space')
        self.assertEqual(
            self.tre._trie,
            {
                '\n': {'l': {'i': {'n': {'e': {'**': {}}}}}},
                '\t': {'t': {'a': {'b': {'**': {}}}}},
                ' ': {'s': {'p': {'a': {'c': {'e': {'**': {}}}}}}},
            })

    def test_add_incremental_words(self):
        self.tre.add('a', 'an', 'ana', 'anat', 'anath', 'anathe', 'anathem',
                     'anathema')
        # Every prefix is itself a word, so each level carries a '**' key.
        self.assertEqual(
            self.tre._trie,
            {
                'a': {
                    '**': {},
                    'n': {
                        '**': {},
                        'a': {
                            '**': {},
                            't': {
                                '**': {},
                                'h': {
                                    '**': {},
                                    'e': {
                                        '**': {},
                                        'm': {
                                            '**': {},
                                            'a': {
                                                '**': {}
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            })

Пример #7

0

Показать файл

Файл: test_initials.py Проект: ermanh/trieregex

class TestInitials(unittest.TestCase):
    """Tests for TrieRegEx.initials() and TrieRegEx._initials"""

    # Number of fixture words (see setUp) starting with each letter. Tests
    # overlay only the entries they expect to differ from this baseline.
    BASE_COUNTS = {
        'a': 1,
        't': 1,
        's': 3,
        'w': 2,
        'f': 1,
        'n': 2,
        'b': 1,
        'e': 1,
    }

    def setUp(self):
        self.words = [
            'all', 'the', 'stars', 'we', 'steal', 'from', 'night', 'sky',
            'will', 'never', 'be', 'enough'
        ]
        self.tre = TRE(*self.words)

    def test_initials_variable(self):
        # "hard" check against the literal baseline
        self.assertEqual(self.tre._initials, self.BASE_COUNTS)
        # "soft" check: recount the first letters of the fixture words
        initials = defaultdict(int)
        for w in self.words:
            initials[w[0]] += 1
        self.assertEqual(self.tre._initials, initials)

    def test_initials(self):
        self.assertEqual(  # "hard" check
            self.tre.initials(), ['a', 'b', 'e', 'f', 'n', 's', 't', 'w'])
        self.assertEqual(  # "soft" check
            self.tre.initials(), sorted({w[0] for w in self.words}))

    def test_add_existing_word_will_not_change_counts(self):
        self.tre.add('the')
        self.assertEqual(self.tre._initials, self.BASE_COUNTS,
                         "key-value pairs should remain the same")

    def test_add_new_word_increase_frequency(self):
        self.tre.add('spotlights')
        self.assertEqual(self.tre._initials, {**self.BASE_COUNTS, 's': 4},
                         "'s' should be set to 4 (up from 3)")

    def test_add_new_initial(self):
        self.tre.add('dream')
        self.assertEqual(self.tre._initials, {**self.BASE_COUNTS, 'd': 1},
                         "new key 'd' should have a value of 1")

    def test_add_new_escaped_char(self):
        self.tre.add('\nnewline')
        # The message escapes the backslash so a failure report shows the
        # two characters \n instead of breaking the line.
        self.assertEqual(self.tre._initials, {**self.BASE_COUNTS, '\n': 1},
                         "new key '\\n' should have a value of 1")

    def test_add_new_special_char(self):
        self.tre.add('åll')
        self.assertEqual(self.tre._initials, {**self.BASE_COUNTS, 'å': 1},
                         "new key 'å' should have a value of 1")

    def test_remove_word_lower_frequency(self):
        self.tre.remove('the')
        # The key is expected to stay in the mapping with a count of zero.
        self.assertEqual(self.tre._initials, {**self.BASE_COUNTS, 't': 0},
                         "'t' should have a value of 0 (down from 1)")

    def test_zero_frequency_should_not_appear(self):
        self.tre.remove('the')
        self.assertEqual(self.tre.initials(),
                         ['a', 'b', 'e', 'f', 'n', 's', 'w'],
                         "'t' should not appear in the list")

    def test_remove_nonexisting_initial_with_zero_frequency(self):
        self.tre.remove('the')  # set 't': 1 -> 't': 0
        self.tre.remove('table')  # attempt removal of nonexisting word
        self.assertEqual(self.tre._initials, {**self.BASE_COUNTS, 't': 0},
                         "'t' should still have a value of 0")

    def test_remove_all(self):
        self.tre.remove(*self.words)
        self.assertEqual(self.tre._initials,
                         dict.fromkeys(self.BASE_COUNTS, 0),
                         "All keys should be set to a value of 0")

Пример #8

0

Показать файл

Файл: test_remove.py Проект: ermanh/trieregex

class TestRemove(unittest.TestCase):
    """Checks of TrieRegEx.remove()

    Most tests make two comparisons of the private ``_trie`` attribute: a
    "hard" one against a literal nested dict, and a "soft" one against a
    fresh trie built only from the words expected to remain.
    """

    def setUp(self):
        self.words = ['heart', 'healthy', 'pear', 'peach',
                      'lark', 'look', 'change']
        # Each word extends the previous one by a single character.
        self.incrementals = ['p', 'pe', 'pea', 'pear']
        self.tre = TRE(*self.words)
        self.tre_incr = TRE(*self.incrementals)

    def test_remove_one(self):
        self.tre.remove('healthy')

        hard_expected = {
            'c': {'h': {'a': {'n': {'g': {'e': {'**': {}}}}}}},
            'l': {'a': {'r': {'k': {'**': {}}}},
                  'o': {'o': {'k': {'**': {}}}}},
            'h': {'e': {'a': {'r': {'t': {'**': {}}}}}},
            'p': {'e': {'a': {'c': {'h': {'**': {}}},
                              'r': {'**': {}}}}},
        }
        self.assertEqual(self.tre._trie, hard_expected,
                         "'healthy' should have been removed (hard check)")

        survivors = [word for word in self.words if word != 'healthy']
        self.assertEqual(self.tre._trie, TRE(*survivors)._trie,
                         "'healthy' should have been removed (soft check)")

    def test_remove_two(self):
        self.tre.remove('healthy', 'change')

        hard_expected = {
            'l': {'a': {'r': {'k': {'**': {}}}},
                  'o': {'o': {'k': {'**': {}}}}},
            'h': {'e': {'a': {'r': {'t': {'**': {}}}}}},
            'p': {'e': {'a': {'c': {'h': {'**': {}}},
                              'r': {'**': {}}}}},
        }
        self.assertEqual(
            self.tre._trie, hard_expected,
            "'healthy' and 'change' should have been removed (hard check)")

        self.assertEqual(
            self.tre._trie,
            TRE('lark', 'look', 'heart', 'peach', 'pear')._trie,
            "'healthy' and 'change' should have been removed (soft check)")

    def test_remove_all(self):
        self.tre.remove(*self.words)
        self.assertEqual(self.tre._trie, {}, 'Trie should be empty')

    def test_remove_second_time(self):
        # Empty the trie, refill it, then remove everything except 'pear'.
        self.tre.remove(*self.words)
        self.tre.add(*self.words)
        all_but_pear = [word for word in self.words if word != 'pear']
        self.tre.remove(*all_but_pear)

        hard_expected = {'p': {'e': {'a': {'r': {'**': {}}}}}}
        self.assertEqual(self.tre._trie, hard_expected,
                         "Only 'pear' should be in trie (hard check)")

        self.assertEqual(self.tre._trie, TRE('pear')._trie,
                         "Only 'pear' should be in trie (soft check)")

    def test_remove_first_in_incremental_words(self):
        self.tre_incr.remove('p')

        hard_expected = {
            'p': {'e': {'**': {},
                        'a': {'**': {},
                              'r': {'**': {}}}}},
        }
        self.assertEqual(self.tre_incr._trie, hard_expected,
                         "'p' should have been removed (hard check)")

        self.assertEqual(self.tre_incr._trie,
                         TRE('pe', 'pea', 'pear')._trie,
                         "'p' should have been removed (soft check)")

    def test_remove_middle_in_incremental_words(self):
        self.tre_incr.remove('pea')

        hard_expected = {
            'p': {'**': {},
                  'e': {'**': {},
                        'a': {'r': {'**': {}}}}},
        }
        self.assertEqual(self.tre_incr._trie, hard_expected,
                         "'pea' should have been removed (hard check)")

        self.assertEqual(self.tre_incr._trie,
                         TRE('p', 'pe', 'pear')._trie,
                         "'pea' should have been removed (soft check)")

    def test_remove_last_in_incremental_words(self):
        self.tre_incr.remove('pear')

        hard_expected = {
            'p': {'**': {},
                  'e': {'**': {},
                        'a': {'**': {}}}},
        }
        self.assertEqual(self.tre_incr._trie, hard_expected,
                         "'pear' should have been removed (hard check)")

        self.assertEqual(self.tre_incr._trie,
                         TRE('p', 'pe', 'pea')._trie,
                         "'pear' should have been removed (soft check)")

    def test_remove_one_in_multiple_shared(self):
        # All three words share the prefix 'brand'.
        tre = TRE('brander', 'brandy', 'brandless')
        tre.remove('brandless')

        hard_expected = {
            'b': {'r': {'a': {'n': {'d': {'y': {'**': {}},
                                          'e': {'r': {'**': {}}}}}}}},
        }
        self.assertEqual(tre._trie, hard_expected,
                         "'brandless' should have been removed (hard check)")

        self.assertEqual(tre._trie, TRE('brander', 'brandy')._trie,
                         "'brandless' should have been removed (soft check)")

    def test_remove_nonexisting_word(self):
        self.tre_incr.remove('riffraff')

        hard_expected = {
            'p': {'**': {},
                  'e': {'**': {},
                        'a': {'**': {},
                              'r': {'**': {}}}}},
        }
        self.assertEqual(self.tre_incr._trie, hard_expected,
                         "Trie should remain the same (hard check)")

        self.assertEqual(self.tre_incr._trie,
                         TRE(*self.incrementals)._trie,
                         "Trie should remain the same (soft check)")

Python TrieRegEx.add примеры использования