def test_thompson_compile_nfa_bracket_repetitions():
    """Check bounded repetition (``{m,n}`` and ``{n}``) on brackets and symbols.

    ``[x-z]{1,3}`` must accept the listed words of one to three characters
    taken from ``x``, ``y``, ``z`` and reject the empty word, a four-letter
    word and a word over another letter.  ``x{3}`` must accept ``"xxx"``.
    """
    nfa, _q0, _final = thompson_compile_nfa("[x-z]{1,3}")
    if in_ipynb():
        ipynb_display_graph(nfa)

    accepted_words = ("x", "y", "xx", "xy", "zy", "xxx", "yyy", "zzz", "xyz", "zyx")
    rejected_words = ("", "xxxx", "aaa")
    # Pair every word with the exact boolean that accepts() must return.
    expectations = [(word, True) for word in accepted_words]
    expectations += [(word, False) for word in rejected_words]
    for word, expected in expectations:
        # Identity check: accepts() must return a real bool here.
        assert accepts(word, nfa) is expected

    # Exact repetition count applied to a single symbol.
    nfa, _q0, _final = thompson_compile_nfa("x{3}")
    assert accepts("xxx", nfa)
def test_thompson_compile_nfa_alternation():
    """Check the NFA compiled from ``a*|b`` against three sample words."""
    nfa, _q0, _final = thompson_compile_nfa("a*|b")
    if in_ipynb():
        ipynb_display_graph(nfa)

    # (word, should it be accepted?) -- checked in the listed order.
    expectations = (
        ("bbbbb", False),
        ("b", True),
        ("aaaaaa", True),
    )
    for word, expected in expectations:
        assert bool(accepts(word, nfa)) is expected
def test_thompson_compile_nfa_repetition():
    """Check that ``((ab){3})*`` accepts ``"ab" * i`` iff ``i`` is a multiple of 3.

    The words ``"ab" * i`` are tried for ``i`` in ``0..6``.
    """
    nfa, _q0, _final = thompson_compile_nfa("((ab){3})*")
    if in_ipynb():
        ipynb_display_graph(nfa)

    for i in range(7):
        should_accept = (i % 3 == 0)
        obtained = accepts("ab" * i, nfa)
        assert obtained == should_accept, f"w = {'ab' * i}, i = {i}"
def test_thompson_compile_nfa_one_or_more():
    """Check the NFA compiled from ``(ab+)+`` against four sample words."""
    nfa, _q0, _final = thompson_compile_nfa("(ab+)+")
    if in_ipynb():
        ipynb_display_graph(nfa)

    # (word, should it be accepted?) -- checked in the listed order.
    expectations = (
        ("", False),
        ("b", False),
        ("abbbbb", True),
        ("abbbbbabbbbbabbbbb", True),
    )
    for word, expected in expectations:
        assert bool(accepts(word, nfa)) is expected
def test_class_s():
    r"""Check that ``\s+`` and ``[\s]+`` accept words made of spaces and tabs."""
    # NOTE(review): the first two inputs are identical in this source; one of
    # them was possibly meant to be a longer run of spaces -- confirm.
    blank_words = (" ", " ", "\t\t", " \t \t ")
    for pattern in (r"\s+", r"[\s]+"):
        print(pattern)
        nfa, _q0, _final = thompson_compile_nfa(pattern)
        for word in blank_words:
            assert nfa.accepts(word)
def test_thompson_compile_nfa_concatenation():
    """Check the NFA compiled from ``a+b+`` against five sample words."""
    nfa, _q0, _final = thompson_compile_nfa("a+b+")
    if in_ipynb():
        ipynb_display_graph(nfa)

    # (word, should it be accepted?) -- checked in the listed order.
    expectations = (
        ("abab", False),
        ("aaa", False),
        ("bbb", False),
        ("ab", True),
        ("aaaaaabbbbbb", True),
    )
    for word, expected in expectations:
        assert bool(accepts(word, nfa)) is expected
def test_escaped_operator():
    r"""Check that a backslash-escaped operator is matched as a literal.

    Three things are verified:

    * ``a\?b`` accepts only ``"a?b"``, whereas the unescaped ``a?b``
      accepts ``"ab"`` and ``"b"`` but not ``"a?b"``;
    * each escaped operator, compiled on its own, accepts the bare
      operator character;
    * the concatenation of all escaped operators accepts the
      concatenation of the bare operator characters.
    """
    # Escaped "?": the question mark is a plain symbol of the word.
    nfa, _, _ = thompson_compile_nfa("a\\?b")
    assert accepts("a?b", nfa)
    assert not accepts("ab", nfa)
    assert not accepts("b", nfa)

    # Unescaped "?": the preceding "a" becomes optional.
    nfa, _, _ = thompson_compile_nfa("a?b")
    assert not accepts("a?b", nfa)
    assert accepts("ab", nfa)
    assert accepts("b", nfa)

    # Every escaped operator, taken alone.
    for regexp in r"\|", r"\.", r"\*", r"\+", r"\(", r"\)", r"\{", r"\}", r"\[", r"\]":
        nfa, _, _ = thompson_compile_nfa(regexp)
        # Stripping the backslashes yields the literal word to recognize.
        assert accepts(regexp.replace("\\", ""), nfa)

    # All escaped operators in a row.  The result of accepts() used to be
    # discarded here, so this case was never actually checked.
    regexp = r"\|\.\*\+\(\)\{\}\[\]"
    nfa, _, _ = thompson_compile_nfa(regexp)
    assert accepts(regexp.replace("\\", ""), nfa)
def test_thompson_compile_nfa_zero_or_one():
    """Check the NFA compiled from ``(ab?)*`` against six sample words."""
    nfa, _q0, _final = thompson_compile_nfa("(ab?)*")
    if in_ipynb():
        ipynb_display_graph(nfa)

    # (word, should it be accepted?) -- checked in the listed order.
    expectations = (
        ("", True),
        ("b", False),
        ("a", True),
        ("ab", True),
        ("aba", True),
        ("abb", False),
    )
    for word, expected in expectations:
        assert bool(accepts(word, nfa)) is expected
def test_escaped_classes():
    r"""Check the escaped classes ``\d \w \s \D \W \S`` one symbol at a time.

    For each class, the NFA compiled over ``DEFAULT_ALPHABET`` must accept a
    single-character word exactly when that character is in the expected
    set.  The expected set of a lowercase class is what ``parse_escaped``
    returns for it; the expected set of an uppercase class is the complement
    (within the alphabet) of its lowercase counterpart.
    """
    alphabet = DEFAULT_ALPHABET
    escaped_classes = [r"\d", r"\w", r"\s", r"\D", r"\W", r"\S"]

    # parse_escaped() is evaluated for every class, although only the
    # lowercase entries are looked up below.
    allowed_by_class = {}
    for escaped in escaped_classes:
        allowed_by_class[escaped] = set(parse_escaped(escaped, alphabet))

    for regexp in escaped_classes:
        lowered = regexp.lower()
        allowed = allowed_by_class[lowered]
        if lowered != regexp:
            # Uppercase class: complement of the lowercase one.
            allowed = set(alphabet) - allowed
        nfa, _q0, _final = thompson_compile_nfa(regexp, alphabet)
        for a in alphabet:
            assert accepts(a, nfa) == (a in allowed), \
                f"regexp = {regexp} a = '{a}' ({ord(a)}) allowed = '{allowed}' obtained = {accepts(a, nfa)} expected = {a in allowed}"
def test_thompson_compile_nfa_escaped_operators():
    r"""Check a regexp made of every escaped operator followed by ``aa``.

    The word obtained by removing the backslashes from the regexp must be
    accepted by the compiled NFA.
    """
    regexp = r"\|\.\*\+\(\)\{\}\[\]aa"
    nfa, _, _ = thompson_compile_nfa(regexp)
    # Show the graph before asserting so that it is still displayed when the
    # assertion fails (same order as the sibling tests).
    if in_ipynb():
        ipynb_display_graph(nfa)
    # The result of accepts() used to be discarded, so the test could never
    # fail; assert it explicitly.
    assert accepts(regexp.replace("\\", ""), nfa)
def test_thompson_compile_nfa():
    """Check that ``(a?b)*?c+d`` accepts one sample word mixing all operators."""
    regexp = "(a?b)*?c+d"
    word = "babbbababcccccd"
    nfa, _q0, _final = thompson_compile_nfa(regexp)
    if in_ipynb():
        ipynb_display_graph(nfa)
    assert accepts(word, nfa)
def compile_dfa(regexp: str, complete: bool = False) -> Automaton:
    """Compile a regular expression into a deterministic automaton.

    The regexp is first compiled with ``thompson_compile_nfa``; the resulting
    NFA is then passed to ``moore_determination``.

    Args:
        regexp: The regular expression to compile.
        complete: Forwarded as the ``complete`` keyword of
            ``moore_determination``.

    Returns:
        The automaton returned by ``moore_determination``.
    """
    # Only the NFA is needed; the two other returned values are ignored.
    nfa, _q0, _final = thompson_compile_nfa(regexp)
    return moore_determination(nfa, complete=complete)
def compile_nfa(regexp: str) -> Nfa:
    """Compile a regular expression into an NFA.

    Args:
        regexp: The regular expression to compile.

    Returns:
        The first element of the triple returned by
        ``thompson_compile_nfa``; the two other elements are discarded.
    """
    automaton, _q0, _final = thompson_compile_nfa(regexp)
    return automaton
def make_second_nfa():
    """Build and return the NFA compiled from the regular expression ``d?``."""
    # Imported locally, as in the original code.
    from pybgl.thompson_compile_nfa import thompson_compile_nfa

    automaton, _q0, _final = thompson_compile_nfa("d?")
    return automaton
def make_nfa():
    """Build and return the NFA compiled from ``(a?b)*?c+d``."""
    # Imported locally, as in the original code.
    from pybgl.thompson_compile_nfa import thompson_compile_nfa

    automaton, _q0, _final = thompson_compile_nfa("(a?b)*?c+d")
    return automaton