def test_binary_3():
    # Binary numbers divisible by 3.
    # The empty string is rejected; "0" on its own is accepted, but any
    # other string starting with a zero is not.
    machine = fsm.fsm(
        alphabet={"0", "1"},
        states={"initial", "zero", 0, 1, 2, None},
        initial="initial",
        finals={"zero", 0},
        map={
            "initial": {"0": "zero", "1": 1},
            # Anything after a lone leading "0" falls into the None state,
            # which only ever loops back to itself.
            "zero": {"0": None, "1": None},
            0: {"0": 0, "1": 1},
            1: {"0": 2, "1": 0},
            2: {"0": 1, "1": 2},
            None: {"0": None, "1": None},
        },
    )
    div3 = from_fsm(machine)
    assert str(div3) == "0|1(01*0|10*1)*10*"

    # The generator must yield the accepted strings in exactly this order.
    gen = div3.strings()
    for expected in ("0", "11", "110", "1001", "1100"):
        assert next(gen) == expected
def test_base_N():
    # Machine accepts only numbers written in the selected base (e.g. 2, 10)
    # that are divisible by N (e.g. 3, 7).
    # "0" alone is acceptable, but leading zeroes (e.g. "00", "07") are not.
    base = 2
    N = 3
    assert base <= 10

    digits = [str(d) for d in range(base)]

    # A non-zero first digit jumps straight to its remainder state; a
    # leading "0" goes to the dedicated "zero" state instead.
    transitions = {
        "initial": {str(d): d % N for d in range(1, base)},
        "zero": {digit: None for digit in digits},
        None: {digit: None for digit in digits},
    }
    transitions["initial"]["0"] = "zero"

    # State r is the remainder so far; appending digit d gives (r*base + d) % N.
    for remainder in range(N):
        transitions[remainder] = {
            str(d): (remainder * base + d) % N for d in range(base)
        }

    divN = from_fsm(fsm.fsm(
        alphabet=set(digits),
        states=set(range(N)) | {"initial", "zero", None},
        initial="initial",
        finals={"zero", 0},
        map=transitions,
    ))

    # Consecutive generated strings must be consecutive multiples of N.
    gen = divN.strings()
    previous = next(gen)
    assert previous == "0"
    for _ in range(7):
        current = next(gen)
        assert int(previous, base) + N == int(current, base)
        previous = current
def test_even_star_bug():
    # Regression test. The machine below is a(a{2})*, i.e. it accepts an odd
    # number of "a" characters in a row, but from_fsm() used to return "a+".
    # The cause turned out to be a fault in lego.multiplier.__mul__().
    def check_odd_runs_only(machine):
        # Lengths 1 and 3 are accepted; lengths 0 and 2 are not.
        assert not machine.accepts("")
        assert machine.accepts("a")
        assert not machine.accepts("aa")
        assert machine.accepts("aaa")

    original = fsm.fsm(
        alphabet={"a"},
        states={0, 1},
        initial=0,
        finals={1},
        map={
            0: {"a": 1},
            1: {"a": 0},
        },
    )
    check_odd_runs_only(original)

    # Round trip: FSM -> lego pattern -> FSM.
    pattern = from_fsm(original)
    assert str(pattern) in {"a(aa)*", "(aa)*a"}

    rebuilt = pattern.to_fsm()
    check_odd_runs_only(rebuilt)

    # The rebuilt FSM yields its strings as lists of symbols.
    gen = rebuilt.strings()
    for length in (1, 3, 5, 7):
        assert next(gen) == ["a"] * length
def dfa_to_regex():
    """Build a DFA from the request's JSON body and return its regex.

    The JSON payload is read for the keys 'alphabet', 'states', 'initial',
    'finals' and 'map', which are passed to ``fsm.fsm``.

    Returns:
        The ``jsonify`` response for ``{"regex": <pattern string>}``.
    """
    # NOTE(review): with silent=True a missing or malformed body yields None,
    # so the subscripts below would raise TypeError - confirm whether an
    # explicit client-error response is wanted here.
    received_json = request.get_json(silent=True)

    constructed_automata = fsm.fsm(
        alphabet=set(received_json['alphabet']),
        states=set(received_json['states']),
        initial=received_json['initial'],
        finals=set(received_json['finals']),
        map=received_json['map'],
    )

    # Keep the value returned by reduce(); previously it was thrown away and
    # the FSM was converted a second time for the response.
    constructed_regex = lego.from_fsm(constructed_automata).reduce()

    return jsonify({
        "regex": str(constructed_regex)
    })
def entry(apiomemetic_entity, entity_apiomemetic):
    """Report whether ``entity_apiomemetic`` matches the derived regex.

    An automaton is built from ``apiomemetic_entity`` via ``do_thing`` and
    ``to_fsm``, converted to a regex string with ``lego.from_fsm``, and that
    regex is applied to ``entity_apiomemetic``. The cardinality and the regex
    are printed along the way.

    Returns:
        bool: True when the whole of ``entity_apiomemetic`` matches the regex.
    """
    import re

    from greenery import lego

    aut = do_thing(apiomemetic_entity)
    # Named `machine` rather than `fsm` so it cannot be confused with a
    # module of that name.
    machine = to_fsm(aut)
    print("accepts", machine.cardinality(), "strings")

    regex = str(lego.from_fsm(machine))
    print(regex)

    # re.match() only anchors at the start, so any string with a matching
    # prefix would pass; fullmatch() requires the entire string to match.
    return re.fullmatch(regex, entity_apiomemetic) is not None
def test_fsm():
    # You should be able to to_fsm() a single lego piece without supplying a
    # specific alphabet. That should be determinable from context.
    assert str(from_fsm(parse("a.b").to_fsm())) == "a.b"  # not "a[ab]b"

    # A suspiciously familiar example
    bad = parse("0{2}|1{2}").to_fsm()
    assert bad.accepts("00")
    assert bad.accepts("11")
    assert not bad.accepts("01")

    # Raw string: "\d" is not a recognised escape in an ordinary literal and
    # triggers an invalid-escape warning; the compared value is unchanged.
    assert str(parse("0|[1-9]|ab").reduce()) == r"\d|ab"
def test_bad_alphabet():
    # You can use anything you like in your FSM alphabet, but if you try to
    # convert it to a `lego` object then the only acceptable symbols are single
    # characters or `fsm.anything_else`.
    for bad_symbol in [None, (), 0, ("a",), "", "aa", "ab", True]:
        f = fsm.fsm(
            alphabet={bad_symbol},
            states={0},
            initial=0,
            finals=set(),
            map={
                0: {bad_symbol: 0}
            },
        )
        try:
            from_fsm(f)
        except Exception:
            # Any exception from the conversion counts as a rejection.
            continue
        # Reaching this point means the conversion succeeded. Raise explicitly
        # instead of relying on `assert False`, which is stripped under -O and
        # cannot be told apart from an AssertionError raised by from_fsm().
        raise Exception("Accepted bad symbol: " + repr(bad_symbol))
def test_silly_reduction():
    # This one is horrendous and we have to jump through some hoops to get to
    # a sensible result. Probably not a good unit test actually.
    source = (
        "(aa|bb*aa)a*|((ab|bb*ab)|(aa|bb*aa)a*b)((ab|bb*ab)|(aa|bb*aa)a*b)*"
        "(aa|bb*aa)a*|((ab|bb*ab)|(aa|bb*aa)a*b)((ab|bb*ab)|(aa|bb*aa)a*b)*"
    )
    parsed = parse(source)

    # Reverse as an FSM, convert back to a pattern, then reverse again.
    backwards_machine = reversed(parsed.to_fsm())
    simplified = reversed(from_fsm(backwards_machine))
    assert str(simplified) == "[ab]*a[ab]"

    short = "[ab]*a?b*|[ab]*b?a*"
    intersection = parse(".*") & parse(short)
    assert str(intersection) == "[ab]*"
def test_abstar():
    # Buggggs.
    # State 0 loops on 'a' and 'b'; any other symbol moves to state 1, which
    # is not final and never leaves.
    other = fsm.anything_else
    transitions = {
        0: {'a': 0, other: 1, 'b': 0},
        1: {'a': 1, other: 1, 'b': 1},
    }
    abstar = fsm.fsm(
        alphabet={'a', other, 'b'},
        states={0, 1},
        initial=0,
        finals={0},
        map=transitions,
    )
    pattern = from_fsm(abstar)
    assert str(pattern) == "[ab]*"
def __str__(self):
    """
    Describe the language accepted by this DFA as a string.

    Three cases are tried in order:
    - the language is finite: the accepted words are listed;
    - the DFA is known to accept all words: "*" is returned;
    - otherwise (costly): the fsm is converted to a regex with greenery.

    :rtype: str
    """
    if self.has_finite_len():
        text = self._get_strings_set_str()
    elif self.is_all_words == MinDFA.Ternary.TRUE:
        text = "*"
    else:
        # TODO: consider performance implications of this conversion from MinDFA to regex
        text = str(from_fsm(self))
    return text
def test_dead_default():
    # The map below is deliberately partial: states 0 and 1 define a single
    # transition each and state 4 defines none. The test passes as long as
    # the conversion completes without raising.
    other = fsm.anything_else
    machine = fsm.fsm(
        alphabet={"/", "*", other},
        states={0, 1, 2, 3, 4},
        initial=0,
        finals={4},
        map={
            0: {"/": 1},
            1: {"*": 2},
            2: {"/": 2, other: 2, "*": 3},
            3: {"/": 4, other: 2, "*": 3},
        },
    )
    from_fsm(machine)
def test_lego_recursion_error():
    # Catch a recursion error
    machine = fsm.fsm(
        alphabet={"0", "1"},
        states={0, 1, 2, 3},
        initial=3,
        finals={1},
        map={
            0: {"0": 1, "1": 1},
            1: {"0": 2, "1": 2},
            2: {"0": 2, "1": 2},
            3: {"0": 0, "1": 2},
        },
    )
    pattern = from_fsm(machine)
    assert str(pattern) == "0[01]"
def test_adotb():
    # Hand-built machine for "a.b": 'a', then any one symbol, then 'b'.
    # State 1 is a trap that every other path falls into.
    other = fsm.anything_else
    transitions = {
        0: {'a': 2, other: 1, 'b': 1},
        1: {'a': 1, other: 1, 'b': 1},
        2: {'a': 3, other: 3, 'b': 3},
        3: {'a': 1, other: 1, 'b': 4},
        4: {'a': 1, other: 1, 'b': 1},
    }
    adotb = fsm.fsm(
        alphabet={'a', other, 'b'},
        states={0, 1, 2, 3, 4},
        initial=0,
        finals={4},
        map=transitions,
    )
    pattern = from_fsm(adotb)
    assert str(pattern) == "a.b"
def graph2fsm(g):
    """Convert a directed graph into an FSM and return the equivalent regex.

    Every node becomes a state and is assigned a one-character symbol
    ('a', 'b', ...). Moving along an edge consumes the symbol of the node the
    edge points to. The node with in-degree 0 is the initial state and all
    nodes with out-degree 0 are final states.

    The node/degree accessors used here work with both the dict-returning
    (networkx 1.x) and the view-returning (networkx 2.x+) graph APIs.

    Args:
        g: directed graph exposing ``nodes()``, ``adj``, ``in_degree()`` and
            ``out_degree()`` (presumably a networkx DiGraph - confirm).

    Returns:
        The object produced by ``lego.from_fsm`` for the constructed FSM. The
        regex and the symbol-to-node mapping are also printed.
    """
    states = set(g.nodes())

    # map alphabet to regex compatible chars
    # NOTE(review): symbols are consecutive code points starting at 'a', so
    # with more than 26 nodes they run into characters such as '{' and '|'.
    alph_mapper = {
        state: chr(ord('a') + offset) for offset, state in enumerate(states)
    }

    # transitions: (FROM_STATE -> (INPUT -> TO_STATE))
    transitions = {
        state: {alph_mapper.get(succ): succ for succ in g.adj[state]}
        for state in states
    }

    # find start node; dict() accepts both a plain dict and a degree view
    start_node = ""
    for node, degree in dict(g.in_degree()).items():
        if degree == 0:
            if start_node != "":
                print("Error: More than one start states!")
            else:
                start_node = node

    # find end nodes
    end_nodes = {
        node for node, degree in dict(g.out_degree()).items() if degree == 0
    }

    # construct fsm from digraph data
    f = fsm.fsm(
        alphabet=set(alph_mapper.values()),
        states=states,
        initial=start_node,
        finals=end_nodes,
        map=transitions,
    )

    # convert fsm to regex
    r = lego.from_fsm(f)
    print("Regex: {}".format(r))
    print("Regex Mapping:\n--------------")
    for (k, v) in alph_mapper.items():
        print("{}: \t {}".format(v, k))
    return r
def main():
    """Turn the parser's productions into an FSM and print it with its regex.

    Each production ``left -> handle`` becomes a transition from ``left`` on
    ``handle[0]`` to ``handle[1]``; a production whose handle is
    ``parser.EMPTY`` marks ``left`` as a final state instead.
    """
    grammar = get_grammar()
    parser = get_parser(grammar)

    accepting = set()
    transitions = {}
    for head, alternatives in parser.productions.items():
        for handle in alternatives:
            if handle is parser.EMPTY:
                accepting.add(head)
                continue
            transitions.setdefault(head, {})[handle[0]] = handle[1]

    machine = fsm.fsm(
        alphabet=set(parser.terminals),
        states=set(parser.non_terminals),
        initial=parser.start_symbol,
        finals=accepting,
        map=transitions,
    )
    rex = lego.from_fsm(machine)

    print(machine)
    print(rex)
def test_bug_slow():
    # issue #43
    import time

    m = fsm.fsm(
        alphabet={'R', 'L', 'U', 'D'},
        states=set(range(21)),
        initial=0,
        finals={20},
        map={
            0: {'D': 1, 'U': 2},
            1: {'L': 3},
            2: {'L': 4},
            3: {'U': 5},
            4: {'D': 6},
            5: {'R': 7},
            6: {'R': 8},
            7: {'U': 9},
            8: {'D': 10},
            9: {'L': 11},
            10: {'L': 12},
            11: {'L': 13},
            12: {'L': 14},
            13: {'D': 15},
            14: {'U': 16},
            15: {'R': 17},
            16: {'R': 18},
            17: {'D': 19},
            18: {'U': 19},
            19: {'L': 20},
            20: {},
        },
    )

    # perf_counter() is monotonic, so a system clock adjustment during the
    # conversion cannot distort the measured duration the way time.time() can.
    started = time.perf_counter()
    pattern = from_fsm(m)
    elapsed = time.perf_counter() - started

    assert elapsed < 60  # should finish in way under 1s
    assert pattern == parse("(DLURULLDRD|ULDRDLLURU)L").reduce()
import re
import sys

from greenery import fsm, lego

# The three states track the remainder modulo 3 of the binary number read so
# far; the two symbols are the binary digits.
A, B, C = range(3)
a, b = '0', '1'

# create the FSM
machine = fsm.fsm(
    alphabet={a, b},
    states={A, B, C},
    initial=A,
    finals={A},
    map={
        A: {a: A, b: B},
        B: {a: C, b: A},
        C: {a: B, b: C},
    },
)

# convert it to regex
rex = lego.from_fsm(machine)
print(rex)

# NOTE(review): the filter below uses a hard-coded pattern rather than `rex`;
# presumably it is the same expression - confirm.
pattern = re.compile(r"(0|1(01*0)*1)*")

# Echo every stdin line that matches the pattern in full.
for line in sys.stdin:
    line = line.strip()
    if pattern.fullmatch(line):
        print(line)
def test_dot():
    # Round-tripping "a.b" through an FSM must keep the dot rather than
    # narrowing it to the symbols that appear in the pattern.
    machine = parse("a.b").to_fsm()
    round_tripped = from_fsm(machine)
    assert str(round_tripped) == "a.b"  # not "a[ab]b"