Пример #1
0
def test_empty_mult_suppression():
	assert conc.parse("[]0\\d").reduce() == charclass.parse("[]")
	assert conc(
		mult(pattern(), one), # this mult can never actually match anything
		mult(charclass("0"), one),
		mult(charclass("0123456789"), one),
	).reduce() == charclass.parse("[]")
Пример #2
0
def test_empty_mult_suppression():
	assert conc.parse("[]0\d").reduce() == charclass.parse("[]")
	assert conc(
		mult(pattern(), one), # this mult can never actually match anything
		mult(charclass("0"), one),
		mult(charclass("0123456789"), one),
	).reduce() == charclass.parse("[]")
Пример #3
0
def test_charclass_multiplication():
	# a * 1 = a
	assert charclass("a") * one == charclass("a")
	# a * {1,3} = a{1,3}
	assert charclass("a") * multiplier.parse("{1,3}") == mult.parse("a{1,3}")
	# a * {4,} = a{4,}
	assert charclass("a") * multiplier.parse("{4,}") == mult.parse("a{4,}")
Пример #4
0
def test_odd_bug():
	# Odd bug with ([bc]*c)?[ab]*
	int5A = mult(charclass("bc"), star).to_fsm({"a", "b", "c", fsm.anything_else})
	assert int5A.accepts([])
	assert int5A.accepts("")
	int5B = mult(charclass("c"), one).to_fsm({"a", "b", "c", fsm.anything_else})
	assert int5B.accepts("c")
	assert int5B.accepts(["c"])
	int5C = int5A + int5B
	assert int5C.accepts("c")
	assert int5C.accepts(["c"])
Пример #5
0
def test_odd_bug():
	# Odd bug with ([bc]*c)?[ab]*
	int5A = mult(charclass("bc"), star).to_fsm({"a", "b", "c", fsm.anything_else})
	assert int5A.accepts([])
	assert int5A.accepts("")
	int5B = mult(charclass("c"), one).to_fsm({"a", "b", "c", fsm.anything_else})
	assert int5B.accepts("c")
	assert int5B.accepts(["c"])
	int5C = int5A + int5B
	assert int5C.accepts("c")
	assert int5C.accepts(["c"])
Пример #6
0
def test_mult_reduction_easy():
	assert mult.parse("a").reduce() == charclass.parse("a")
	assert mult.parse("a").reduce() == charclass("a")
	assert mult.parse("a?").reduce() == mult(charclass("a"), qm)
	assert mult.parse("a{0}").reduce() == emptystring
	assert mult.parse("[]").reduce() == nothing
	assert mult.parse("[]?").reduce() == emptystring
	assert mult.parse("[]{0}").reduce() == emptystring
	assert mult.parse("[]{0,5}").reduce() == emptystring
	assert mult(pattern(), one).reduce() == nothing
	assert mult(pattern(), qm).reduce() == emptystring
	assert mult(pattern(), zero).reduce() == emptystring
	assert mult(pattern(), multiplier.parse("{0,5}")).reduce() == emptystring
Пример #7
0
def test_mult_reduction_easy():
	assert mult.parse("a").reduce() == charclass.parse("a")
	assert mult.parse("a").reduce() == charclass("a")
	assert mult.parse("a?").reduce() == mult(charclass("a"), qm)
	assert mult.parse("a{0}").reduce() == emptystring
	assert mult.parse("[]").reduce() == nothing
	assert mult.parse("[]?").reduce() == emptystring
	assert mult.parse("[]{0}").reduce() == emptystring
	assert mult.parse("[]{0,5}").reduce() == emptystring
	assert mult(pattern(), one).reduce() == nothing
	assert mult(pattern(), qm).reduce() == emptystring
	assert mult(pattern(), zero).reduce() == emptystring
	assert mult(pattern(), multiplier.parse("{0,5}")).reduce() == emptystring
Пример #8
0
def test_mult_reduction_easy():
	# mult -> mult
	# mult -> charclass
	assert mult(charclass("a"), one).reduce() == charclass("a")
	assert mult(charclass("a"), qm).reduce() == mult(charclass("a"), qm)
	assert mult(charclass("a"), zero).reduce() == emptystring
	assert mult(nothing, one).reduce() == nothing
	assert mult(nothing, qm).reduce() == emptystring
	assert mult(nothing, zero).reduce() == emptystring
	assert mult(nothing, multiplier(bound(0), bound(5))).reduce() == emptystring
	assert mult(pattern(), one).reduce() == nothing
	assert mult(pattern(), qm).reduce() == emptystring
	assert mult(pattern(), zero).reduce() == emptystring
	assert mult(pattern(), multiplier(bound(0), bound(5))).reduce() == emptystring
Пример #9
0
def test_charclass_parsing():
	assert charclass.match("a", 0) == (charclass("a"), 1)
	assert charclass.parse("a") == charclass("a")
	assert charclass.match("aa", 1) == (charclass("a"), 2)
	assert charclass.match("a$", 1) == (charclass("$"), 2)
	assert charclass.match(".", 0) == (dot, 1)
	try:
		charclass.match("[", 0)
		assert False
	except IndexError:
		pass
	try:
		charclass.match("a", 1)
		assert False
	except nomatch:
		pass
Пример #10
0
def test_charclass_fsm():
	# "[^a]"
	nota = (~charclass("a")).to_fsm()
	assert nota.alphabet == {"a", fsm.anything_else}
	assert nota.accepts("b")
	assert nota.accepts(["b"])
	assert nota.accepts([fsm.anything_else])
Пример #11
0
def test_charclass_parsing():
	assert charclass.match("a", 0) == (charclass("a"), 1)
	assert charclass.parse("a") == charclass("a")
	assert charclass.match("aa", 1) == (charclass("a"), 2)
	assert charclass.match("a$", 1) == (charclass("$"), 2)
	assert charclass.match(".", 0) == (dot, 1)
	try:
		charclass.match("[", 0)
		assert False
	except IndexError:
		pass
	try:
		charclass.match("a", 1)
		assert False
	except nomatch:
		pass
Пример #12
0
def test_charclass_fsm():
	# "[^a]"
	nota = (~charclass("a")).to_fsm()
	assert nota.alphabet == {"a", fsm.anything_else}
	assert nota.accepts("b")
	assert nota.accepts(["b"])
	assert nota.accepts([fsm.anything_else])
def assert_non_overlapping(fsa1, fsa2):
    """Assert that the intersection of two lego finite state automata is the
    empty FSA.

    """
    if not'--no-skip' in sys.argv:
        raise SkipTest
    assert fsa1 & fsa2 == lego.charclass(), ("Overlapping regex: "
                                             "{}".format(fsa1 & fsa2))
Пример #14
0
def assert_non_overlapping(fsa1, fsa2):
    """Assert that the intersection of two lego finite state automata is the
    empty FSA.

    """
    if not '--no-skip' in sys.argv:
        raise SkipTest
    assert fsa1 & fsa2 == lego.charclass(), ("Overlapping regex: "
                                             "{}".format(fsa1 & fsa2))
Пример #15
0
def test_conc_str():
	assert str(conc(
		mult(charclass("a"), one),
		mult(charclass("b"), one),
		mult(charclass("c"), one),
		mult(charclass("d"), one),
		mult(charclass("e"), one),
		mult(~charclass("fg"), star),
		mult(charclass("h"), multiplier(bound(5), bound(5))),
		mult(charclass("abcdefghijklmnopqrstuvwxyz"), plus),
	)) == "abcde[^fg]*h{5}[a-z]+"
Пример #16
0
def test_conc_str():
	assert str(conc(
		mult(charclass("a"), one),
		mult(charclass("b"), one),
		mult(charclass("c"), one),
		mult(charclass("d"), one),
		mult(charclass("e"), one),
		mult(~charclass("fg"), star),
		mult(charclass("h"), multiplier(bound(5), bound(5))),
		mult(charclass("abcdefghijklmnopqrstuvwxyz"), plus),
	)) == "abcde[^fg]*h{5}[a-z]+"
Пример #17
0
def test_charclass_gen():
	gen = charclass("xyz").strings()
	assert next(gen) == "x"
	assert next(gen) == "y"
	assert next(gen) == "z"
	try:
		next(gen)
		assert False
	except StopIteration:
		assert True
Пример #18
0
def test_charclass_gen():
	gen = charclass("xyz").strings()
	assert next(gen) == "x"
	assert next(gen) == "y"
	assert next(gen) == "z"
	try:
		next(gen)
		assert False
	except StopIteration:
		assert True
Пример #19
0
def test_mult_intersection():
	assert mult.parse("a") & mult.parse("b?") == charclass()
	assert mult.parse("a") & mult.parse("b?") == nothing
	assert mult.parse("a") & mult.parse("a?") == charclass.parse("a")
	assert mult.parse("a{2}") & mult.parse("a{2,}") == mult.parse("a{2}")
	assert mult.parse("a") & mult.parse("b") == charclass.parse("[]")
	assert mult.parse("a") & mult.parse("a") == charclass.parse("a")
	assert mult.parse("a*") & mult.parse("a") == charclass.parse("a")
	assert mult.parse("a*") & mult.parse("b*") == conc.parse("")
	assert mult.parse("a*") & mult.parse("a+") == mult.parse("a+")
	assert mult.parse("a{2}") & mult.parse("a{4}") == charclass.parse("[]")
	assert mult.parse("a{3,}") & mult.parse("a{3,}") == mult.parse("a{3,}")
Пример #20
0
def test_mult_intersection():
	assert mult.parse("a") & mult.parse("b?") == charclass()
	assert mult.parse("a") & mult.parse("b?") == nothing
	assert mult.parse("a") & mult.parse("a?") == charclass.parse("a")
	assert mult.parse("a{2}") & mult.parse("a{2,}") == mult.parse("a{2}")
	assert mult.parse("a") & mult.parse("b") == charclass.parse("[]")
	assert mult.parse("a") & mult.parse("a") == charclass.parse("a")
	assert mult.parse("a*") & mult.parse("a") == charclass.parse("a")
	assert mult.parse("a*") & mult.parse("b*") == conc.parse("")
	assert mult.parse("a*") & mult.parse("a+") == mult.parse("a+")
	assert mult.parse("a{2}") & mult.parse("a{4}") == charclass.parse("[]")
	assert mult.parse("a{3,}") & mult.parse("a{3,}") == mult.parse("a{3,}")
Пример #21
0
def test_empty():
	assert nothing.empty()
	assert charclass().empty()
	assert not dot.empty()
	assert not mult.parse("a{0}").empty()
	assert mult.parse("[]").empty()
	assert not mult.parse("[]?").empty()
	assert conc.parse("a[]").empty()
	assert not conc.parse("a[]?").empty()
	assert pattern().empty()
	assert not pattern.parse("a{0}").empty()
	assert not pattern.parse("[]?").empty()
Пример #22
0
def test_empty():
	assert nothing.empty()
	assert charclass().empty()
	assert not dot.empty()
	assert not mult.parse("a{0}").empty()
	assert mult.parse("[]").empty()
	assert not mult.parse("[]?").empty()
	assert conc.parse("a[]").empty()
	assert not conc.parse("a[]?").empty()
	assert pattern().empty()
	assert not pattern.parse("a{0}").empty()
	assert not pattern.parse("[]?").empty()
Пример #23
0
def test_pattern_equality():
	assert pattern(
		conc(mult(charclass("a"), one)),
		conc(mult(charclass("b"), one)),
	) == pattern(
		conc(mult(charclass("b"), one)),
		conc(mult(charclass("a"), one)),
	)
	assert pattern(
		conc(mult(charclass("a"), one)),
		conc(mult(charclass("a"), one)),
	) == pattern(
		conc(mult(charclass("a"), one)),
	)
Пример #24
0
def test_pattern_equality():
	assert pattern(
		conc(mult(charclass("a"), one)),
		conc(mult(charclass("b"), one)),
	) == pattern(
		conc(mult(charclass("b"), one)),
		conc(mult(charclass("a"), one)),
	)
	assert pattern(
		conc(mult(charclass("a"), one)),
		conc(mult(charclass("a"), one)),
	) == pattern(
		conc(mult(charclass("a"), one)),
	)
Пример #25
0
def test_mult_str():
	a = charclass("a")
	assert str(mult(a, one)) == "a"
	assert str(mult(a, multiplier(bound(2), bound(2)))) == "aa"
	assert str(mult(a, multiplier(bound(3), bound(3)))) == "aaa"
	assert str(mult(a, multiplier(bound(4), bound(4)))) == "aaaa"
	assert str(mult(a, multiplier(bound(5), bound(5)))) == "a{5}"
	assert str(mult(a, qm)) == "a?"
	assert str(mult(a, star)) == "a*"
	assert str(mult(a, plus)) == "a+"
	assert str(mult(a, multiplier(bound(2), bound(5)))) == "a{2,5}"
	assert str(bound(2)) == "2"
	assert str(inf) == ""
	assert str(multiplier(bound(2), inf)) == "{2,}"
	assert str(mult(a, multiplier(bound(2), inf))) == "a{2,}"
	assert str(mult(d, one)) == "\\d"
	assert str(mult(d, multiplier(bound(2), bound(2)))) == "\\d\\d"
	assert str(mult(d, multiplier(bound(3), bound(3)))) == "\\d{3}"
Пример #26
0
def test_mult_str():
	assert str(bound(2)) == "2"
	assert str(inf) == ""
	assert str(multiplier(bound(2), inf)) == "{2,}"

	a = charclass("a")
	assert str(mult(a, one)) == "a"
	assert str(mult(a, multiplier(bound(2), bound(2)))) == "a{2}"
	assert str(mult(a, multiplier(bound(3), bound(3)))) == "a{3}"
	assert str(mult(a, multiplier(bound(4), bound(4)))) == "a{4}"
	assert str(mult(a, multiplier(bound(5), bound(5)))) == "a{5}"
	assert str(mult(a, qm)) == "a?"
	assert str(mult(a, star)) == "a*"
	assert str(mult(a, plus)) == "a+"
	assert str(mult(a, multiplier(bound(2), bound(5)))) == "a{2,5}"
	assert str(mult(a, multiplier(bound(2), inf))) == "a{2,}"
	assert str(mult(d, one)) == "\\d"
	assert str(mult(d, multiplier(bound(2), bound(2)))) == "\\d{2}"
	assert str(mult(d, multiplier(bound(3), bound(3)))) == "\\d{3}"
Пример #27
0
def test_mult_intersection():
	# a & b? = nothing
	assert mult.parse("a") & mult.parse("b?") == charclass()
	assert mult.parse("a") & mult.parse("b?") == nothing

	# a & a? = nothing
	assert mult.parse("a").reduce() == charclass.parse("a")
	assert mult.parse("a") & mult.parse("a?") == charclass.parse("a")

	# a{2} & a{2,} = a{2}
	assert mult.parse("a{2}") & mult.parse("a{2,}") == mult.parse("a{2}")

	# a & b -> no intersection.
	assert mult.parse("a") & mult.parse("b") == charclass.parse("[]")

	# a & a -> a
	assert mult.parse("a") & mult.parse("a") == charclass.parse("a")

	# a* & a -> a
	assert mult.parse("a*") & mult.parse("a") == charclass.parse("a")

	# a* & b* -> emptystring
	assert mult.parse("a*") & mult.parse("b*") == conc.parse("")

	# a* & a+ -> a+
	assert mult.parse("a*") & mult.parse("a+") == mult.parse("a+")

	# aa & aaaa -> []
	assert mult.parse("a{2}") & mult.parse("a{4}") == charclass.parse("[]")

	# a{3,4} & a{2,5} -> a{2,3}
	assert mult.parse("a{3,4}").common(mult.parse("a{2,5}")) == mult.parse("a{2,3}")

	# a{2,} & a{1,5} -> a{1,5}
	assert mult.parse("a{2,}").common(mult.parse("a{1,5}")) == mult.parse("a{1,5}")

	# a{3,}, a{2,} -> a{2,} (with a, epsilon left over)
	assert mult.parse("a{3,}").common(mult.parse("a{2,}")) == mult.parse("a{2,}")

	# a{3,}, a{3,} -> a{3,} (with inf, inf left over)
	assert mult.parse("a{3,}") & mult.parse("a{3,}") == mult.parse("a{3,}")
Пример #28
0
def test_mult_parsing():
	assert mult.parse("[a-g]+") == mult(charclass("abcdefg"), plus)
	assert mult.parse("[a-g0-8$%]+") == mult(charclass("abcdefg012345678$%"), plus)
	assert mult.parse("[a-g0-8$%\\^]+") == mult(charclass("abcdefg012345678$%^"), plus)
	assert mult.match("abcde[^fg]*", 5) == (
		mult(~charclass("fg"), star),
		11
	)
	assert mult.match("abcde[^fg]*h{5}[a-z]+", 11) == (
		mult(charclass("h"), multiplier(bound(5), bound(5))),
		15
	)
	assert mult.match("abcde[^fg]*h{5}[a-z]+T{1,}", 15) == (
		mult(charclass("abcdefghijklmnopqrstuvwxyz"), plus),
		21
	)
	assert mult.match("abcde[^fg]*h{5}[a-z]+T{2,}", 21) == (
		mult(charclass("T"), multiplier(bound(2), inf)),
		26
	)
Пример #29
0
def test_mult_parsing():
	assert mult.parse("[a-g]+") == mult(charclass("abcdefg"), plus)
	assert mult.parse("[a-g0-8$%]+") == mult(charclass("abcdefg012345678$%"), plus)
	assert mult.parse("[a-g0-8$%\\^]+") == mult(charclass("abcdefg012345678$%^"), plus)
	assert mult.match("abcde[^fg]*", 5) == (
		mult(~charclass("fg"), star),
		11
	)
	assert mult.match("abcde[^fg]*h{5}[a-z]+", 11) == (
		mult(charclass("h"), multiplier(bound(5), bound(5))),
		15
	)
	assert mult.match("abcde[^fg]*h{5}[a-z]+T{1,}", 15) == (
		mult(charclass("abcdefghijklmnopqrstuvwxyz"), plus),
		21
	)
	assert mult.match("abcde[^fg]*h{5}[a-z]+T{2,}", 21) == (
		mult(charclass("T"), multiplier(bound(2), inf)),
		26
	)
Пример #30
0
def test_pattern_str():
	assert str(pattern(
		conc(mult(charclass("a"), one)),
		conc(mult(charclass("b"), one)),
	)) == "a|b"
	assert str(pattern(
		conc(mult(charclass("a"), one)),
		conc(mult(charclass("a"), one)),
	)) == "a"
	assert str(pattern(
		conc(
			mult(charclass("a"), one),
			mult(charclass("b"), one),
			mult(charclass("c"), one),
		),
		conc(
			mult(charclass("d"), one),
			mult(charclass("e"), one),
			mult(charclass("f"), one),
			mult(
				pattern(
					conc(
						mult(charclass("g"), one),
						mult(charclass("h"), one),
						mult(charclass("i"), one),
					),
					conc(
						mult(charclass("j"), one),
						mult(charclass("k"), one),
						mult(charclass("l"), one),
					),
				), one
			),
		),
	)) == "abc|def(ghi|jkl)"
Пример #31
0
def test_pattern_parsing():
	assert pattern.parse("abc|def(ghi|jkl)") == pattern(
		conc(
			mult(charclass("a"), one),
			mult(charclass("b"), one),
			mult(charclass("c"), one),
		),
		conc(
			mult(charclass("d"), one),
			mult(charclass("e"), one),
			mult(charclass("f"), one),
			mult(
				pattern(
					conc(
						mult(charclass("g"), one),
						mult(charclass("h"), one),
						mult(charclass("i"), one),
					),
					conc(
						mult(charclass("j"), one),
						mult(charclass("k"), one),
						mult(charclass("l"), one),
					),
				), one
			),
		)
	)

	# Accept the "non-capturing group" syntax, "(?: ... )" but give it no
	# special significance
	assert parse("(?:)") == parse("()")
	assert parse("(?:abc|def)") == parse("(abc|def)")
	parse("(:abc)") # should give no problems

	# Named groups
	assert pattern.parse("(?P<ng1>abc)") == parse("(abc)")
Пример #32
0
def test_charclass_multiplication():
	assert charclass("a") * one == charclass("a")
	assert charclass("a") * multiplier.parse("{1,3}") == mult.parse("a{1,3}")
	assert charclass("a") * multiplier.parse("{4,}") == mult.parse("a{4,}")
Пример #33
0
def test_charclass_intersection():
	# [ab] n [bc] = [b]
	assert charclass("ab") & charclass("bc") == charclass("b")
	# [ab] n [^bc] = [a]
	assert charclass("ab") & ~charclass("bc") == charclass("a")
	# [^ab] n [bc] = [c]
	assert ~charclass("ab") & charclass("bc") == charclass("c")
	# [^ab] n [^bc] = [^abc]
	assert ~charclass("ab") & ~charclass("bc") == ~charclass("abc")
Пример #34
0
def test_charclass_negation():
	assert ~~charclass("a") == charclass("a")
	assert charclass("a") == ~~charclass("a")
Пример #35
0
def test_conc_equality():
	assert conc(mult(charclass("a"), one)) == conc(mult(charclass("a"), one))
	assert conc(mult(charclass("a"), one)) != conc(mult(charclass("b"), one))
	assert conc(mult(charclass("a"), one)) != conc(mult(charclass("a"), qm))
	assert conc(mult(charclass("a"), one)) != conc(mult(charclass("a"), multiplier(bound(1), bound(2))))
	assert conc(mult(charclass("a"), one)) != emptystring
Пример #36
0
def test_repr():
	assert repr(~charclass("a")) == "~charclass('a')"
Пример #37
0
def test_mult_equality():
	assert mult(charclass("a"), one) == mult(charclass("a"), one)
	assert mult(charclass("a"), one) != mult(charclass("b"), one)
	assert mult(charclass("a"), one) != mult(charclass("a"), qm)
	assert mult(charclass("a"), one) != mult(charclass("a"), multiplier(bound(1), bound(2)))
	assert mult(charclass("a"), one) != charclass("a")
Пример #38
0
    def lego(self):
        '''
			This is the big kahuna of this module. Turn the present FSM into a regular
			expression object, as imported from the lego module. This is accomplished
			using the Brzozowski algebraic method.
		'''
        from greenery.lego import nothing, charclass, emptystring, star, otherchars

        # We need a new state not already used; guess first beyond current len
        outside = len(self.states)
        while outside in self.states:
            outside += 1

        # The set of strings that would be accepted by this FSM if you started
        # at state i is represented by the regex R_i.
        # If state i has a sole transition "a" to state j, then we know R_i = a R_j.
        # If state i is final, then the empty string is also accepted by this regex.
        # And so on...

        # From this we can build a set of simultaneous equations in len(self.states)
        # variables. This system is easily solved for all variables, but we only
        # need one: R_a, where a is the starting state.

        # The first thing we need to do is organise the states into order of depth,
        # so that when we perform our back-substitutions, we can start with the
        # last (deepest) state and therefore finish with R_a.
        states = [self.initial]
        i = 0
        while i < len(states):
            current = states[i]
            for symbol in sorted(self.alphabet, key=str):
                next = self.map[current][symbol]
                if next not in states:
                    states.append(next)
            i += 1

        # Our system of equations is represented like so:
        brz = {}
        for a in self.states:
            brz[a] = {}
            for b in self.states | set([outside]):
                brz[a][b] = nothing

        # Populate it with some initial data.
        for a in self.map:
            for symbol in self.map[a]:
                b = self.map[a][symbol]
                if symbol == otherchars:
                    brz[a][b] |= ~charclass(self.alphabet - set([otherchars]))
                else:
                    brz[a][b] |= charclass(set([symbol]))
            if a in self.finals:
                brz[a][outside] |= emptystring

        # Now perform our back-substitution
        for i in reversed(range(len(states))):
            a = states[i]

            # Before the equation for R_a can be substituted into the other
            # equations, we need to resolve the self-transition (if any).
            # e.g.    R_a = 0 R_a |   1 R_b |   2 R_c
            # becomes R_a =         0*1 R_b | 0*2 R_c
            loop = brz[a][a] * star  # i.e. "0*"
            del brz[a][a]

            for right in brz[a]:
                brz[a][right] = loop + brz[a][right]

            # Note: even if we're down to our final equation, the above step still
            # needs to be performed before anything is returned.

            # Now we can substitute this equation into all of the previous ones.
            for j in range(i):
                b = states[j]

                # e.g. substituting R_a =  0*1 R_b |      0*2 R_c
                # into              R_b =    3 R_a |        4 R_c | 5 R_d
                # yields            R_b = 30*1 R_b | (30*2|4) R_c | 5 R_d
                univ = brz[b][a]  # i.e. "3"
                del brz[b][a]

                for right in brz[a]:
                    brz[b][right] |= univ + brz[a][right]

        return brz[self.initial][outside]
Пример #39
0
def test_empty_pattern_reduction():
	assert pattern().reduce() == charclass()
Пример #40
0
def test_conc_parsing():
	assert conc.parse("abcde[^fg]*h{5}[a-z]+") == conc(
		mult(charclass("a"), one),
		mult(charclass("b"), one),
		mult(charclass("c"), one),
		mult(charclass("d"), one),
		mult(charclass("e"), one),
		mult(~charclass("fg"), star),
		mult(charclass("h"), multiplier(bound(5), bound(5))),
		mult(charclass("abcdefghijklmnopqrstuvwxyz"), plus),
	)
	assert conc.parse("[bc]*[ab]*") == conc(
		mult(charclass("bc"), star),
		mult(charclass("ab"), star),
	)
	assert conc.parse("abc...") == conc(
		mult(charclass("a"), one),
		mult(charclass("b"), one),
		mult(charclass("c"), one),
		mult(dot, one),
		mult(dot, one),
		mult(dot, one),
	)
	assert conc.parse("\\d{4}-\\d{2}-\\d{2}") == conc(
		mult(charclass("0123456789"), multiplier(bound(4), bound(4))),
		mult(charclass("-"), one),
		mult(charclass("0123456789"), multiplier(bound(2), bound(2))),
		mult(charclass("-"), one),
		mult(charclass("0123456789"), multiplier(bound(2), bound(2))),
	)
Пример #41
0
def test_charclass_union():
	# [ab] u [bc] = [abc]
	assert charclass("ab") | charclass("bc") == charclass("abc")
	# [ab] u [^bc] = [^c]
	assert charclass("ab") | ~charclass("bc") == ~charclass("c")
	# [^a] u [bc] = [^a]
	assert ~charclass("ab") | charclass("bc") == ~charclass("a")
	# [^ab] u [^bc] = [^b]
	assert ~charclass("ab") | ~charclass("bc") == ~charclass("b")
Пример #42
0
def test_conc_equality():
	assert conc(mult(charclass("a"), one)) == conc(mult(charclass("a"), one))
	assert conc(mult(charclass("a"), one)) != conc(mult(charclass("b"), one))
	assert conc(mult(charclass("a"), one)) != conc(mult(charclass("a"), qm))
	assert conc(mult(charclass("a"), one)) != conc(mult(charclass("a"), multiplier(bound(1), bound(2))))
	assert conc(mult(charclass("a"), one)) != emptystring
Пример #43
0
def test_charclass_equality():
	assert charclass("a") == charclass("a")
	assert ~charclass("a") == ~charclass("a")
	assert ~charclass("a") != charclass("a")
	assert charclass("ab") == charclass("ba")
Пример #44
0
def test_charclass_equality():
	assert charclass("a") == charclass("a")
	assert ~charclass("a") == ~charclass("a")
	assert ~charclass("a") != charclass("a")
	assert charclass("ab") == charclass("ba")
Пример #45
0
def test_repr():
	assert repr(~charclass("a")) == "~charclass('a')"
Пример #46
0
def test_charclass_multiplication():
	assert charclass("a") * one == charclass("a")
	assert charclass("a") * multiplier.parse("{1,3}") == mult.parse("a{1,3}")
	assert charclass("a") * multiplier.parse("{4,}") == mult.parse("a{4,}")
Пример #47
0
def test_charclass_str():
	assert str(w) == "\\w"
	assert str(d) == "\\d"
	assert str(s) == "\\s"
	assert str(charclass("a")) == "a"
	assert str(charclass("{")) == "\\{"
	assert str(charclass("\t")) == "\\t"
	assert str(charclass("ab")) == "[ab]"
	assert str(charclass("a{")) == "[a{]"
	assert str(charclass("a\t")) == "[\\ta]"
	assert str(charclass("a-")) == "[\\-a]"
	assert str(charclass("a[")) == "[\\[a]"
	assert str(charclass("a]")) == "[\\]a]"
	assert str(charclass("ab")) == "[ab]"
	assert str(charclass("abc")) == "[abc]"
	assert str(charclass("abcd")) == "[a-d]"
	assert str(charclass("abcdfghi")) == "[a-df-i]"
	assert str(charclass("^")) == "^"
	assert str(charclass("\\")) == "\\\\"
	assert str(charclass("a^")) == "[\\^a]"
	assert str(charclass("0123456789a")) == "[0-9a]"
	assert str(charclass("\t\v\r A")) == "[\\t\\v\\r A]"
	assert str(charclass("\n\f A")) == "[\\n\\f A]"
	assert str(charclass("\t\n\v\f\r A")) == "[\\t-\\r A]"
	assert str(charclass("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz|")) == "[0-9A-Z_a-z|]"
	assert str(W) == "\\W"
	assert str(D) == "\\D"
	assert str(S) == "\\S"
	assert str(dot) == "."
	assert str(~charclass("")) == "."
	assert str(~charclass("a")) == "[^a]"
	assert str(~charclass("{")) == "[^{]"
	assert str(~charclass("\t")) == "[^\\t]"
	assert str(~charclass("^")) == "[^\\^]"

	# Arbitrary ranges
	assert str(parse("[\\w:;<=>?@\\[\\\\\\]\\^`]")) == "[0-z]"
	# TODO: what if \d is a proper subset of `chars`?

	# escape sequences are not preserved
	assert str(parse("\\x09")) == "\\t"

	# Printing ASCII control characters? You should get hex escapes
	assert str(parse("\\x00")) == "\\x00"
Пример #48
0
def test_pattern_str():
	assert str(pattern(
		conc(mult(charclass("a"), one)),
		conc(mult(charclass("b"), one)),
	)) == "a|b"
	assert str(pattern(
		conc(mult(charclass("a"), one)),
		conc(mult(charclass("a"), one)),
	)) == "a"
	assert str(pattern(
		conc(
			mult(charclass("a"), one),
			mult(charclass("b"), one),
			mult(charclass("c"), one),
		),
		conc(
			mult(charclass("d"), one),
			mult(charclass("e"), one),
			mult(charclass("f"), one),
			mult(
				pattern(
					conc(
						mult(charclass("g"), one),
						mult(charclass("h"), one),
						mult(charclass("i"), one),
					),
					conc(
						mult(charclass("j"), one),
						mult(charclass("k"), one),
						mult(charclass("l"), one),
					),
				), one
			),
		),
	)) == "abc|def(ghi|jkl)"
Пример #49
0
def test_mult_equality():
	assert mult(charclass("a"), one) == mult(charclass("a"), one)
	assert mult(charclass("a"), one) != mult(charclass("b"), one)
	assert mult(charclass("a"), one) != mult(charclass("a"), qm)
	assert mult(charclass("a"), one) != mult(charclass("a"), multiplier(bound(1), bound(2)))
	assert mult(charclass("a"), one) != charclass("a")
Пример #50
0
def test_charclass_union():
	# [ab] u [bc] = [abc]
	assert charclass("ab") | charclass("bc") == charclass("abc")
	# [ab] u [^bc] = [^c]
	assert charclass("ab") | ~charclass("bc") == ~charclass("c")
	# [^a] u [bc] = [^a]
	assert ~charclass("ab") | charclass("bc") == ~charclass("a")
	# [^ab] u [^bc] = [^b]
	assert ~charclass("ab") | ~charclass("bc") == ~charclass("b")
Пример #51
0
def test_charclass_negation():
	assert ~~charclass("a") == charclass("a")
	assert charclass("a") == ~~charclass("a")
Пример #52
0
def test_empty_pattern_reduction():
	assert pattern().reduce() == charclass()
Пример #53
0
def test_charclass_intersection():
	# [ab] n [bc] = [b]
	assert charclass("ab") & charclass("bc") == charclass("b")
	# [ab] n [^bc] = [a]
	assert charclass("ab") & ~charclass("bc") == charclass("a")
	# [^ab] n [bc] = [c]
	assert ~charclass("ab") & charclass("bc") == charclass("c")
	# [^ab] n [^bc] = [^abc]
	assert ~charclass("ab") & ~charclass("bc") == ~charclass("abc")
Пример #54
0
	def lego(self):
		'''
			This is the big kahuna of this module. Turn the present FSM into a regular
			expression object, as imported from the lego module. This is accomplished
			using the Brzozowski algebraic method.
		'''
		from greenery.lego import nothing, charclass, emptystring, star, otherchars

		# We need a new state not already used; guess first beyond current len
		outside = len(self.states)
		while outside in self.states:
			outside += 1

		# The set of strings that would be accepted by this FSM if you started
		# at state i is represented by the regex R_i.
		# If state i has a sole transition "a" to state j, then we know R_i = a R_j.
		# If state i is final, then the empty string is also accepted by this regex.
		# And so on...

		# From this we can build a set of simultaneous equations in len(self.states)
		# variables. This system is easily solved for all variables, but we only
		# need one: R_a, where a is the starting state.

		# The first thing we need to do is organise the states into order of depth,
		# so that when we perform our back-substitutions, we can start with the
		# last (deepest) state and therefore finish with R_a.
		states = [self.initial]
		i = 0
		while i < len(states):
			current = states[i]
			for symbol in sorted(self.alphabet, key=str):
				next = self.map[current][symbol]
				if next not in states:
					states.append(next)
			i += 1

		# Our system of equations is represented like so:
		brz = {}
		for a in self.states:
			brz[a] = {}
			for b in self.states | {outside}:
				brz[a][b] = nothing

		# Populate it with some initial data.
		for a in self.map:
			for symbol in self.map[a]:
				b = self.map[a][symbol]
				if symbol == otherchars:
					brz[a][b] |= ~charclass(self.alphabet - {otherchars})
				else:
					brz[a][b] |= charclass({symbol})
			if a in self.finals:
				brz[a][outside] |= emptystring

		# Now perform our back-substitution
		for i in reversed(range(len(states))):
			a = states[i]

			# Before the equation for R_a can be substituted into the other
			# equations, we need to resolve the self-transition (if any).
			# e.g.    R_a = 0 R_a |   1 R_b |   2 R_c
			# becomes R_a =         0*1 R_b | 0*2 R_c
			loop = brz[a][a] * star # i.e. "0*"
			del brz[a][a]

			for right in brz[a]:
				brz[a][right] = loop + brz[a][right]

			# Note: even if we're down to our final equation, the above step still
			# needs to be performed before anything is returned.

			# Now we can substitute this equation into all of the previous ones.
			for j in range(i):
				b = states[j]

				# e.g. substituting R_a =  0*1 R_b |      0*2 R_c
				# into              R_b =    3 R_a |        4 R_c | 5 R_d
				# yields            R_b = 30*1 R_b | (30*2|4) R_c | 5 R_d
				univ = brz[b][a] # i.e. "3"
				del brz[b][a]

				for right in brz[a]:
					brz[b][right] |= univ + brz[a][right]

		return brz[self.initial][outside]
Пример #55
0
def test_conc_parsing():
	assert conc.parse("abcde[^fg]*h{5}[a-z]+") == conc(
		mult(charclass("a"), one),
		mult(charclass("b"), one),
		mult(charclass("c"), one),
		mult(charclass("d"), one),
		mult(charclass("e"), one),
		mult(~charclass("fg"), star),
		mult(charclass("h"), multiplier(bound(5), bound(5))),
		mult(charclass("abcdefghijklmnopqrstuvwxyz"), plus),
	)
	assert conc.parse("[bc]*[ab]*") == conc(
		mult(charclass("bc"), star),
		mult(charclass("ab"), star),
	)
	assert conc.parse("abc...") == conc(
		mult(charclass("a"), one),
		mult(charclass("b"), one),
		mult(charclass("c"), one),
		mult(dot, one),
		mult(dot, one),
		mult(dot, one),
	)
	assert conc.parse("\\d{4}-\\d{2}-\\d{2}") == conc(
		mult(charclass("0123456789"), multiplier(bound(4), bound(4))),
		mult(charclass("-"), one),
		mult(charclass("0123456789"), multiplier(bound(2), bound(2))),
		mult(charclass("-"), one),
		mult(charclass("0123456789"), multiplier(bound(2), bound(2))),
	)
Пример #56
0
def test_pattern_parsing():
	assert pattern.parse("abc|def(ghi|jkl)") == pattern(
		conc(
			mult(charclass("a"), one),
			mult(charclass("b"), one),
			mult(charclass("c"), one),
		),
		conc(
			mult(charclass("d"), one),
			mult(charclass("e"), one),
			mult(charclass("f"), one),
			mult(
				pattern(
					conc(
						mult(charclass("g"), one),
						mult(charclass("h"), one),
						mult(charclass("i"), one),
					),
					conc(
						mult(charclass("j"), one),
						mult(charclass("k"), one),
						mult(charclass("l"), one),
					),
				), one
			),
		)
	)

	# Accept the "non-capturing group" syntax, "(?: ... )" but give it no
	# special significance
	assert parse("(?:)") == parse("()")
	assert parse("(?:abc|def)") == parse("(abc|def)")
	parse("(:abc)") # should give no problems

	# Named groups
	assert pattern.parse("(?P<ng1>abc)") == parse("(abc)")
Пример #57
0
def test_charclass_str():
	assert str(w) == "\\w"
	assert str(d) == "\\d"
	assert str(s) == "\\s"
	assert str(charclass("a")) == "a"
	assert str(charclass("{")) == "\\{"
	assert str(charclass("\t")) == "\\t"
	assert str(charclass("ab")) == "[ab]"
	assert str(charclass("a{")) == "[a{]"
	assert str(charclass("a\t")) == "[\\ta]"
	assert str(charclass("a-")) == "[\\-a]"
	assert str(charclass("a[")) == "[\\[a]"
	assert str(charclass("a]")) == "[\\]a]"
	assert str(charclass("ab")) == "[ab]"
	assert str(charclass("abc")) == "[abc]"
	assert str(charclass("abcd")) == "[a-d]"
	assert str(charclass("abcdfghi")) == "[a-df-i]"
	assert str(charclass("^")) == "^"
	assert str(charclass("\\")) == "\\\\"
	assert str(charclass("a^")) == "[\\^a]"
	assert str(charclass("0123456789a")) == "[0-9a]"
	assert str(charclass("\t\v\r A")) == "[\\t\\v\\r A]"
	assert str(charclass("\n\f A")) == "[\\n\\f A]"
	assert str(charclass("\t\n\v\f\r A")) == "[\\t-\\r A]"
	assert str(charclass("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz|")) == "[0-9A-Z_a-z|]"
	assert str(W) == "\\W"
	assert str(D) == "\\D"
	assert str(S) == "\\S"
	assert str(dot) == "."
	assert str(~charclass("")) == "."
	assert str(~charclass("a")) == "[^a]"
	assert str(~charclass("{")) == "[^{]"
	assert str(~charclass("\t")) == "[^\\t]"
	assert str(~charclass("^")) == "[^\\^]"

	# Arbitrary ranges
	assert str(parse("[\w:;<=>?@\\[\\\\\]\\^`]")) == "[0-z]"
	# TODO: what if \d is a proper subset of `chars`?

	# escape sequences are not preserved
	assert str(parse("\\x09")) == "\\t"

	# Printing ASCII control characters? You should get hex escapes
	assert str(parse("\\x00")) == "\\x00"