def test_empty_mult_suppression():
    """A conc containing a term that can never match reduces to ``[]``."""
    reduced_from_text = conc.parse("[]0\\d").reduce()
    assert reduced_from_text == charclass.parse("[]")

    built_by_hand = conc(
        mult(pattern(), one),  # this mult can never actually match anything
        mult(charclass("0"), one),
        mult(charclass("0123456789"), one),
    )
    assert built_by_hand.reduce() == charclass.parse("[]")
def test_empty_mult_suppression():
    """A conc containing a term that can never match reduces to ``[]``.

    The parsed text is the five characters ``[]0\\d``; the backslash is
    written as an explicit ``\\\\`` escape so the literal does not rely on
    Python passing an unrecognised escape sequence through unchanged.
    """
    assert conc.parse("[]0\\d").reduce() == charclass.parse("[]")
    assert conc(
        mult(pattern(), one),  # this mult can never actually match anything
        mult(charclass("0"), one),
        mult(charclass("0123456789"), one),
    ).reduce() == charclass.parse("[]")
def test_charclass_multiplication():
    """Multiply the charclass ``a`` by several multipliers and check the result."""
    # a * 1 = a
    times_one = charclass("a") * one
    assert times_one == charclass("a")

    # a * {1,3} = a{1,3}
    one_to_three = charclass("a") * multiplier.parse("{1,3}")
    assert one_to_three == mult.parse("a{1,3}")

    # a * {4,} = a{4,}
    four_or_more = charclass("a") * multiplier.parse("{4,}")
    assert four_or_more == mult.parse("a{4,}")
def test_odd_bug():
    """Regression check for an odd bug with ([bc]*c)?[ab]*."""
    # FSM for "[bc]*": accepts the empty input, given as a list or a string.
    bc_star = mult(charclass("bc"), star).to_fsm(
        {"a", "b", "c", fsm.anything_else}
    )
    assert bc_star.accepts([])
    assert bc_star.accepts("")

    # FSM for "c": accepts a single "c", given as a string or a list.
    single_c = mult(charclass("c"), one).to_fsm(
        {"a", "b", "c", fsm.anything_else}
    )
    assert single_c.accepts("c")
    assert single_c.accepts(["c"])

    # The two machines combined with "+" must still accept "c".
    combined = bc_star + single_c
    assert combined.accepts("c")
    assert combined.accepts(["c"])
def test_mult_reduction_easy():
    """Reduce simple mults: parsed from text, then built around ``pattern()``."""
    # (regex text, expected result of parsing it as a mult and reducing)
    parsed_cases = [
        ("a", charclass.parse("a")),
        ("a", charclass("a")),
        ("a?", mult(charclass("a"), qm)),
        ("a{0}", emptystring),
        ("[]", nothing),
        ("[]?", emptystring),
        ("[]{0}", emptystring),
        ("[]{0,5}", emptystring),
    ]
    for text, expected in parsed_cases:
        assert mult.parse(text).reduce() == expected

    # (multiplier applied to an empty pattern(), expected reduction)
    empty_pattern_cases = [
        (one, nothing),
        (qm, emptystring),
        (zero, emptystring),
        (multiplier.parse("{0,5}"), emptystring),
    ]
    for count, expected in empty_pattern_cases:
        assert mult(pattern(), count).reduce() == expected
def test_mult_reduction_easy():
    """Reduce mults built directly from ``charclass("a")``, ``nothing`` and ``pattern()``."""
    # mult -> mult
    # mult -> charclass
    once = mult(charclass("a"), one).reduce()
    assert once == charclass("a")
    optional = mult(charclass("a"), qm).reduce()
    assert optional == mult(charclass("a"), qm)
    never = mult(charclass("a"), zero).reduce()
    assert never == emptystring

    # `nothing` and an empty pattern() are expected to reduce the same way
    # under each of the four multipliers below.
    for make_multiplicand in (lambda: nothing, pattern):
        assert mult(make_multiplicand(), one).reduce() == nothing
        assert mult(make_multiplicand(), qm).reduce() == emptystring
        assert mult(make_multiplicand(), zero).reduce() == emptystring
        assert mult(
            make_multiplicand(),
            multiplier(bound(0), bound(5)),
        ).reduce() == emptystring
def test_charclass_parsing():
    """Check ``charclass.match``/``charclass.parse`` on single characters and bad input."""
    # Successful matches return (charclass, index just past the match).
    assert charclass.match("a", 0) == (charclass("a"), 1)
    assert charclass.parse("a") == charclass("a")
    assert charclass.match("aa", 1) == (charclass("a"), 2)
    assert charclass.match("a$", 1) == (charclass("$"), 2)
    assert charclass.match(".", 0) == (dot, 1)

    # A lone "[" is expected to raise IndexError.
    try:
        charclass.match("[", 0)
    except IndexError:
        pass
    else:
        assert False

    # Matching at index 1 of "a" is expected to raise nomatch.
    try:
        charclass.match("a", 1)
    except nomatch:
        pass
    else:
        assert False
def test_charclass_fsm():
    """Convert "[^a]" to an FSM and check its alphabet and what it accepts."""
    # "[^a]"
    not_a_machine = (~charclass("a")).to_fsm()
    assert not_a_machine.alphabet == {"a", fsm.anything_else}

    # "b" as a string, "b" as a list, and the anything_else symbol itself.
    for symbols in ("b", ["b"], [fsm.anything_else]):
        assert not_a_machine.accepts(symbols)
def assert_non_overlapping(fsa1, fsa2):
    """Assert that the intersection of two lego finite state automata is the
    empty FSA.

    The check only runs when ``--no-skip`` is present in ``sys.argv``;
    otherwise ``SkipTest`` is raised before the operands are touched.

    Raises:
        SkipTest: if ``--no-skip`` is not in ``sys.argv``.
        AssertionError: if ``fsa1 & fsa2`` is not equal to ``lego.charclass()``.
    """
    if '--no-skip' not in sys.argv:
        raise SkipTest
    # Compute the intersection once and reuse it in the failure message.
    overlap = fsa1 & fsa2
    assert overlap == lego.charclass(), ("Overlapping regex: "
                                         "{}".format(overlap))
def assert_non_overlapping(fsa1, fsa2):
    """Assert that the intersection of two lego finite state automata is the
    empty FSA.

    The check only runs when ``--no-skip`` is present in ``sys.argv``;
    otherwise ``SkipTest`` is raised before the operands are touched.

    Raises:
        SkipTest: if ``--no-skip`` is not in ``sys.argv``.
        AssertionError: if ``fsa1 & fsa2`` is not equal to ``lego.charclass()``.
    """
    if '--no-skip' not in sys.argv:
        raise SkipTest
    # Compute the intersection once and reuse it in the failure message.
    intersection = fsa1 & fsa2
    assert intersection == lego.charclass(), ("Overlapping regex: "
                                              "{}".format(intersection))
def test_conc_str():
    """Render a hand-built conc and compare it with the expected regex text."""
    sequence = conc(
        mult(charclass("a"), one),
        mult(charclass("b"), one),
        mult(charclass("c"), one),
        mult(charclass("d"), one),
        mult(charclass("e"), one),
        mult(~charclass("fg"), star),
        mult(charclass("h"), multiplier(bound(5), bound(5))),
        mult(charclass("abcdefghijklmnopqrstuvwxyz"), plus),
    )
    rendered = str(sequence)
    assert rendered == "abcde[^fg]*h{5}[a-z]+"
def test_charclass_gen():
    """``charclass("xyz").strings()`` yields "x", "y", "z" and then stops."""
    produced = charclass("xyz").strings()
    for expected in ("x", "y", "z"):
        assert next(produced) == expected

    # A fourth next() must raise StopIteration.
    try:
        next(produced)
    except StopIteration:
        assert True
    else:
        assert False
def test_mult_intersection():
    """Intersect pairs of parsed mults with ``&`` and compare to the expected value."""
    # (left regex, right regex, expected value of left & right)
    cases = [
        ("a", "b?", charclass()),
        ("a", "b?", nothing),
        ("a", "a?", charclass.parse("a")),
        ("a{2}", "a{2,}", mult.parse("a{2}")),
        ("a", "b", charclass.parse("[]")),
        ("a", "a", charclass.parse("a")),
        ("a*", "a", charclass.parse("a")),
        ("a*", "b*", conc.parse("")),
        ("a*", "a+", mult.parse("a+")),
        ("a{2}", "a{4}", charclass.parse("[]")),
        ("a{3,}", "a{3,}", mult.parse("a{3,}")),
    ]
    for left, right, expected in cases:
        assert mult.parse(left) & mult.parse(right) == expected
def test_empty():
    """Check ``.empty()`` on charclass, mult, conc and pattern objects."""
    # Character classes
    assert nothing.empty()
    assert charclass().empty()
    assert not dot.empty()

    # Mults
    zero_times_a = mult.parse("a{0}")
    assert not zero_times_a.empty()
    assert mult.parse("[]").empty()
    optional_nothing = mult.parse("[]?")
    assert not optional_nothing.empty()

    # Concs
    assert conc.parse("a[]").empty()
    a_then_optional_nothing = conc.parse("a[]?")
    assert not a_then_optional_nothing.empty()

    # Patterns
    assert pattern().empty()
    for text in ("a{0}", "[]?"):
        assert not pattern.parse(text).empty()
def test_pattern_equality():
    """Pattern equality ignores the order and duplication of its concs."""

    def single(char):
        # A conc holding exactly one occurrence of `char`.
        return conc(mult(charclass(char), one))

    # a|b == b|a
    a_or_b = pattern(single("a"), single("b"))
    b_or_a = pattern(single("b"), single("a"))
    assert a_or_b == b_or_a

    # a|a == a
    a_or_a = pattern(single("a"), single("a"))
    just_a = pattern(single("a"))
    assert a_or_a == just_a
def test_mult_str():
    """Check ``str()`` of mults over "a" and ``d``, and of bounds and multipliers."""
    a = charclass("a")

    # (multiplier applied to "a", expected text)
    a_cases = [
        (one, "a"),
        (multiplier(bound(2), bound(2)), "aa"),
        (multiplier(bound(3), bound(3)), "aaa"),
        (multiplier(bound(4), bound(4)), "aaaa"),
        (multiplier(bound(5), bound(5)), "a{5}"),
        (qm, "a?"),
        (star, "a*"),
        (plus, "a+"),
        (multiplier(bound(2), bound(5)), "a{2,5}"),
    ]
    for count, expected in a_cases:
        assert str(mult(a, count)) == expected

    # Bounds and multipliers on their own.
    assert str(bound(2)) == "2"
    assert str(inf) == ""
    assert str(multiplier(bound(2), inf)) == "{2,}"
    assert str(mult(a, multiplier(bound(2), inf))) == "a{2,}"

    # (multiplier applied to d, expected text)
    d_cases = [
        (one, "\\d"),
        (multiplier(bound(2), bound(2)), "\\d\\d"),
        (multiplier(bound(3), bound(3)), "\\d{3}"),
    ]
    for count, expected in d_cases:
        assert str(mult(d, count)) == expected
def test_mult_str():
    """Check ``str()`` of bounds, multipliers, and mults over "a" and ``d``."""
    # Bounds and multipliers on their own.
    assert str(bound(2)) == "2"
    assert str(inf) == ""
    assert str(multiplier(bound(2), inf)) == "{2,}"

    a = charclass("a")
    # (multiplier applied to "a", expected text)
    a_cases = [
        (one, "a"),
        (multiplier(bound(2), bound(2)), "a{2}"),
        (multiplier(bound(3), bound(3)), "a{3}"),
        (multiplier(bound(4), bound(4)), "a{4}"),
        (multiplier(bound(5), bound(5)), "a{5}"),
        (qm, "a?"),
        (star, "a*"),
        (plus, "a+"),
        (multiplier(bound(2), bound(5)), "a{2,5}"),
        (multiplier(bound(2), inf), "a{2,}"),
    ]
    for count, expected in a_cases:
        assert str(mult(a, count)) == expected

    # (multiplier applied to d, expected text)
    d_cases = [
        (one, "\\d"),
        (multiplier(bound(2), bound(2)), "\\d{2}"),
        (multiplier(bound(3), bound(3)), "\\d{3}"),
    ]
    for count, expected in d_cases:
        assert str(mult(d, count)) == expected
def test_mult_intersection():
    """Check ``&`` and ``.common()`` on pairs of parsed mults."""
    m = mult.parse

    # a & b? = nothing
    assert m("a") & m("b?") == charclass()
    assert m("a") & m("b?") == nothing

    # a & a? = a
    assert m("a").reduce() == charclass.parse("a")
    assert m("a") & m("a?") == charclass.parse("a")

    # a{2} & a{2,} = a{2}
    assert m("a{2}") & m("a{2,}") == m("a{2}")

    # a & b -> no intersection.
    assert m("a") & m("b") == charclass.parse("[]")

    # a & a -> a
    assert m("a") & m("a") == charclass.parse("a")

    # a* & a -> a
    assert m("a*") & m("a") == charclass.parse("a")

    # a* & b* -> emptystring
    assert m("a*") & m("b*") == conc.parse("")

    # a* & a+ -> a+
    assert m("a*") & m("a+") == m("a+")

    # aa & aaaa -> []
    assert m("a{2}") & m("a{4}") == charclass.parse("[]")

    # The next three use .common() rather than the & operator.
    # common(a{3,4}, a{2,5}) -> a{2,3}
    assert m("a{3,4}").common(m("a{2,5}")) == m("a{2,3}")

    # common(a{2,}, a{1,5}) -> a{1,5}
    assert m("a{2,}").common(m("a{1,5}")) == m("a{1,5}")

    # common(a{3,}, a{2,}) -> a{2,} (with a, epsilon left over)
    assert m("a{3,}").common(m("a{2,}")) == m("a{2,}")

    # a{3,} & a{3,} -> a{3,} (with inf, inf left over)
    assert m("a{3,}") & m("a{3,}") == m("a{3,}")
def test_mult_parsing():
    """Parse mults from whole strings and from given offsets inside longer ones."""
    # Whole-string parses of bracketed classes followed by "+".
    assert mult.parse("[a-g]+") == mult(charclass("abcdefg"), plus)
    assert mult.parse("[a-g0-8$%]+") == mult(
        charclass("abcdefg012345678$%"), plus
    )
    assert mult.parse("[a-g0-8$%\\^]+") == mult(
        charclass("abcdefg012345678$%^"), plus
    )

    # match() returns (parsed mult, index just past it).
    not_fg_star = (mult(~charclass("fg"), star), 11)
    assert mult.match("abcde[^fg]*", 5) == not_fg_star

    h_five_times = (mult(charclass("h"), multiplier(bound(5), bound(5))), 15)
    assert mult.match("abcde[^fg]*h{5}[a-z]+", 11) == h_five_times

    lowercase_plus = (mult(charclass("abcdefghijklmnopqrstuvwxyz"), plus), 21)
    assert mult.match("abcde[^fg]*h{5}[a-z]+T{1,}", 15) == lowercase_plus

    t_two_or_more = (mult(charclass("T"), multiplier(bound(2), inf)), 26)
    assert mult.match("abcde[^fg]*h{5}[a-z]+T{2,}", 21) == t_two_or_more
def test_pattern_str():
    """Render hand-built patterns and compare with the expected regex text."""

    def singles(text):
        # One `mult(charclass(c), one)` per character of `text`, in order.
        return [mult(charclass(char), one) for char in text]

    a_or_b = pattern(conc(*singles("a")), conc(*singles("b")))
    assert str(a_or_b) == "a|b"

    a_or_a = pattern(conc(*singles("a")), conc(*singles("a")))
    assert str(a_or_a) == "a"

    # "def" followed by a nested alternation used exactly once.
    nested_group = mult(
        pattern(conc(*singles("ghi")), conc(*singles("jkl"))),
        one,
    )
    alternation = pattern(
        conc(*singles("abc")),
        conc(*(singles("def") + [nested_group])),
    )
    assert str(alternation) == "abc|def(ghi|jkl)"
def test_pattern_parsing():
    """Parse patterns with alternation, nesting and group-syntax variants."""

    def singles(text):
        # One `mult(charclass(c), one)` per character of `text`, in order.
        return [mult(charclass(char), one) for char in text]

    nested_group = mult(
        pattern(conc(*singles("ghi")), conc(*singles("jkl"))),
        one,
    )
    expected = pattern(
        conc(*singles("abc")),
        conc(*(singles("def") + [nested_group])),
    )
    assert pattern.parse("abc|def(ghi|jkl)") == expected

    # Accept the "non-capturing group" syntax, "(?: ... )" but give it no
    # special significance
    assert parse("(?:)") == parse("()")
    assert parse("(?:abc|def)") == parse("(abc|def)")
    parse("(:abc)")  # should give no problems

    # Named groups
    assert pattern.parse("(?P<ng1>abc)") == parse("(abc)")
def test_charclass_multiplication():
    """Multiply the charclass ``a`` by several multipliers and check the result."""
    unchanged = charclass("a") * one
    assert unchanged == charclass("a")

    # Multiplying by a parsed multiplier should equal parsing the combined text.
    for suffix in ("{1,3}", "{4,}"):
        product = charclass("a") * multiplier.parse(suffix)
        assert product == mult.parse("a" + suffix)
def test_charclass_intersection():
    """Intersect plain and negated charclasses in all four combinations."""
    # [ab] n [bc] = [b]
    both_plain = charclass("ab") & charclass("bc")
    assert both_plain == charclass("b")

    # [ab] n [^bc] = [a]
    right_negated = charclass("ab") & ~charclass("bc")
    assert right_negated == charclass("a")

    # [^ab] n [bc] = [c]
    left_negated = ~charclass("ab") & charclass("bc")
    assert left_negated == charclass("c")

    # [^ab] n [^bc] = [^abc]
    both_negated = ~charclass("ab") & ~charclass("bc")
    assert both_negated == ~charclass("abc")
def test_charclass_negation():
    """Negating a charclass twice gives back an equal charclass."""
    # Compared in both operand orders.
    twice_negated = ~~charclass("a")
    assert twice_negated == charclass("a")

    original = charclass("a")
    assert original == ~~charclass("a")
def test_conc_equality():
    """A single-mult conc equals only a conc with the same charclass and multiplier."""

    def a_once():
        # Fresh conc holding the single mult "a".
        return conc(mult(charclass("a"), one))

    assert a_once() == conc(mult(charclass("a"), one))

    # Different charclass, different multiplier, or a different kind of object.
    assert a_once() != conc(mult(charclass("b"), one))
    assert a_once() != conc(mult(charclass("a"), qm))
    assert a_once() != conc(
        mult(charclass("a"), multiplier(bound(1), bound(2)))
    )
    assert a_once() != emptystring
def test_repr():
    """``repr`` of a negated charclass shows the leading ``~``."""
    negated = ~charclass("a")
    assert repr(negated) == "~charclass('a')"
def test_mult_equality():
    """A mult equals only a mult with the same multiplicand and multiplier."""

    def a_once():
        # Fresh mult for a single "a".
        return mult(charclass("a"), one)

    assert a_once() == mult(charclass("a"), one)

    # Different charclass, different multiplier, or a bare charclass.
    assert a_once() != mult(charclass("b"), one)
    assert a_once() != mult(charclass("a"), qm)
    assert a_once() != mult(charclass("a"), multiplier(bound(1), bound(2)))
    assert a_once() != charclass("a")
def lego(self):
    '''
        This is the big kahuna of this module. Turn the present FSM into a regular
        expression object, as imported from the lego module. This is accomplished
        using the Brzozowski algebraic method.

        Reads `self.states`, `self.alphabet`, `self.map`, `self.initial` and
        `self.finals`; returns the expression built for the initial state.
    '''
    from greenery.lego import nothing, charclass, emptystring, star, otherchars

    # We need a new state not already used; guess first beyond current len
    outside = len(self.states)
    while outside in self.states:
        outside += 1

    # The set of strings that would be accepted by this FSM if you started
    # at state i is represented by the regex R_i.
    # If state i has a sole transition "a" to state j, then we know R_i = a R_j.
    # If state i is final, then the empty string is also accepted by this regex.
    # And so on...

    # From this we can build a set of simultaneous equations in len(self.states)
    # variables. This system is easily solved for all variables, but we only
    # need one: R_a, where a is the starting state.

    # The first thing we need to do is organise the states into order of depth,
    # so that when we perform our back-substitutions, we can start with the
    # last (deepest) state and therefore finish with R_a.
    symbols = sorted(self.alphabet, key=str)  # invariant across states
    states = [self.initial]
    seen = {self.initial}  # same contents as `states`, for fast membership
    i = 0
    while i < len(states):
        current = states[i]
        for symbol in symbols:
            target = self.map[current][symbol]
            if target not in seen:
                seen.add(target)
                states.append(target)
        i += 1

    # Our system of equations is represented like so:
    brz = {}
    for a in self.states:
        brz[a] = {}
        for b in self.states | {outside}:
            brz[a][b] = nothing

    # Populate it with some initial data.
    for a in self.map:
        for symbol in self.map[a]:
            b = self.map[a][symbol]
            if symbol == otherchars:
                brz[a][b] |= ~charclass(self.alphabet - {otherchars})
            else:
                brz[a][b] |= charclass({symbol})
        if a in self.finals:
            brz[a][outside] |= emptystring

    # Now perform our back-substitution
    for i in reversed(range(len(states))):
        a = states[i]

        # Before the equation for R_a can be substituted into the other
        # equations, we need to resolve the self-transition (if any).
        # e.g.    R_a = 0 R_a |   1 R_b |   2 R_c
        # becomes R_a =         0*1 R_b | 0*2 R_c
        loop = brz[a][a] * star  # i.e. "0*"
        del brz[a][a]

        for right in brz[a]:
            brz[a][right] = loop + brz[a][right]

        # Note: even if we're down to our final equation, the above step still
        # needs to be performed before anything is returned.

        # Now we can substitute this equation into all of the previous ones.
        for j in range(i):
            b = states[j]

            # e.g. substituting R_a =  0*1 R_b |      0*2 R_c
            # into              R_b =    3 R_a |        4 R_c | 5 R_d
            # yields            R_b = 30*1 R_b | (30*2|4) R_c | 5 R_d
            univ = brz[b][a]  # i.e. "3"
            del brz[b][a]

            for right in brz[a]:
                brz[b][right] |= univ + brz[a][right]

    return brz[self.initial][outside]
def test_empty_pattern_reduction():
    """An empty ``pattern()`` reduces to an empty ``charclass()``."""
    reduced = pattern().reduce()
    assert reduced == charclass()
def test_conc_parsing():
    """Parse several concs and compare each with its hand-built equivalent."""

    def singles(text):
        # One `mult(charclass(c), one)` per character of `text`, in order.
        return [mult(charclass(char), one) for char in text]

    def digits(count):
        # A mult of the ten decimal digits repeated exactly `count` times.
        return mult(
            charclass("0123456789"),
            multiplier(bound(count), bound(count)),
        )

    # Literals followed by a negated class, a counted repeat and a range.
    mixed_tail = [
        mult(~charclass("fg"), star),
        mult(charclass("h"), multiplier(bound(5), bound(5))),
        mult(charclass("abcdefghijklmnopqrstuvwxyz"), plus),
    ]
    assert conc.parse("abcde[^fg]*h{5}[a-z]+") == conc(
        *(singles("abcde") + mixed_tail)
    )

    # Two starred classes in a row.
    assert conc.parse("[bc]*[ab]*") == conc(
        mult(charclass("bc"), star),
        mult(charclass("ab"), star),
    )

    # Literals followed by three dots.
    three_dots = [mult(dot, one) for _ in range(3)]
    assert conc.parse("abc...") == conc(*(singles("abc") + three_dots))

    # A date-like shape: \d{4}-\d{2}-\d{2}
    assert conc.parse("\\d{4}-\\d{2}-\\d{2}") == conc(
        digits(4),
        mult(charclass("-"), one),
        digits(2),
        mult(charclass("-"), one),
        digits(2),
    )
def test_charclass_union():
    """Union plain and negated charclasses in all four combinations."""
    # [ab] u [bc] = [abc]
    both_plain = charclass("ab") | charclass("bc")
    assert both_plain == charclass("abc")

    # [ab] u [^bc] = [^c]
    right_negated = charclass("ab") | ~charclass("bc")
    assert right_negated == ~charclass("c")

    # [^ab] u [bc] = [^a]
    left_negated = ~charclass("ab") | charclass("bc")
    assert left_negated == ~charclass("a")

    # [^ab] u [^bc] = [^b]
    both_negated = ~charclass("ab") | ~charclass("bc")
    assert both_negated == ~charclass("b")
def test_charclass_equality():
    """Charclass equality respects negation and ignores character order."""
    plain = charclass("a")
    assert plain == charclass("a")

    negated = ~charclass("a")
    assert negated == ~charclass("a")
    assert negated != charclass("a")

    # The same characters given in a different order.
    assert charclass("ab") == charclass("ba")
def test_charclass_str():
    """Check ``str()`` of many charclasses against their expected regex text."""
    # (object to render, expected text)
    cases = [
        (w, "\\w"),
        (d, "\\d"),
        (s, "\\s"),
        (charclass("a"), "a"),
        (charclass("{"), "\\{"),
        (charclass("\t"), "\\t"),
        (charclass("ab"), "[ab]"),
        (charclass("a{"), "[a{]"),
        (charclass("a\t"), "[\\ta]"),
        (charclass("a-"), "[\\-a]"),
        (charclass("a["), "[\\[a]"),
        (charclass("a]"), "[\\]a]"),
        (charclass("ab"), "[ab]"),
        (charclass("abc"), "[abc]"),
        (charclass("abcd"), "[a-d]"),
        (charclass("abcdfghi"), "[a-df-i]"),
        (charclass("^"), "^"),
        (charclass("\\"), "\\\\"),
        (charclass("a^"), "[\\^a]"),
        (charclass("0123456789a"), "[0-9a]"),
        (charclass("\t\v\r A"), "[\\t\\v\\r A]"),
        (charclass("\n\f A"), "[\\n\\f A]"),
        (charclass("\t\n\v\f\r A"), "[\\t-\\r A]"),
        (
            charclass("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz|"),
            "[0-9A-Z_a-z|]",
        ),
        (W, "\\W"),
        (D, "\\D"),
        (S, "\\S"),
        (dot, "."),
        (~charclass(""), "."),
        (~charclass("a"), "[^a]"),
        (~charclass("{"), "[^{]"),
        (~charclass("\t"), "[^\\t]"),
        (~charclass("^"), "[^\\^]"),
        # Arbitrary ranges
        (parse("[\\w:;<=>?@\\[\\\\\\]\\^`]"), "[0-z]"),
        # TODO: what if \d is a proper subset of `chars`?
        # escape sequences are not preserved
        (parse("\\x09"), "\\t"),
        # Printing ASCII control characters? You should get hex escapes
        (parse("\\x00"), "\\x00"),
    ]
    for subject, expected in cases:
        assert str(subject) == expected
def lego(self):
    '''
        This is the big kahuna of this module. Turn the present FSM into a regular
        expression object, as imported from the lego module. This is accomplished
        using the Brzozowski algebraic method.

        Reads `self.states`, `self.alphabet`, `self.map`, `self.initial` and
        `self.finals`; returns the expression built for the initial state.
    '''
    from greenery.lego import nothing, charclass, emptystring, star, otherchars

    # We need a new state not already used; guess first beyond current len
    outside = len(self.states)
    while outside in self.states:
        outside += 1

    # The set of strings that would be accepted by this FSM if you started
    # at state i is represented by the regex R_i.
    # If state i has a sole transition "a" to state j, then we know R_i = a R_j.
    # If state i is final, then the empty string is also accepted by this regex.
    # And so on...

    # From this we can build a set of simultaneous equations in len(self.states)
    # variables. This system is easily solved for all variables, but we only
    # need one: R_a, where a is the starting state.

    # The first thing we need to do is organise the states into order of depth,
    # so that when we perform our back-substitutions, we can start with the
    # last (deepest) state and therefore finish with R_a.
    ordered_symbols = sorted(self.alphabet, key=str)  # invariant across states
    states = [self.initial]
    visited = {self.initial}  # same contents as `states`, for fast membership
    i = 0
    while i < len(states):
        current = states[i]
        for symbol in ordered_symbols:
            successor = self.map[current][symbol]
            if successor not in visited:
                visited.add(successor)
                states.append(successor)
        i += 1

    # Our system of equations is represented like so:
    brz = {}
    for a in self.states:
        brz[a] = {}
        for b in self.states | {outside}:
            brz[a][b] = nothing

    # Populate it with some initial data.
    for a in self.map:
        for symbol in self.map[a]:
            b = self.map[a][symbol]
            if symbol == otherchars:
                brz[a][b] |= ~charclass(self.alphabet - {otherchars})
            else:
                brz[a][b] |= charclass({symbol})
        if a in self.finals:
            brz[a][outside] |= emptystring

    # Now perform our back-substitution
    for i in reversed(range(len(states))):
        a = states[i]

        # Before the equation for R_a can be substituted into the other
        # equations, we need to resolve the self-transition (if any).
        # e.g.    R_a = 0 R_a |   1 R_b |   2 R_c
        # becomes R_a =         0*1 R_b | 0*2 R_c
        loop = brz[a][a] * star  # i.e. "0*"
        del brz[a][a]

        for right in brz[a]:
            brz[a][right] = loop + brz[a][right]

        # Note: even if we're down to our final equation, the above step still
        # needs to be performed before anything is returned.

        # Now we can substitute this equation into all of the previous ones.
        for j in range(i):
            b = states[j]

            # e.g. substituting R_a =  0*1 R_b |      0*2 R_c
            # into              R_b =    3 R_a |        4 R_c | 5 R_d
            # yields            R_b = 30*1 R_b | (30*2|4) R_c | 5 R_d
            univ = brz[b][a]  # i.e. "3"
            del brz[b][a]

            for right in brz[a]:
                brz[b][right] |= univ + brz[a][right]

    return brz[self.initial][outside]
def test_charclass_str():
    """Check ``str()`` of many charclasses against their expected regex text.

    Every backslash in the string literals below is written as an explicit
    ``\\\\`` escape, so no literal relies on Python passing an unrecognised
    escape sequence (such as ``\\w`` or ``\\]``) through unchanged.
    """
    assert str(w) == "\\w"
    assert str(d) == "\\d"
    assert str(s) == "\\s"
    assert str(charclass("a")) == "a"
    assert str(charclass("{")) == "\\{"
    assert str(charclass("\t")) == "\\t"
    assert str(charclass("ab")) == "[ab]"
    assert str(charclass("a{")) == "[a{]"
    assert str(charclass("a\t")) == "[\\ta]"
    assert str(charclass("a-")) == "[\\-a]"
    assert str(charclass("a[")) == "[\\[a]"
    assert str(charclass("a]")) == "[\\]a]"
    assert str(charclass("ab")) == "[ab]"
    assert str(charclass("abc")) == "[abc]"
    assert str(charclass("abcd")) == "[a-d]"
    assert str(charclass("abcdfghi")) == "[a-df-i]"
    assert str(charclass("^")) == "^"
    assert str(charclass("\\")) == "\\\\"
    assert str(charclass("a^")) == "[\\^a]"
    assert str(charclass("0123456789a")) == "[0-9a]"
    assert str(charclass("\t\v\r A")) == "[\\t\\v\\r A]"
    assert str(charclass("\n\f A")) == "[\\n\\f A]"
    assert str(charclass("\t\n\v\f\r A")) == "[\\t-\\r A]"
    assert str(charclass("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz|")) == "[0-9A-Z_a-z|]"
    assert str(W) == "\\W"
    assert str(D) == "\\D"
    assert str(S) == "\\S"
    assert str(dot) == "."
    assert str(~charclass("")) == "."
    assert str(~charclass("a")) == "[^a]"
    assert str(~charclass("{")) == "[^{]"
    assert str(~charclass("\t")) == "[^\\t]"
    assert str(~charclass("^")) == "[^\\^]"

    # Arbitrary ranges
    assert str(parse("[\\w:;<=>?@\\[\\\\\\]\\^`]")) == "[0-z]"
    # TODO: what if \d is a proper subset of `chars`?

    # escape sequences are not preserved
    assert str(parse("\\x09")) == "\\t"

    # Printing ASCII control characters? You should get hex escapes
    assert str(parse("\\x00")) == "\\x00"