def test_codepoint(self):
    """Check that x, u and U code point escapes each parse into a Literal
    covering exactly that one code point."""
    cases = (
        (u"\\xa3", 0xa3),
        (u"\\u123e", 0x123e),
        (u"\\U10ffff", 0x10ffff),
    )
    for pattern, codepoint in cases:
        actual = Regex.Parser(pattern).parse()
        wanted = Regex.Literal([(codepoint, codepoint)])
        # Trees are compared through repr(), as in the rest of these tests.
        self.assertEqual(repr(actual), repr(wanted))
def test_variable(self):
    """Check that ``{Hello}`` parses as a variable reference while
    ``{1,32}`` and ``{2,34}`` parse as repetition bounds.

    The expected tree is a Concatenation of the POSIX alpha class repeated
    1 to 32 times, followed by Variable("Hello") repeated 2 to 34 times.
    """
    # A plain u"" literal is used instead of ur"": the pattern contains no
    # backslashes, so the value is identical, and the ur prefix is a syntax
    # error on Python 3.
    parsed = Regex.Parser(u"[[:alpha:]]{1,32}{Hello}{2,34}").parse()

    # [[:alpha:]] is expected to cover a-z and A-Z, in that order.
    letters = Regex.Literal([
        (ord(u"a"), ord(u"z")),
        (ord(u"A"), ord(u"Z")),
    ])
    expected = Regex.Concatenation([
        Regex.Repetition(letters, 1, 32),
        Regex.Repetition(Regex.Variable("Hello"), 2, 34),
    ])
    self.assertEqual(repr(parsed), repr(expected))
def test_simple_case(self):
    """Resolve ``{b}``, whose own definition refers to ``{a}``, and check
    the fully substituted tree."""
    def char(c):
        # Literal matching exactly the single character c.
        return Regex.Literal([(ord(c), ord(c))])

    defines = {}
    defines['a'] = Regex.Parser("[abc]+").parse()
    defines['b'] = Regex.Parser("[xyz]*{a}d").parse()

    regex = Regex.Parser("Hello{b}Ok").parse()
    resolver = Regex.VariableResolver(defines)
    regex.accept(resolver)
    resolved = resolver.get()

    unbounded = Regex.Repetition.Infinity
    # Expected expansion of {b}: {a} appears in place as its repetition,
    # and the whole of {b} stays a nested Concatenation in the outer one.
    expansion_of_b = Regex.Concatenation([
        Regex.Repetition(Regex.Literal([(ord('x'), ord('z'))]), 0, unbounded),
        Regex.Repetition(Regex.Literal([(ord('a'), ord('c'))]), 1, unbounded),
        char('d'),
    ])
    expected = Regex.Concatenation(
        [char(c) for c in "Hello"]
        + [expansion_of_b]
        + [char(c) for c in "Ok"])

    self.assertEqual(repr(resolved), repr(expected))
def test_grouping_and_subexpressions(self):
    """Check the set operators inside character classes: difference (--),
    symmetric difference (~~), union (||) and intersection (&&), including
    nested bracket groups."""
    # a-z with 'h' (104) removed.
    without_h = [(97, 103), (105, 122)]
    cases = [
        ("[a-z--h]", without_h),
        ("[[a-z]--h]", without_h),
        ("[[a-z]--[h]]", without_h),
        # Both 'g' and 'h' are subtracted here.
        ("[[a-z]--g[h]]", [(97, 102), (105, 122)]),
        # The inner group "gh--g" reduces to just 'h'.
        ("[[a-z]--[gh--g]]", without_h),
        ("[a-h~~h-z]", without_h),
        ("[a-h||h-z]", [(97, 122)]),
        ("[a-h&&h-z]", [(104, 104)]),
    ]
    for pattern, ranges in cases:
        actual = Regex.Parser(pattern).parse()
        self.assertEqual(repr(actual), repr(Regex.Literal(ranges)))
def test_float_pattern(self):
    """Check a two-branch float pattern: digits, a dot, optional digits; or
    optional digits, a dot, digits.

    Also checks that ``[[:digit:]]`` and ``[0-9]`` produce the same literal.
    """
    def digits(minimum):
        # 0-9 repeated at least `minimum` times. -1 is the upper bound this
        # test expects for '+' and '*'.
        # NOTE(review): presumably equal to Regex.Repetition.Infinity - confirm.
        return Regex.Repetition(
            Regex.Literal([(ord(u"0"), ord(u"9"))]), minimum, -1)

    def dot():
        # Literal matching exactly one '.' character.
        return Regex.Literal([(ord(u"."), ord(u"."))])

    expected = Regex.Alternation([
        Regex.Concatenation([digits(1), dot(), digits(0)]),
        Regex.Concatenation([digits(0), dot(), digits(1)]),
    ])

    # Backslashes are escaped in a plain u"" literal instead of using the
    # ur"" prefix, which is a syntax error on Python 3. The resulting
    # pattern text is unchanged.
    parsed = Regex.Parser(u"[[:digit:]]+\\.[0-9]*|[0-9]*\\.[0-9]+").parse()
    self.assertEqual(repr(parsed), repr(expected))
def test_unicode(self):
    """Check Unicode property escapes for character names and for a
    general category.

    An unknown name yields an empty literal. A loosely spelled name (mixed
    case, '-' and '_' separators, '=' or ':' after the property, long or
    short property alias) yields U+1806. General Category Pd yields the
    listed ranges.
    """
    # Every pattern spells the backslash as "\\p": "\p" is not a recognised
    # escape sequence, and relying on it staying literal is deprecated in
    # Python 3. The pattern text handed to the parser is unchanged.
    parsed = Regex.Parser(u"\\p{Name=Nonsense}").parse()
    self.assertEqual(repr(parsed), repr(Regex.Literal([])))

    name_patterns = [
        u"\\p{Name=MONGOLIAN-todo Soft_HYPHEN}",
        u"\\p{Name: MONGOLIAN-todo Soft_HYPHEN}",
        u"\\p{na=MONGOLIAN-todo Soft_HYPHEN}",
        u"\\p{na:MONGOLIAN-todo Soft_HYPHEN}",
    ]
    for pattern in name_patterns:
        parsed = Regex.Parser(pattern).parse()
        self.assertEqual(
            repr(parsed), repr(Regex.Literal([(0x1806, 0x1806)])))

    # Ranges expected for the Pd category; they must match the Unicode data
    # the parser is built against.
    dash_punctuation = [
        (0x2D, 0x2D),
        (0x58A, 0x58A),
        (0x5BE, 0x5BE),
        (0x1400, 0x1400),
        (0x1806, 0x1806),
        (0x2010, 0x2015),
        (0x2E17, 0x2E17),
        (0x2E1A, 0x2E1A),
        (0x2E3A, 0x2E3B),
        (0x2E40, 0x2E40),
        (0x301C, 0x301C),
        (0x3030, 0x3030),
        (0x30A0, 0x30A0),
        (0xFE31, 0xFE32),
        (0xFE58, 0xFE58),
        (0xFE63, 0xFE63),
        (0xFF0D, 0xFF0D),
    ]
    parsed = Regex.Parser(u"\\p{General Category=Pd}").parse()
    self.assertEqual(repr(parsed), repr(Regex.Literal(dash_punctuation)))