Exemplo n.º 1
0
    def test_or_operator(self):
        """Check translation and matching of patterns that use the '|' operator.

        Every pattern is translated with ``anchors=False``. The expected
        output is asserted literally and then compiled, so that accepted and
        rejected sample strings can be checked with ``search``.
        """
        regex = translate_pattern('0|1', anchors=False)
        self.assertEqual(regex, r'^(0|1)$(?!\n\Z)')
        pattern = re.compile(regex)
        for text in ('0', '1'):
            self.assertEqual(pattern.search(text).group(0), text)
        # A trailing newline, an empty string or any extra character must fail.
        for text in ('1\n', '', '2', '01', '1\n '):
            self.assertIsNone(pattern.search(text))

        regex = translate_pattern(r'\d+[%]|\d*\.\d+[%]', anchors=False)
        self.assertEqual(regex, r'^(\d+[%]|\d*\.\d+[%])$(?!\n\Z)')
        pattern = re.compile(regex)
        for text in ('99%', '99.9%', '.90%'):
            self.assertEqual(pattern.search(text).group(0), text)
        for text in ('%', '90.%'):
            self.assertIsNone(pattern.search(text))

        regex = translate_pattern('([ -~]|\n|\r|\t)*', anchors=False)
        # The expected value is a non-raw literal because it embeds real
        # control characters; the final '\Z' is therefore spelled '\\Z' to
        # avoid an invalid escape sequence (same runtime value).
        self.assertEqual(regex, '^(([ -~]|\n|\r|\t)*)$(?!\\n\\Z)')
        pattern = re.compile(regex)
        for text in ('ciao\t-~ ', '\r\r', '\n -.abc'):
            self.assertEqual(pattern.search(text).group(0), text)
        for text in ('à', '\t\n à'):
            self.assertIsNone(pattern.search(text))
Exemplo n.º 2
0
    def test_invalid_hyphen(self):
        """An unescaped inner hyphen raises by default but translates for XSD 1.1."""
        with self.assertRaises(RegexError) as raised:
            translate_pattern('[a-b-c]')
        message = str(raised.exception)
        self.assertIn("unescaped character '-' at position 4", message)

        # With xsd_version='1.1' the same class translates; spellings with a
        # leading or a trailing hyphen must produce the same result.
        expected = translate_pattern('[a-b-c]', xsd_version='1.1')
        self.assertEqual(expected, '[\\-a-c]')
        for source in ('[-a-bc]', '[a-bc-]'):
            self.assertEqual(translate_pattern(source), expected)
Exemplo n.º 3
0
    def test_possessive_quantifiers(self):
        """Possessive quantifiers must raise RegexError with a positioned message."""
        # Note: possessive quantifiers (*+, ++, ?+, {m,n}+) are supported in Python 3.11+
        cases = (
            ('^[abcd]*+$', "unexpected meta character '+' at position 8"),
            ('^[abcd]{1,5}+$', "unexpected meta character '+' at position 12"),
        )
        for source, expected_message in cases:
            with self.assertRaises(RegexError) as raised:
                translate_pattern(source)
            self.assertIn(expected_message, str(raised.exception))
Exemplo n.º 4
0
    def test_back_references(self):
        """Check back-reference translation and the ``back_references=False`` switch."""
        one_group = translate_pattern('(a)\\1')
        self.assertEqual(one_group, '(a)\\1')

        # With this single-group pattern, '\11' is expected to come out as
        # the reference '\1' followed by the character class '[1]'.
        one_group_two_digits = translate_pattern('(a)\\11')
        self.assertEqual(one_group_two_digits, '(a)\\1[1]')

        # With the deeply nested pattern, '\11' is expected to be kept as is.
        nested = translate_pattern('((((((((((((a))))))))))))\\11')
        self.assertEqual(nested, '((((((((((((a))))))))))))\\11')

        with self.assertRaises(RegexError) as raised:
            translate_pattern('(a)\\1', back_references=False)
        message = str(raised.exception)
        self.assertIn("not allowed escape sequence", message)
Exemplo n.º 5
0
 def test_backslash_and_escapes(self):
     """Check a lone backslash and the i/I/c/C multi-character escapes."""
     # A lone backslash is expected to come back unchanged.
     self.assertEqual(translate_pattern('\\'), '\\')

     # Only the beginning of each translated character class is checked.
     expected_prefixes = (
         ('\\i', '[:A-Z_a-z'),
         ('\\I', '[^:A-Z_a-z'),
         ('\\c', '[-.0-9:A-Z_a-z'),
         ('\\C', '[^-.0-9:A-Z_a-z'),
     )
     for source, prefix in expected_prefixes:
         translated = translate_pattern(source)
         self.assertTrue(translated.startswith(prefix))
Exemplo n.º 6
0
    def test_character_class_subtraction(self):
        """Check translation of character class subtraction ``[base-[removed]]``."""
        without_vowels = translate_pattern('[a-z-[aeiuo]]')
        self.assertEqual(without_vowels, '[b-df-hj-np-tv-z]')

        # W3C XSD 1.1 test group RegexTest_422
        negated = translate_pattern('[^0-9-[a-zAE-Z]]')
        self.assertEqual(negated, '[^0-9AE-Za-z]')

        translated = translate_pattern(r'^([^0-9-[a-zAE-Z]]|[\w-[a-zAF-Z]])+$')
        matcher = re.compile(translated)
        self.assertIsNone(matcher.search('azBCDE1234567890BCDEFza'))
        found = matcher.search('BCD')
        self.assertEqual(found.group(0), 'BCD')
Exemplo n.º 7
0
    def test_anchors(self):
        """Check how '^' and '$' are translated with and without ``anchors=False``.

        With the default options the characters are kept as they are (a
        trailing '$' gets an additional negative lookahead). With
        ``anchors=False`` they are escaped as literals inside an explicitly
        anchored group.
        """
        # Every expected literal spells the closing '\Z' as '\\Z': a bare
        # '\Z' in a non-raw string is an invalid escape sequence.
        regex = translate_pattern('a^b')
        self.assertEqual(regex, 'a^b')

        regex = translate_pattern('a^b', anchors=False)
        self.assertEqual(regex, '^(a\\^b)$(?!\\n\\Z)')

        regex = translate_pattern('ab$')
        self.assertEqual(regex, 'ab$(?!\\n\\Z)')

        regex = translate_pattern('ab$', anchors=False)
        self.assertEqual(regex, '^(ab\\$)$(?!\\n\\Z)')
Exemplo n.º 8
0
    def test_category_escape(self):
        """Unicode block escapes are expected to become explicit code point ranges."""
        basic_latin = translate_pattern('^\\p{IsBasicLatin}*$')
        self.assertEqual(basic_latin, '^[\x00-\x7f]*$(?!\\n\\Z)')
        matcher = re.compile(basic_latin)
        for text in ('', 'e'):
            self.assertEqual(matcher.search(text).group(0), text)
        self.assertIsNone(matcher.search('è'))

        # Two blocks inside one character class.
        latin_1 = translate_pattern('^[\\p{IsBasicLatin}\\p{IsLatin-1Supplement}]*$')
        self.assertEqual(latin_1, '^[\x00-\xff]*$(?!\\n\\Z)')
        matcher = re.compile(latin_1)
        for text in ('e', 'è'):
            self.assertEqual(matcher.search(text).group(0), text)
        self.assertIsNone(matcher.search('Ĭ'))
Exemplo n.º 9
0
 def test_issue_079(self):
     """Control characters inside a negated class must stay unescaped."""
     # Do not escape special characters in character class
     translated = translate_pattern('[^\n\t]+', anchors=False)
     self.assertEqual(translated, '^([^\t\n]+)$(?!\\n\\Z)')

     matcher = re.compile(translated)
     self.assertIsNone(matcher.search('first\tsecond\tthird'))
     found = matcher.search('first second third')
     self.assertEqual(found.group(0), 'first second third')
Exemplo n.º 10
0
    def test_digit_shortcut(self):
        """Check patterns built on the digit shortcut, translated without anchors."""
        translated = translate_pattern(r'\d{1,3}\.\d{1,2}', anchors=False)
        self.assertEqual(translated, r'^(\d{1,3}\.\d{1,2})$(?!\n\Z)')
        matcher = re.compile(translated)
        for text in ('12.40', '867.00'):
            self.assertEqual(matcher.search(text).group(0), text)
        # Trailing newline or space, too many digits, or a leading letter.
        for text in ('867.00\n', '867.00 ', '867.000', '1867.0', 'a1.13'):
            self.assertIsNone(matcher.search(text))

        translated = translate_pattern(r'[-+]?(\d+|\d+(\.\d+)?%)', anchors=False)
        self.assertEqual(translated, r'^([\+\-]?(\d+|\d+(\.\d+)?%))$(?!\n\Z)')
        matcher = re.compile(translated)
        found = matcher.search('78.8%')
        self.assertEqual(found.group(0), '78.8%')
        self.assertIsNone(matcher.search('867.00'))
Exemplo n.º 11
0
    def test_dot_wildcard(self):
        """The dot is expected to translate to a class excluding CR and LF."""
        translated = translate_pattern('.+', anchors=False)
        self.assertEqual(translated, '^([^\r\n]+)$(?!\\n\\Z)')
        matcher = re.compile(translated)
        for text in ('line1\rline2\r', 'line1\nline2', ''):
            self.assertIsNone(matcher.search(text))
        self.assertIsNotNone(matcher.search('\\'))
        found = matcher.search('abc')
        self.assertEqual(found.group(0), 'abc')

        translated = translate_pattern('.+T.+(Z|[+-].+)', anchors=False)
        self.assertEqual(
            translated,
            '^([^\r\n]+T[^\r\n]+(Z|[\\+\\-][^\r\n]+))$(?!\\n\\Z)',
        )
        matcher = re.compile(translated)
        for text in ('12T0A3+36', '12T0A3Z'):
            self.assertEqual(matcher.search(text).group(0), text)
        for text in ('', '12T0A3Z2'):
            self.assertIsNone(matcher.search(text))
Exemplo n.º 12
0
 def test_ending_newline_match(self):
     """A value followed by a trailing newline must not match the pattern."""
     # Related with xmlschema's issue #223
     translated = translate_pattern(
         pattern=r"\d{2}:\d{2}:\d{6,7}",
         back_references=False,
         lazy_quantifiers=False,
         anchors=False,
     )
     matcher = re.compile(translated)

     accepted = matcher.match("38:36:000031")
     self.assertIsNotNone(accepted)
     rejected = matcher.match("38:36:000031\n")
     self.assertIsNone(rejected)
Exemplo n.º 13
0
 def test_occurrences_qualifiers(self):
     """Check the '{n}' and '?' occurrence qualifiers on a hex color pattern."""
     translated = translate_pattern('#[0-9a-fA-F]{3}([0-9a-fA-F]{3})?', anchors=False)
     self.assertEqual(translated, r'^(#[0-9A-Fa-f]{3}([0-9A-Fa-f]{3})?)$(?!\n\Z)')
     matcher = re.compile(translated)

     short_form = matcher.search('#F3D')
     self.assertEqual(short_form.group(0), '#F3D')
     self.assertIsNone(matcher.search('#F3D\n'))
     long_form = matcher.search('#F3DA30')
     self.assertEqual(long_form.group(0), '#F3DA30')

     # Too short, trailing space, missing '#', and the empty string.
     for text in ('#F3', '#F3D ', 'F3D', ''):
         self.assertIsNone(matcher.search(text))
Exemplo n.º 14
0
    def test_invalid_character_class(self):
        """Malformed character classes must raise RegexError with a specific message."""
        # Each entry pairs an invalid pattern with a fragment of the error text.
        cases = (
            ('[[]', "invalid character '['"),
            ('ab]d', "unexpected meta character ']'"),
            ('[abc\\1]', "illegal back-reference in character class"),
            ('[--a]', "invalid character range '--'"),
            ('[a-z-[c-q', "unterminated character class"),
        )
        for source, expected_message in cases:
            with self.assertRaises(RegexError) as raised:
                translate_pattern(source)
            self.assertIn(expected_message, str(raised.exception))
Exemplo n.º 15
0
    def test_empty_character_class(self):
        """Classes that subtract down to nothing translate to a never-matching class."""
        translated = translate_pattern('[a-[a-f]]', anchors=False)
        self.assertEqual(translated, r'^([^\w\W])$(?!\n\Z)')

        # A literally empty class is an error.
        with self.assertRaises(RegexError):
            translate_pattern('[]')

        empty_class = r'[^\w\W]'
        sources = (
            r'[\w-[\w]]',
            r'[\s-[\s]]',
            r'[\c-[\c]]',
            r'[\i-[\i]]',
            '[a-[ab]]',
            '[^a-[^a]]',
        )
        for source in sources:
            self.assertEqual(translate_pattern(source), empty_class)
Exemplo n.º 16
0
    def test_not_spaces(self):
        """Check a character class combining the non-space shortcut with literals.

        The translated regex is compared literally and then used to verify
        the accepted and rejected sample strings, including the 1-10 length
        bound of the quantifier.
        """
        regex = translate_pattern(r"[\S' ']{1,10}", anchors=False)
        # The former ``sys.version_info >= (3,)`` guard was always true on
        # Python 3, so the assertion now runs unconditionally.
        self.assertEqual(
            regex, "^([\x00-\x08\x0b\x0c\x0e-\x1f!-\U0010ffff ']{1,10})$(?!\\n\\Z)"
        )

        pattern = re.compile(regex)
        self.assertIsNone(pattern.search('alpha\r'))
        self.assertEqual(pattern.search('beta').group(0), 'beta')
        for text in ('beta\n', 'beta\n ', '', 'over the maximum length!'):
            self.assertIsNone(pattern.search(text))
        self.assertIsNotNone(pattern.search('\\'))
        self.assertEqual(pattern.search('abc').group(0), 'abc')
Exemplo n.º 17
0
    def test_character_class_reordering(self):
        """Check that character class members come out in the expected order.

        Each section translates one pattern, compares the result with the
        expected literal (members reordered and escaped) and then verifies
        sample strings against the compiled regex.
        """
        # Non-raw expected literals below spell the closing '\Z' as '\\Z';
        # a bare '\Z' in a non-raw string is an invalid escape sequence.
        regex = translate_pattern('[A-Z ]', anchors=False)
        self.assertEqual(regex, '^([ A-Z])$(?!\\n\\Z)')
        pattern = re.compile(regex)
        for text in ('A', 'Z', 'Q', ' '):
            self.assertEqual(pattern.search(text).group(0), text)
        for text in ('  ', 'AA'):
            self.assertIsNone(pattern.search(text))

        regex = translate_pattern(r'[0-9.,DHMPRSTWYZ/:+\-]+', anchors=False)
        self.assertEqual(regex, r'^([\+-\-\.-:DHMPR-TWYZ]+)$(?!\n\Z)')
        pattern = re.compile(regex)
        for text in ('12,40', 'YYYY:MM:DD'):
            self.assertEqual(pattern.search(text).group(0), text)
        for text in ('', 'C'):
            self.assertIsNone(pattern.search(text))

        regex = translate_pattern('[^: \n\r\t]+', anchors=False)
        self.assertEqual(regex, '^([^\t\n\r :]+)$(?!\\n\\Z)')
        pattern = re.compile(regex)
        self.assertEqual(pattern.search('56,41').group(0), '56,41')
        for text in ('56,41\n', '13:20'):
            self.assertIsNone(pattern.search(text))

        # This pattern carries its own anchors, so default options are used.
        regex = translate_pattern(r'^[A-Za-z0-9_\-]+(:[A-Za-z0-9_\-]+)?$')
        self.assertEqual(regex,
                         r'^[\-0-9A-Z_a-z]+(:[\-0-9A-Z_a-z]+)?$(?!\n\Z)')
        pattern = re.compile(regex)
        self.assertEqual(pattern.search('fa9').group(0), 'fa9')
        self.assertIsNone(pattern.search('-x_1:_tZ-\n'))
        self.assertEqual(pattern.search('-x_1:_tZ-').group(0), '-x_1:_tZ-')
        for text in ('', '+78'):
            self.assertIsNone(pattern.search(text))

        regex = translate_pattern(r'[!%\^\*@~;#,|/]', anchors=False)
        self.assertEqual(regex, r'^([!#%\*,/;@\^\|~])$(?!\n\Z)')
        pattern = re.compile(regex)
        for text in ('#', '!', '^', '|', '*'):
            self.assertEqual(pattern.search(text).group(0), text)
        for text in ('**', 'b', ''):
            self.assertIsNone(pattern.search(text))

        regex = translate_pattern('[A-Za-z]+:[A-Za-z][A-Za-z0-9\\-]+',
                                  anchors=False)
        self.assertEqual(regex,
                         '^([A-Za-z]+:[A-Za-z][\\-0-9A-Za-z]+)$(?!\\n\\Z)')
        pattern = re.compile(regex)
        self.assertEqual(pattern.search('zk:xy-9s').group(0), 'zk:xy-9s')
        self.assertIsNone(pattern.search('xx:y'))
Exemplo n.º 18
0
    def test_invalid_quantifiers(self):
        """Misplaced or malformed quantifiers must raise RegexError."""
        cases = (
            ('{1}', "unexpected quantifier '{'"),
            ('.{1,2,3}', "invalid quantifier '{'"),
            ('*', "unexpected quantifier '*'"),
        )
        for source, expected_message in cases:
            with self.assertRaises(RegexError) as raised:
                translate_pattern(source)
            self.assertIn(expected_message, str(raised.exception))
Exemplo n.º 19
0
    def test_invalid_pattern_groups(self):
        """Invalid or unbalanced groups must raise RegexError."""
        cases = (
            ('(?.*)', "invalid '(?...)' extension notation"),
            ('(.*))', "unbalanced parenthesis ')'"),
            ('((.*)', "unterminated subpattern in expression"),
        )
        for source, expected_message in cases:
            with self.assertRaises(RegexError) as raised:
                translate_pattern(source)
            self.assertIn(expected_message, str(raised.exception))
Exemplo n.º 20
0
    def test_character_class_shortcuts(self):
        """Check matching behaviour of the i, c, w, W, d and D class shortcuts."""
        # Name-like pattern built from the i and c shortcuts minus the colon.
        matcher = re.compile(translate_pattern(r"^[\i-[:]][\c-[:]]*$"))
        found = matcher.search('x11')
        self.assertEqual(found.group(0), 'x11')
        self.assertIsNone(matcher.search('3a'))

        matcher = re.compile(translate_pattern(r"^\w*$"))
        found = matcher.search('aA_x7')
        self.assertEqual(found.group(0), 'aA_x7')
        for text in ('.', '-'):
            self.assertIsNone(matcher.search(text))

        matcher = re.compile(translate_pattern(r"\W*", anchors=False))
        self.assertIsNone(matcher.search('aA_x7'))
        found = matcher.search('.-')
        self.assertEqual(found.group(0), '.-')

        matcher = re.compile(translate_pattern(r"^\d*$"))
        found = matcher.search('6410')
        self.assertEqual(found.group(0), '6410')
        for text in ('a', '-'):
            self.assertIsNone(matcher.search(text))

        matcher = re.compile(translate_pattern(r"^\D*$"))
        self.assertIsNone(matcher.search('6410'))
        for text in ('a', '-'):
            self.assertEqual(matcher.search(text).group(0), text)

        # Pull Request 114
        matcher = re.compile(translate_pattern(r"^[\w]{0,5}$"))
        found = matcher.search('abc')
        self.assertEqual(found.group(0), 'abc')
        self.assertIsNone(matcher.search('.'))

        matcher = re.compile(translate_pattern(r"^[\W]{0,5}$"))
        found = matcher.search('.')
        self.assertEqual(found.group(0), '.')
        self.assertIsNone(matcher.search('abc'))
Exemplo n.º 21
0
 def test_verbose_patterns(self):
     """With re.VERBOSE the spaces inside the pattern are expected to be dropped."""
     spaced_escape = translate_pattern('\\  s*[a-z]+', flags=re.VERBOSE)
     self.assertEqual(spaced_escape, '\\s*[a-z]+')

     spaced_block = translate_pattern('\\  p{  Is BasicLatin}+', flags=re.VERBOSE)
     self.assertEqual(spaced_block, '[\x00-\x7f]+')
Exemplo n.º 22
0
    def test_lazy_quantifiers(self):
        """Lazy quantifiers translate by default and raise when disabled."""
        accepted = (
            ('.*?', '[^\r\n]*?'),
            ('[a-z]{2,3}?', '[a-z]{2,3}?'),
            ('[a-z]*?', '[a-z]*?'),
        )
        for source, expected in accepted:
            self.assertEqual(translate_pattern(source), expected)

        # A greedy quantifier is still fine when lazy ones are disabled.
        greedy = translate_pattern('[a-z]*', lazy_quantifiers=False)
        self.assertEqual(greedy, '[a-z]*')

        with self.assertRaises(RegexError) as raised:
            translate_pattern('.*?', lazy_quantifiers=False)
        self.assertEqual(
            str(raised.exception),
            "unexpected meta character '?' at position 2: '.*?'",
        )

        for source in ('[a-z]{2,3}?', r'[a-z]{2,3}?\s+', r'[a-z]+?\s+'):
            with self.assertRaises(RegexError):
                translate_pattern(source, lazy_quantifiers=False)
Exemplo n.º 23
0
 def test_character_class_range(self):
     """A trailing hyphen in a class is expected to come out escaped and first."""
     translated = translate_pattern('[bc-]')
     expected = r'[\-bc]'
     self.assertEqual(translated, expected)
Exemplo n.º 24
0
    def test_block_escapes(self):
        """Check category and block escapes, including their error messages."""
        # Only the beginning of the punctuation class is checked.
        for source, prefix in (('\\p{P}', '[!-#%-'), ('\\P{P}', '[^!-#%-')):
            self.assertTrue(translate_pattern(source).startswith(prefix))

        plain = translate_pattern('\\p{IsBasicLatin}')
        self.assertEqual(plain, '[\x00-\x7f]')
        ignoring_case = translate_pattern('\\p{IsBasicLatin}', flags=re.IGNORECASE)
        self.assertEqual(ignoring_case, '(?-i:[\x00-\x7f])')

        failures = (
            ('\\px', "a '{' expected"),
            ('\\p{Pu', "truncated unicode block escape"),
            ('\\p{Unknown}', "'Unknown' doesn't match to any Unicode category"),
        )
        for source, expected_message in failures:
            with self.assertRaises(RegexError) as raised:
                translate_pattern(source)
            self.assertIn(expected_message, str(raised.exception))

        # An unknown block translates for XSD 1.1 but raises with the defaults.
        tolerant = translate_pattern('\\p{IsUnknown}', xsd_version='1.1')
        self.assertEqual(tolerant, '[\x00-\U0010fffe]')

        with self.assertRaises(RegexError) as raised:
            translate_pattern('\\p{IsUnknown}')
        message = str(raised.exception)
        self.assertIn("'IsUnknown' doesn't match to any Unicode block", message)