示例#1
0
    def test_or_operator(self):
        # Alternation with '|': the translated regex is expected to be wrapped
        # as '^(...)$', so only whole-string matches succeed (a single trailing
        # newline is still accepted by '$' but left out of the match).
        translated = get_python_regex('0|1')
        self.assertEqual(translated, '^(0|1)$')
        matcher = re.compile(translated)
        for text, expected in (('0', '0'), ('1', '1'), ('1\n', '1')):
            self.assertEqual(matcher.search(text).group(0), expected)
        for text in ('', '2', '01', '1\n '):
            self.assertIsNone(matcher.search(text))

        # Alternation between an integer and a decimal percentage.
        translated = get_python_regex(r'\d+[%]|\d*\.\d+[%]')
        self.assertEqual(translated, r'^(\d+[%]|\d*\.\d+[%])$')
        matcher = re.compile(translated)
        for text in ('99%', '99.9%', '.90%'):
            self.assertEqual(matcher.search(text).group(0), text)
        for text in ('%', '90.%'):
            self.assertIsNone(matcher.search(text))

        # Alternation mixing a character range with literal control characters.
        translated = get_python_regex('([ -~]|\n|\r|\t)*')
        self.assertEqual(translated, '^(([ -~]|\n|\r|\t)*)$')
        matcher = re.compile(translated)
        for text in ('ciao\t-~ ', '\r\r', '\n -.abc'):
            self.assertEqual(matcher.search(text).group(0), text)
        for text in ('à', '\t\n à'):
            self.assertIsNone(matcher.search(text))
示例#2
0
    def test_character_class_shortcuts(self):
        # Each scenario pairs an XSD pattern with an ordered list of probes.
        # A probe is (input, expected full match); None means that the
        # translated pattern must not match the input at all.
        scenarios = [
            (r"[\i-[:]][\c-[:]]*", [('x11', 'x11'), ('3a', None)]),
            (r"\w*", [('aA_x7', 'aA_x7'), ('.', None), ('-', None)]),
            (r"\W*", [('aA_x7', None), ('.-', '.-')]),
            (r"\d*", [('6410', '6410'), ('a', None), ('-', None)]),
            (r"\D*", [('6410', None), ('a', 'a'), ('-', '-')]),
        ]
        for xsd_pattern, probes in scenarios:
            matcher = re.compile(get_python_regex(xsd_pattern))
            for text, expected in probes:
                found = matcher.search(text)
                if expected is None:
                    self.assertIsNone(found)
                else:
                    self.assertEqual(found.group(0), expected)
示例#3
0
    def test_character_class_subtraction(self):
        # A subtraction group '[X-[Y]]' is expected to be translated into a
        # single plain character class with the subtracted characters removed.
        self.assertEqual(get_python_regex('[a-z-[aeiuo]]'), '^([b-df-hj-np-tv-z])$')

        # W3C XSD 1.1 test group RegexTest_422
        self.assertEqual(get_python_regex('[^0-9-[a-zAE-Z]]'), '^([^0-9AE-Za-z])$')

        # Subtraction groups used as alternatives inside a repeated group.
        matcher = re.compile(get_python_regex(r'([^0-9-[a-zAE-Z]]|[\w-[a-zAF-Z]])+'))
        self.assertIsNone(matcher.search('azBCDE1234567890BCDEFza'))
        self.assertEqual(matcher.search('BCD').group(0), 'BCD')
示例#4
0
    def test_category_escape(self):
        # A \p{Is...} block escape is expected to expand to a code point range.
        ascii_regex = get_python_regex('\\p{IsBasicLatin}*')
        self.assertEqual(ascii_regex, '^([\x00-\x7f]*)$')
        ascii_matcher = re.compile(ascii_regex)
        for text in ('', 'e'):
            self.assertEqual(ascii_matcher.search(text).group(0), text)
        self.assertIsNone(ascii_matcher.search('è'))

        # Two adjacent blocks in one class are expected to merge into one range.
        latin1_regex = get_python_regex('[\\p{IsBasicLatin}\\p{IsLatin-1Supplement}]*')
        self.assertEqual(latin1_regex, '^([\x00-\xff]*)$')
        latin1_matcher = re.compile(latin1_regex)
        for text in ('e', 'è'):
            self.assertEqual(latin1_matcher.search(text).group(0), text)
        self.assertIsNone(latin1_matcher.search('Ĭ'))
示例#5
0
 def test_issue_079(self):
     # Do not escape special characters in character class: the tab and
     # newline are expected to appear as literal characters in the output.
     translated = get_python_regex('[^\n\t]+')
     self.assertEqual(translated, '^([^\t\n]+)$')
     search = re.compile(translated).search
     self.assertIsNone(search('first\tsecond\tthird'))
     sentence = search('first second third')
     self.assertEqual(sentence.group(0), 'first second third')
示例#6
0
    def test_digit_shortcut(self):
        # \d with bounded quantifiers is expected to pass through unchanged,
        # wrapped in the anchored group.
        translated = get_python_regex(r'\d{1,3}\.\d{1,2}')
        self.assertEqual(translated, r'^(\d{1,3}\.\d{1,2})$')
        matcher = re.compile(translated)
        accepted = (('12.40', '12.40'), ('867.00', '867.00'), ('867.00\n', '867.00'))
        for text, expected in accepted:
            self.assertEqual(matcher.search(text).group(0), expected)
        for text in ('867.00 ', '867.000', '1867.0', 'a1.13'):
            self.assertIsNone(matcher.search(text))

        # Optional sign followed by an integer or a percentage; the sign
        # class is expected to come back with both characters escaped.
        translated = get_python_regex(r'[-+]?(\d+|\d+(\.\d+)?%)')
        self.assertEqual(translated, r'^([\+\-]?(\d+|\d+(\.\d+)?%))$')
        matcher = re.compile(translated)
        self.assertEqual(matcher.search('78.8%').group(0), '78.8%')
        self.assertIsNone(matcher.search('867.00'))
示例#7
0
    def test_dot_wildcard(self):
        # The XSD dot is expected to be translated into '[^\r\n]', i.e. any
        # character except carriage return and newline.
        translated = get_python_regex('.+')
        self.assertEqual(translated, '^([^\r\n]+)$')
        matcher = re.compile(translated)
        for text in ('line1\rline2\r', 'line1\nline2', ''):
            self.assertIsNone(matcher.search(text))
        self.assertIsNotNone(matcher.search('\\'))
        self.assertEqual(matcher.search('abc').group(0), 'abc')

        # Dots combined with literals, an alternation and a sign class.
        translated = get_python_regex('.+T.+(Z|[+-].+)')
        self.assertEqual(translated, '^([^\r\n]+T[^\r\n]+(Z|[\\+\\-][^\r\n]+))$')
        matcher = re.compile(translated)
        for text in ('12T0A3+36', '12T0A3Z'):
            self.assertEqual(matcher.search(text).group(0), text)
        for text in ('', '12T0A3Z2'):
            self.assertIsNone(matcher.search(text))
示例#8
0
 def test_occurrences_qualifiers(self):
     # Counted ('{3}') and optional ('?') qualifiers on a hex colour pattern;
     # the character classes are expected to come back with sorted ranges.
     translated = get_python_regex('#[0-9a-fA-F]{3}([0-9a-fA-F]{3})?')
     self.assertEqual(translated, '^(#[0-9A-Fa-f]{3}([0-9A-Fa-f]{3})?)$')
     matcher = re.compile(translated)
     accepted = (('#F3D', '#F3D'), ('#F3D\n', '#F3D'), ('#F3DA30', '#F3DA30'))
     for text, expected in accepted:
         self.assertEqual(matcher.search(text).group(0), expected)
     for text in ('#F3', '#F3D ', 'F3D', ''):
         self.assertIsNone(matcher.search(text))
示例#9
0
    def test_not_spaces(self):
        # \S inside a character class, together with a quote and a space,
        # repeated one to ten times.
        translated = get_python_regex(r"[\S' ']{1,10}")
        if sys.version_info >= (3,):
            # The exact expanded form is only checked on Python 3.
            self.assertEqual(
                translated,
                "^([\x00-\x08\x0b\x0c\x0e-\x1f!-\U0010ffff ']{1,10})$",
            )

        matcher = re.compile(translated)
        self.assertIsNone(matcher.search('alpha\r'))
        # $ matches also a \n at last position
        for text, expected in (('beta', 'beta'), ('beta\n', 'beta')):
            self.assertEqual(matcher.search(text).group(0), expected)
        for text in ('beta\n ', '', 'over the maximum length!'):
            self.assertIsNone(matcher.search(text))
        self.assertIsNotNone(matcher.search('\\'))
        self.assertEqual(matcher.search('abc').group(0), 'abc')
示例#10
0
    def test_character_class_reordering(self):
        # Each scenario is a tuple of:
        #   (XSD pattern, expected translation,
        #    ordered (input, expected full match) pairs,
        #    inputs that must not match).
        # The expected translations show the members of every character
        # class re-emitted in sorted order, with some characters escaped.
        scenarios = [
            (
                '[A-Z ]',
                '^([ A-Z])$',
                [('A', 'A'), ('Z', 'Z'), ('Q', 'Q'), (' ', ' ')],
                ['  ', 'AA'],
            ),
            (
                r'[0-9.,DHMPRSTWYZ/:+\-]+',
                r'^([\+-\-\.-:DHMPR-TWYZ]+)$',
                [('12,40', '12,40'), ('YYYY:MM:DD', 'YYYY:MM:DD')],
                ['', 'C'],
            ),
            (
                '[^: \n\r\t]+',
                '^([^\t\n\r :]+)$',
                [('56,41', '56,41'), ('56,41\n', '56,41')],
                ['13:20'],
            ),
            (
                r'[A-Za-z0-9_\-]+(:[A-Za-z0-9_\-]+)?',
                r'^([\-0-9A-Z_a-z]+(:[\-0-9A-Z_a-z]+)?)$',
                [('fa9', 'fa9'), ('-x_1:_tZ-\n', '-x_1:_tZ-')],
                ['', '+78'],
            ),
            (
                r'[!%\^\*@~;#,|/]',
                r'^([!#%\*,/;@\^\|~])$',
                [('#', '#'), ('!', '!'), ('^', '^'), ('|', '|'), ('*', '*')],
                ['**', 'b', ''],
            ),
            (
                '[A-Za-z]+:[A-Za-z][A-Za-z0-9\\-]+',
                '^([A-Za-z]+:[A-Za-z][\\-0-9A-Za-z]+)$',
                [('zk:xy-9s', 'zk:xy-9s')],
                ['xx:y'],
            ),
        ]
        for xsd_pattern, expected_regex, accepted, rejected in scenarios:
            translated = get_python_regex(xsd_pattern)
            self.assertEqual(translated, expected_regex)
            matcher = re.compile(translated)
            for text, expected in accepted:
                self.assertEqual(matcher.search(text).group(0), expected)
            for text in rejected:
                self.assertIsNone(matcher.search(text))
示例#11
0
 def test_character_class_range(self):
     # A hyphen at the end of a class is a literal character: the expected
     # translation lists it escaped, ahead of 'b' and 'c'.
     self.assertEqual(get_python_regex('[bc-]'), r'^([\-bc])$')
示例#12
0
 def test_empty_character_group_repr(self):
     # Subtracting a superset leaves nothing to match; the expected
     # translation for that empty set is the class '[^\w\W]'.
     self.assertEqual(get_python_regex('[a-[a-f]]'), r'^([^\w\W])$')
     # A literally empty class is rejected with a regex error instead.
     with self.assertRaises(XMLSchemaRegexError):
         get_python_regex('[]')
示例#13
0
 def test_character_class_shortcuts(self):
     # The pattern is written as a raw string: '\i' and '\c' are XSD
     # multi-character escapes, not Python string escapes, and in a plain
     # literal they trigger an invalid-escape-sequence warning.
     regex = get_python_regex(r"[\i-[:]][\c-[:]]*")
     pattern = re.compile(regex)
     # 'x11' is accepted in full, while '3a' (leading digit) is rejected.
     self.assertEqual(pattern.search('x11').group(0), 'x11')
     self.assertIsNone(pattern.search('3a'))