def test_is_block(self):
     """Check parsing of block names and the characters they match::

     IsBlock    ::=    'Is' [a-zA-Z0-9#x2D]+"""
     # block name -> (characters it matches, characters it rejects)
     samples = {
         'BasicLatin': ("ABC", ul("\xc0\xdf\xa9")),
         'Latin-1Supplement': (ul("\xc0\xdf\xa9"), "ABC"),
         'CurrencySymbols': (
             u8(b'\xe2\x82\xa4\xe2\x82\xa9\xe2\x82\xac'),
             ul("\x24\xa2\xa3")),
         'NumberForms': (
             u8(b'\xe2\x85\x95\xe2\x85\x96\xe2\x85\x97\xe2\x85\x98'),
             "1/5 2/5 3/5 4/5"),
     }
     for block_name in dict_keys(samples):
         parser = xsi.RegularExpressionParser("Is" + block_name)
         block_class = parser.require_is_block()
         # nothing may be left unparsed
         self.assertTrue(parser.the_char is None)
         accepted, rejected = samples[block_name]
         for ch in accepted:
             self.assertTrue(block_class.test(ch),
                             "%s not in %s" % (repr(ch), block_name))
         for ch in rejected:
             self.assertFalse(block_class.test(ch),
                              "%s in Is%s" % (repr(ch), block_name))
     # a misspelled block name has to be refused
     parser = xsi.RegularExpressionParser("IsNumberFoams")
     try:
         parser.require_is_block()
     except xsi.RegularExpressionError:
         pass
     else:
         self.fail("IsNumberFoams")
 def test_char_prop(self):
     """Check parsing of character properties and what they match::

     charProp ::= IsCategory | IsBlock"""
     # property expression -> (characters it matches, characters it rejects)
     samples = {
         'Nd': (
             u8(b'123\xdb\xb1\xdb\xb2\xdb\xb3'),
             u8(b'ABC\xe2\x85\x95\xe2\x85\x96\xe2\x85\x97\xe2\x85\x98')),
         'S': (u8(b'+<=>\xe2\x81\x84\xe2\x82\xac'), "(){}"),
         'IsBasicLatin': ("ABC", ul("\xc0\xdf\xa9")),
         'IsLatin-1Supplement': (ul("\xc0\xdf\xa9"), "ABC"),
         'IsCurrencySymbols': (
             u8(b'\xe2\x82\xa4\xe2\x82\xa9\xe2\x82\xac'),
             ul("\x24\xa2\xa3")),
         'IsNumberForms': (
             u8(b'\xe2\x85\x95\xe2\x85\x96\xe2\x85\x97\xe2\x85\x98'),
             "1/5 2/5 3/5 4/5"),
     }
     for prop in dict_keys(samples):
         parser = xsi.RegularExpressionParser(prop)
         prop_class = parser.require_char_prop()
         # nothing may be left unparsed
         self.assertTrue(parser.the_char is None)
         accepted, rejected = samples[prop]
         for ch in accepted:
             self.assertTrue(prop_class.test(ch),
                             "%s not in %s" % (repr(ch), prop))
         for ch in rejected:
             self.assertFalse(prop_class.test(ch),
                              "%s in %s" % (repr(ch), prop))
示例#3
0
 def test_constructor(self):
     """Build URI instances from text, from octets and from bytes.

     Checks the plain constructor, from_octets in its default and
     strict modes, and from_octets applied to binary strings."""
     octets_msg = "octets must be a character string"
     target = uri.URI(SIMPLE_EXAMPLE)
     self.assertTrue(isinstance(target, uri.URI))
     self.assertTrue(str(target) == SIMPLE_EXAMPLE)
     self.assertTrue(is_unicode(target.octets), octets_msg)
     if py2:
         self.assertTrue(to_text(target) == SIMPLE_EXAMPLE)
     # LIST_EXAMPLE is no longer a supported input: the call may
     # succeed or raise URIException, nothing else is checked
     try:
         target = uri.URI(LIST_EXAMPLE)
     except uri.URIException:
         pass
     non_ascii = u8(b'\xe8\x8b\xb1\xe5\x9b\xbd.xml')
     target = uri.URI.from_octets(non_ascii)
     self.assertTrue(str(target) == '%E8%8B%B1%E5%9B%BD.xml',
                     "Unicode example: %s" % str(target))
     self.assertTrue(is_unicode(target.octets), octets_msg)
     # strict mode must refuse the same unescaped input
     try:
         uri.URI.from_octets(non_ascii, strict=True)
     except uri.URIException:
         pass
     else:
         self.fail("strict mode requires %-encoding")
     # a binary string has to be clean US-ASCII...
     try:
         uri.URI.from_octets(b'Caf\xe9')
     except UnicodeDecodeError:
         pass
     else:
         self.fail("binary string must be US-ASCII")
     # ...whereas %-encoded binary input is accepted
     target = uri.URI.from_octets(b'Caf%E9')
     self.assertTrue(is_unicode(target.octets), octets_msg)
示例#4
0
 def test_is_block(self):
     """Check parsing of block names and the characters they match::

     IsBlock    ::=    'Is' [a-zA-Z0-9#x2D]+"""
     # block name -> (characters it matches, characters it rejects)
     samples = {
         'BasicLatin': ("ABC", ul("\xc0\xdf\xa9")),
         'Latin-1Supplement': (ul("\xc0\xdf\xa9"), "ABC"),
         'CurrencySymbols': (
             u8(b'\xe2\x82\xa4\xe2\x82\xa9\xe2\x82\xac'),
             ul("\x24\xa2\xa3")),
         'NumberForms': (
             u8(b'\xe2\x85\x95\xe2\x85\x96\xe2\x85\x97\xe2\x85\x98'),
             "1/5 2/5 3/5 4/5"),
     }
     for block_name in dict_keys(samples):
         parser = xsi.RegularExpressionParser("Is" + block_name)
         block_class = parser.require_is_block()
         # nothing may be left unparsed
         self.assertTrue(parser.the_char is None)
         accepted, rejected = samples[block_name]
         for ch in accepted:
             self.assertTrue(block_class.test(ch),
                             "%s not in %s" % (repr(ch), block_name))
         for ch in rejected:
             self.assertFalse(block_class.test(ch),
                              "%s in Is%s" % (repr(ch), block_name))
     # a misspelled block name has to be refused
     parser = xsi.RegularExpressionParser("IsNumberFoams")
     try:
         parser.require_is_block()
     except xsi.RegularExpressionError:
         pass
     else:
         self.fail("IsNumberFoams")
示例#5
0
 def test_char_prop(self):
     """Check parsing of character properties and what they match::

     charProp ::= IsCategory | IsBlock"""
     # property expression -> (characters it matches, characters it rejects)
     samples = {
         'Nd': (
             u8(b'123\xdb\xb1\xdb\xb2\xdb\xb3'),
             u8(b'ABC\xe2\x85\x95\xe2\x85\x96\xe2\x85\x97\xe2\x85\x98')),
         'S': (u8(b'+<=>\xe2\x81\x84\xe2\x82\xac'), "(){}"),
         'IsBasicLatin': ("ABC", ul("\xc0\xdf\xa9")),
         'IsLatin-1Supplement': (ul("\xc0\xdf\xa9"), "ABC"),
         'IsCurrencySymbols': (
             u8(b'\xe2\x82\xa4\xe2\x82\xa9\xe2\x82\xac'),
             ul("\x24\xa2\xa3")),
         'IsNumberForms': (
             u8(b'\xe2\x85\x95\xe2\x85\x96\xe2\x85\x97\xe2\x85\x98'),
             "1/5 2/5 3/5 4/5"),
     }
     for prop in dict_keys(samples):
         parser = xsi.RegularExpressionParser(prop)
         prop_class = parser.require_char_prop()
         # nothing may be left unparsed
         self.assertTrue(parser.the_char is None)
         accepted, rejected = samples[prop]
         for ch in accepted:
             self.assertTrue(prop_class.test(ch),
                             "%s not in %s" % (repr(ch), prop))
         for ch in rejected:
             self.assertFalse(prop_class.test(ch),
                              "%s in %s" % (repr(ch), prop))
示例#6
0
 def test_constructor(self):
     """Build URI instances from text, from octets and from bytes.

     Checks the plain constructor, from_octets in its default and
     strict modes, and from_octets applied to binary strings."""
     octets_msg = "octets must be a character string"
     target = uri.URI(SIMPLE_EXAMPLE)
     self.assertTrue(isinstance(target, uri.URI))
     self.assertTrue(str(target) == SIMPLE_EXAMPLE)
     self.assertTrue(is_unicode(target.octets), octets_msg)
     if py2:
         self.assertTrue(to_text(target) == SIMPLE_EXAMPLE)
     # LIST_EXAMPLE is no longer a supported input: the call may
     # succeed or raise URIException, nothing else is checked
     try:
         target = uri.URI(LIST_EXAMPLE)
     except uri.URIException:
         pass
     non_ascii = u8(b'\xe8\x8b\xb1\xe5\x9b\xbd.xml')
     target = uri.URI.from_octets(non_ascii)
     self.assertTrue(str(target) == '%E8%8B%B1%E5%9B%BD.xml',
                     "Unicode example: %s" % str(target))
     self.assertTrue(is_unicode(target.octets), octets_msg)
     # strict mode must refuse the same unescaped input
     try:
         uri.URI.from_octets(non_ascii, strict=True)
     except uri.URIException:
         pass
     else:
         self.fail("strict mode requires %-encoding")
     # a binary string has to be clean US-ASCII...
     try:
         uri.URI.from_octets(b'Caf\xe9')
     except UnicodeDecodeError:
         pass
     else:
         self.fail("binary string must be US-ASCII")
     # ...whereas %-encoded binary input is accepted
     target = uri.URI.from_octets(b'Caf%E9')
     self.assertTrue(is_unicode(target.octets), octets_msg)
示例#7
0
 def test_parse_hex_digit(self):
     """Collect hex digits from text and binary input with parse_hex_digit."""

     def harvest(parser):
         # scan to the end of the input, keeping every value
         # parse_hex_digit returns and skipping characters it declines
         found = []
         while parser.the_char is not None:
             hex_digit = parser.parse_hex_digit()
             if hex_digit is None:
                 parser.next_char()
             else:
                 found.append(hex_digit)
         return found

     # character input: letters beyond f/F and the trailing non-ASCII
     # characters are expected to be skipped
     text_parser = unicode5.BasicParser(
         u8(b"0123456789abcdefghijklmnopqrstuvwxyz"
            b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            b"\xd9\xa0\xd9\xa1\xd9\xa2\xd9\xa3\xd9\xa4\xd9\xa5"
            b"\xd9\xa6\xd9\xa7\xd9\xa8\xd9\xa9"))
     text_digits = harvest(text_parser)
     self.assertTrue(
         ul('').join(text_digits) == ul('0123456789abcdefABCDEF'))
     # the same scan over binary input
     binary_parser = unicode5.BasicParser(
         b"0123456789abcdefghijklmnopqrstuvwxyz"
         b"ABCDEFGHIJKLMNOPQRSTUVWXYZ")
     binary_digits = harvest(binary_parser)
     self.assertTrue(
         join_bytes(binary_digits) == b'0123456789abcdefABCDEF')
示例#8
0
 def test_parse_hex_digit(self):
     """Collect hex digits from text and binary input with parse_hex_digit."""

     def harvest(parser):
         # scan to the end of the input, keeping every value
         # parse_hex_digit returns and skipping characters it declines
         found = []
         while parser.the_char is not None:
             hex_digit = parser.parse_hex_digit()
             if hex_digit is None:
                 parser.next_char()
             else:
                 found.append(hex_digit)
         return found

     # character input: letters beyond f/F and the trailing non-ASCII
     # characters are expected to be skipped
     text_parser = unicode5.BasicParser(
         u8(b"0123456789abcdefghijklmnopqrstuvwxyz"
            b"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
            b"\xd9\xa0\xd9\xa1\xd9\xa2\xd9\xa3\xd9\xa4\xd9\xa5"
            b"\xd9\xa6\xd9\xa7\xd9\xa8\xd9\xa9"))
     text_digits = harvest(text_parser)
     self.assertTrue(
         ul('').join(text_digits) == ul('0123456789abcdefABCDEF'))
     # the same scan over binary input
     binary_parser = unicode5.BasicParser(
         b"0123456789abcdefghijklmnopqrstuvwxyz"
         b"ABCDEFGHIJKLMNOPQRSTUVWXYZ")
     binary_digits = harvest(binary_parser)
     self.assertTrue(
         join_bytes(binary_digits) == b'0123456789abcdefABCDEF')
 def test_is_category(self):
     """Check parsing of category names and the characters they match::

     IsCategory ::= Letters | Marks | Numbers | Punctuation |
         Separators | Symbols | Others
     Letters  ::= 'L' [ultmo]?
     Marks  ::= 'M' [nce]?
     Numbers  ::= 'N' [dlo]?
     Punctuation ::= 'P' [cdseifo]?
     Separators ::= 'Z' [slp]?
     Symbols  ::= 'S' [mcko]?
     Others  ::= 'C' [cfon]?"""
     valid_names = [
         "L", "Lu", "Ll", "Lt", "Lm", "Lo",
         "M", "Mn", "Mc", "Me",
         "N", "Nd", "Nl", "No",
         "P", "Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po",
         "Z", "Zs", "Zl", "Zp",
         "S", "Sm", "Sc", "Sk", "So",
         "C", "Cc", "Cf", "Co", "Cn",
     ]
     invalid_names = ["A", "Za"]
     # every valid name yields a CharClass and is consumed completely
     for name in valid_names:
         parser = xsi.RegularExpressionParser(name)
         parsed = parser.require_is_category()
         self.assertTrue(isinstance(parsed, CharClass),
                         "Missing category: %s" % name)
         self.assertTrue(parser.the_char is None,
                         "Incomplete parse of category: %s" % name)
     for name in invalid_names:
         parser = xsi.RegularExpressionParser(name)
         try:
             parser.require_is_category()
         except xsi.RegularExpressionError:
             # refused outright, which is acceptable
             continue
         # otherwise the parser must have stopped before the end
         self.assertFalse(parser.the_char is None,
                          "Undetected bad category: %s" % name)
     # category name -> (characters it matches, characters it rejects)
     samples = {
         'Nd': (
             u8(b'123\xdb\xb1\xdb\xb2\xdb\xb3'),
             u8(b'ABC\xe2\x85\x95\xe2\x85\x96\xe2\x85\x97\xe2\x85\x98')),
         'S': (u8(b'+<=>\xe2\x81\x84\xe2\x82\xac'), "(){}"),
     }
     for category in dict_keys(samples):
         parser = xsi.RegularExpressionParser(category)
         category_class = parser.require_is_category()
         self.assertTrue(parser.the_char is None)
         accepted, rejected = samples[category]
         for ch in accepted:
             self.assertTrue(category_class.test(ch),
                             "%s not in %s" % (repr(ch), category))
         for ch in rejected:
             self.assertFalse(category_class.test(ch),
                              "%s in %s" % (repr(ch), category))
示例#10
0
 def test_is_category(self):
     """Check parsing of category names and the characters they match::

     IsCategory ::= Letters | Marks | Numbers | Punctuation |
         Separators | Symbols | Others
     Letters  ::= 'L' [ultmo]?
     Marks  ::= 'M' [nce]?
     Numbers  ::= 'N' [dlo]?
     Punctuation ::= 'P' [cdseifo]?
     Separators ::= 'Z' [slp]?
     Symbols  ::= 'S' [mcko]?
     Others  ::= 'C' [cfon]?"""
     valid_names = [
         "L", "Lu", "Ll", "Lt", "Lm", "Lo",
         "M", "Mn", "Mc", "Me",
         "N", "Nd", "Nl", "No",
         "P", "Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po",
         "Z", "Zs", "Zl", "Zp",
         "S", "Sm", "Sc", "Sk", "So",
         "C", "Cc", "Cf", "Co", "Cn",
     ]
     invalid_names = ["A", "Za"]
     # every valid name yields a CharClass and is consumed completely
     for name in valid_names:
         parser = xsi.RegularExpressionParser(name)
         parsed = parser.require_is_category()
         self.assertTrue(isinstance(parsed, CharClass),
                         "Missing category: %s" % name)
         self.assertTrue(parser.the_char is None,
                         "Incomplete parse of category: %s" % name)
     for name in invalid_names:
         parser = xsi.RegularExpressionParser(name)
         try:
             parser.require_is_category()
         except xsi.RegularExpressionError:
             # refused outright, which is acceptable
             continue
         # otherwise the parser must have stopped before the end
         self.assertFalse(parser.the_char is None,
                          "Undetected bad category: %s" % name)
     # category name -> (characters it matches, characters it rejects)
     samples = {
         'Nd': (
             u8(b'123\xdb\xb1\xdb\xb2\xdb\xb3'),
             u8(b'ABC\xe2\x85\x95\xe2\x85\x96\xe2\x85\x97\xe2\x85\x98')),
         'S': (u8(b'+<=>\xe2\x81\x84\xe2\x82\xac'), "(){}"),
     }
     for category in dict_keys(samples):
         parser = xsi.RegularExpressionParser(category)
         category_class = parser.require_is_category()
         self.assertTrue(parser.the_char is None)
         accepted, rejected = samples[category]
         for ch in accepted:
             self.assertTrue(category_class.test(ch),
                             "%s not in %s" % (repr(ch), category))
         for ch in rejected:
             self.assertFalse(category_class.test(ch),
                              "%s in %s" % (repr(ch), category))
示例#11
0
 def test_unicode(self):
     """Load packages that involve non-ASCII file and package names."""
     package = imscp.ContentPackage(TEST_DATA_DIR.join('package_1'))
     first_resource = package.manifest.root.Resources.Resource[0]
     self.assertTrue(len(first_resource.File) == 1)
     entry = first_resource.File[0]
     # the href is expected in its %-encoded form
     self.assertTrue(
         isinstance(entry, imscp.File) and
         str(entry.href) == "%E8%8B%B1%E5%9B%BD.xml",
         "File path")
     # the referenced document must load and carry the expected text
     document = xmlns.Document(baseURI=entry.resolve_uri(entry.href))
     document.read()
     self.assertTrue(
         document.root.xmlname == 'tag' and
         document.root.get_value() ==
         u8(b'Unicode Test: \xe8\x8b\xb1\xe5\x9b\xbd'))
     # a package opened from a non-ASCII path reports that name
     named_package = imscp.ContentPackage(
         TEST_DATA_DIR.join(u8(b'\xe8\x8b\xb1\xe5\x9b\xbd')))
     self.assertTrue(
         named_package.GetPackageName() == u8(b'\xe8\x8b\xb1\xe5\x9b\xbd'),
         "Unicode package name test")
示例#12
0
 def test_char_class_esc(self):
     """Check parsing of character class escapes and what they match::

     charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
     """
     # escape expression -> (characters it matches, characters it rejects)
     samples = {
         '\\?': ("?", "\\"),
         '\\d': (
             u8(b'123\xd9\xa1\xd9\xa2\xd9\xa3'),
             u8(b'ABC\xe2\x82\x81\xe2\x82\x82\xe2\x82\x83')),
         '\\p{S}': (u8(b'+<=>\xe2\x81\x84\xe2\x82\xac'), "(){}"),
         '\\P{S}': ("(){}", u8(b'+<=>\xe2\x81\x84\xe2\x82\xac')),
     }
     for escape in dict_keys(samples):
         parser = xsi.RegularExpressionParser(escape)
         escape_class = parser.require_char_class_esc()
         # nothing may be left unparsed
         self.assertTrue(parser.the_char is None)
         accepted, rejected = samples[escape]
         for ch in accepted:
             self.assertTrue(escape_class.test(ch),
                             "%s not in %s" % (repr(ch), escape))
         for ch in rejected:
             self.assertFalse(escape_class.test(ch),
                              "%s in %s" % (repr(ch), escape))
示例#13
0
 def test_char_class_esc(self):
     """Check parsing of character class escapes and what they match::

     charClassEsc ::= ( SingleCharEsc | MultiCharEsc | catEsc | complEsc )
     """
     # escape expression -> (characters it matches, characters it rejects)
     samples = {
         '\\?': ("?", "\\"),
         '\\d': (
             u8(b'123\xd9\xa1\xd9\xa2\xd9\xa3'),
             u8(b'ABC\xe2\x82\x81\xe2\x82\x82\xe2\x82\x83')),
         '\\p{S}': (u8(b'+<=>\xe2\x81\x84\xe2\x82\xac'), "(){}"),
         '\\P{S}': ("(){}", u8(b'+<=>\xe2\x81\x84\xe2\x82\xac')),
     }
     for escape in dict_keys(samples):
         parser = xsi.RegularExpressionParser(escape)
         escape_class = parser.require_char_class_esc()
         # nothing may be left unparsed
         self.assertTrue(parser.the_char is None)
         accepted, rejected = samples[escape]
         for ch in accepted:
             self.assertTrue(escape_class.test(ch),
                             "%s not in %s" % (repr(ch), escape))
         for ch in rejected:
             self.assertFalse(escape_class.test(ch),
                              "%s in %s" % (repr(ch), escape))
示例#14
0
 def test_compl_esc(self):
     r"""Check parsing of complement escapes and what they match::

     complEsc ::= '\P{' charProp '}' """
     # The docstring is a raw string because '\P' is not a recognised
     # escape sequence; in an ordinary literal newer interpreters emit
     # an invalid-escape warning for it.
     # escape expression -> (characters it matches, characters it rejects)
     samples = {
         '\\P{Nd}': (
             u8(b'ABC\xe2\x85\x95\xe2\x85\x96\xe2\x85\x97\xe2\x85\x98'),
             u8(b'123\xdb\xb1\xdb\xb2\xdb\xb3')),
         '\\P{S}': ("(){}", u8(b'+<=>\xe2\x81\x84\xe2\x82\xac')),
         '\\P{IsBasicLatin}': (ul("\xc0\xdf\xa9"), "ABC"),
     }
     for escape in dict_keys(samples):
         parser = xsi.RegularExpressionParser(escape)
         escape_class = parser.require_compl_esc()
         # nothing may be left unparsed
         self.assertTrue(parser.the_char is None)
         accepted, rejected = samples[escape]
         for ch in accepted:
             self.assertTrue(escape_class.test(ch),
                             "%s not in %s" % (repr(ch), escape))
         for ch in rejected:
             self.assertFalse(escape_class.test(ch),
                              "%s in %s" % (repr(ch), escape))
示例#15
0
 def test_compl_esc(self):
     r"""Check parsing of complement escapes and what they match::

     complEsc ::= '\P{' charProp '}' """
     # The docstring is a raw string because '\P' is not a recognised
     # escape sequence; in an ordinary literal newer interpreters emit
     # an invalid-escape warning for it.
     # escape expression -> (characters it matches, characters it rejects)
     samples = {
         '\\P{Nd}': (
             u8(b'ABC\xe2\x85\x95\xe2\x85\x96\xe2\x85\x97\xe2\x85\x98'),
             u8(b'123\xdb\xb1\xdb\xb2\xdb\xb3')),
         '\\P{S}': ("(){}", u8(b'+<=>\xe2\x81\x84\xe2\x82\xac')),
         '\\P{IsBasicLatin}': (ul("\xc0\xdf\xa9"), "ABC"),
     }
     for escape in dict_keys(samples):
         parser = xsi.RegularExpressionParser(escape)
         escape_class = parser.require_compl_esc()
         # nothing may be left unparsed
         self.assertTrue(parser.the_char is None)
         accepted, rejected = samples[escape]
         for ch in accepted:
             self.assertTrue(escape_class.test(ch),
                             "%s not in %s" % (repr(ch), escape))
         for ch in rejected:
             self.assertFalse(escape_class.test(ch),
                              "%s in %s" % (repr(ch), escape))
示例#16
0
 def test_parse_integer(self):
     """Exercise parse_integer bounds, digit limits and input types."""
     parser = unicode5.BasicParser(ul("23p"))
     # no arguments at all: unbounded
     self.assertTrue(parser.parse_integer() == 23)
     self.assertTrue(parser.pos == 2)
     parser.setpos(1)
     # lower bound only
     self.assertTrue(parser.parse_integer(4) is None)
     self.assertTrue(parser.parse_integer(2) == 3)
     parser.setpos(1)
     # lower and upper bound together
     self.assertTrue(parser.parse_integer(0, 2) is None)
     self.assertTrue(parser.parse_integer(1, 4) == 3)
     parser.setpos(0)
     # a negative minimum is an error...
     try:
         parser.parse_integer(-1)
     except ValueError:
         # ...and the parser must stay where it was
         self.assertTrue(parser.pos == 0)
     else:
         self.fail("min = -1 didn't raise exception")
     # a minimum above the maximum is an error too...
     try:
         parser.parse_integer(3, 1)
     except ValueError:
         # ...again without moving the parser
         self.assertTrue(parser.pos == 0)
     else:
         self.fail("min > max didn't raise exception")
     # input far longer than an ordinary machine integer
     long_digits = ul("123456789" + "0" * 256)
     parser = unicode5.BasicParser(long_digits)
     # the third argument caps the number of digits read
     self.assertTrue(parser.parse_integer(0, None, 10) == 1234567890)
     # the next ten digits are all zeros
     self.assertTrue(parser.parse_integer(0, None, 10) == 0)
     self.assertTrue(parser.pos == 20)
     parser.setpos(0)
     # a larger value
     self.assertTrue(
         parser.parse_integer(0, None, 15) == 123456789000000)
     # Arabic digits must not be parsed
     parser = unicode5.BasicParser(
         u8(b'\xd9\xa0\xd9\xa1\xd9\xa2\xd9\xa3\xd9\xa4\xd9\xa5'
            b'\xd9\xa6\xd9\xa7\xd9\xa8\xd9\xa9'))
     for _ in range3(10):
         self.assertTrue(parser.parse_integer() is None)
         parser.next_char()
     # binary input
     parser = unicode5.BasicParser(b"234p")
     self.assertTrue(parser.parse_integer(max_digits=1) == 2)
     self.assertTrue(parser.parse_integer(0, 2) is None)
     self.assertTrue(parser.parse_integer() == 34)
     parser.next_char()
     self.assertTrue(parser.parse_integer() is None)
示例#17
0
 def test_parse_integer(self):
     """Exercise parse_integer bounds, digit limits and input types."""
     parser = unicode5.BasicParser(ul("23p"))
     # no arguments at all: unbounded
     self.assertTrue(parser.parse_integer() == 23)
     self.assertTrue(parser.pos == 2)
     parser.setpos(1)
     # lower bound only
     self.assertTrue(parser.parse_integer(4) is None)
     self.assertTrue(parser.parse_integer(2) == 3)
     parser.setpos(1)
     # lower and upper bound together
     self.assertTrue(parser.parse_integer(0, 2) is None)
     self.assertTrue(parser.parse_integer(1, 4) == 3)
     parser.setpos(0)
     # a negative minimum is an error...
     try:
         parser.parse_integer(-1)
     except ValueError:
         # ...and the parser must stay where it was
         self.assertTrue(parser.pos == 0)
     else:
         self.fail("min = -1 didn't raise exception")
     # a minimum above the maximum is an error too...
     try:
         parser.parse_integer(3, 1)
     except ValueError:
         # ...again without moving the parser
         self.assertTrue(parser.pos == 0)
     else:
         self.fail("min > max didn't raise exception")
     # input far longer than an ordinary machine integer
     long_digits = ul("123456789" + "0" * 256)
     parser = unicode5.BasicParser(long_digits)
     # the third argument caps the number of digits read
     self.assertTrue(parser.parse_integer(0, None, 10) == 1234567890)
     # the next ten digits are all zeros
     self.assertTrue(parser.parse_integer(0, None, 10) == 0)
     self.assertTrue(parser.pos == 20)
     parser.setpos(0)
     # a larger value
     self.assertTrue(
         parser.parse_integer(0, None, 15) == 123456789000000)
     # Arabic digits must not be parsed
     parser = unicode5.BasicParser(
         u8(b'\xd9\xa0\xd9\xa1\xd9\xa2\xd9\xa3\xd9\xa4\xd9\xa5'
            b'\xd9\xa6\xd9\xa7\xd9\xa8\xd9\xa9'))
     for _ in range3(10):
         self.assertTrue(parser.parse_integer() is None)
         parser.next_char()
     # binary input
     parser = unicode5.BasicParser(b"234p")
     self.assertTrue(parser.parse_integer(max_digits=1) == 2)
     self.assertTrue(parser.parse_integer(0, 2) is None)
     self.assertTrue(parser.parse_integer() == 34)
     parser.next_char()
     self.assertTrue(parser.parse_integer() is None)
示例#18
0
 def test_zip_write(self):
     """Export a package to a zip file and read the result back."""
     source_package = imscp.ContentPackage(
         TEST_DATA_DIR.join('package_1.zip'))
     # NOTE(review): dList presumably lists directories removed on
     # teardown - confirm against the fixture code
     self.dList.append(source_package.dPath)
     source_package.ExportToPIF('Package2.zip')
     exported = imscp.ContentPackage('Package2.zip')
     self.dList.append(exported.dPath)
     entry = exported.manifest.root.Resources.Resource[0].File[0]
     # the document inside the exported package must still be readable
     document = xmlns.Document(baseURI=entry.resolve_uri(entry.href))
     document.read()
     self.assertTrue(
         document.root.xmlname == 'tag' and
         document.root.get_value() ==
         u8(b'Unicode Test: \xe8\x8b\xb1\xe5\x9b\xbd'))
示例#19
0
 def test_multi_char_esc(self):
     r"""Check multi-character escapes in lower- and upper-case form::

     MultiCharEsc ::= '\' [sSiIcCdDwW]"""
     # The docstring is raw so that the backslash in the grammar
     # survives; in an ordinary literal it would be read as escaping
     # the quote that follows it and be dropped.
     # lower-case escape letter -> (characters it matches, characters
     # it rejects); the upper-case escape must do exactly the opposite
     samples = {
         's': ("\x09\x0A\x0D ", "ABC"),
         'i': ("ABC_:", "-123"),
         'c': ("ABC_:-_123", "@<>?"),
         'd': (u8(b'123\xd9\xa1\xd9\xa2\xd9\xa3'),
               u8(b'ABC\xe2\x82\x81\xe2\x82\x82\xe2\x82\x83')),
         'w': ("ABC", u8(b'!\xcd\xbe \xe2\x80\x82\x0c')),
     }
     for letter in dict_keys(samples):
         lower_parser = xsi.RegularExpressionParser("\\" + letter)
         lower_class = lower_parser.require_multi_char_esc()
         # both characters of the escape must have been consumed
         self.assertTrue(lower_parser.pos == 2)
         upper_parser = xsi.RegularExpressionParser("\\" + letter.upper())
         upper_class = upper_parser.require_multi_char_esc()
         self.assertTrue(upper_parser.pos == 2)
         accepted, rejected = samples[letter]
         for ch in accepted:
             self.assertTrue(
                 lower_class.test(ch),
                 "%s not in \\%s" % (repr(ch), letter))
             self.assertFalse(
                 upper_class.test(ch),
                 "%s in \\%s" % (repr(ch), letter.upper()))
         for ch in rejected:
             self.assertFalse(
                 lower_class.test(ch),
                 "%s in \\%s" % (repr(ch), letter))
             # this assertion fails when the character is missing from
             # the upper-case class, so the message says "not in"
             self.assertTrue(
                 upper_class.test(ch),
                 "%s not in \\%s" % (repr(ch), letter.upper()))
     # an unknown escape letter must be refused
     parser = xsi.RegularExpressionParser("\\x")
     try:
         parser.require_multi_char_esc()
     except xsi.RegularExpressionError:
         pass
     else:
         self.fail("\\x")
示例#20
0
 def test_zip_read(self):
     """Open a zipped package and read a document from inside it."""
     package = imscp.ContentPackage(TEST_DATA_DIR.join('package_1.zip'))
     self.assertTrue(package.dPath.isdir(),
                     "Zip constructor must create a temp directory")
     # register the temporary directory so that it gets cleaned up
     self.dList.append(package.dPath)
     self.assertTrue(package.GetPackageName() == 'package_1',
                     "Zip extension not removed for name")
     entry = package.manifest.root.Resources.Resource[0].File[0]
     document = xmlns.Document(baseURI=entry.resolve_uri(entry.href))
     document.read()
     self.assertTrue(
         document.root.xmlname == 'tag' and
         document.root.get_value() ==
         u8(b'Unicode Test: \xe8\x8b\xb1\xe5\x9b\xbd'))
示例#21
0
 def test_multi_char_esc(self):
     r"""Check multi-character escapes in lower- and upper-case form::

     MultiCharEsc ::= '\' [sSiIcCdDwW]"""
     # The docstring is raw so that the backslash in the grammar
     # survives; in an ordinary literal it would be read as escaping
     # the quote that follows it and be dropped.
     # lower-case escape letter -> (characters it matches, characters
     # it rejects); the upper-case escape must do exactly the opposite
     samples = {
         's': ("\x09\x0A\x0D ", "ABC"),
         'i': ("ABC_:", "-123"),
         'c': ("ABC_:-_123", "@<>?"),
         'd': (u8(b'123\xd9\xa1\xd9\xa2\xd9\xa3'),
               u8(b'ABC\xe2\x82\x81\xe2\x82\x82\xe2\x82\x83')),
         'w': ("ABC", u8(b'!\xcd\xbe \xe2\x80\x82\x0c')),
     }
     for letter in dict_keys(samples):
         lower_parser = xsi.RegularExpressionParser("\\" + letter)
         lower_class = lower_parser.require_multi_char_esc()
         # both characters of the escape must have been consumed
         self.assertTrue(lower_parser.pos == 2)
         upper_parser = xsi.RegularExpressionParser("\\" + letter.upper())
         upper_class = upper_parser.require_multi_char_esc()
         self.assertTrue(upper_parser.pos == 2)
         accepted, rejected = samples[letter]
         for ch in accepted:
             self.assertTrue(
                 lower_class.test(ch),
                 "%s not in \\%s" % (repr(ch), letter))
             self.assertFalse(
                 upper_class.test(ch),
                 "%s in \\%s" % (repr(ch), letter.upper()))
         for ch in rejected:
             self.assertFalse(
                 lower_class.test(ch),
                 "%s in \\%s" % (repr(ch), letter))
             # this assertion fails when the character is missing from
             # the upper-case class, so the message says "not in"
             self.assertTrue(
                 upper_class.test(ch),
                 "%s not in \\%s" % (repr(ch), letter.upper()))
     # an unknown escape letter must be refused
     parser = xsi.RegularExpressionParser("\\x")
     try:
         parser.require_multi_char_esc()
     except xsi.RegularExpressionError:
         pass
     else:
         self.fail("\\x")
示例#22
0
 def test_parse_digits(self):
     """Exercise parse_digits limits on text and binary input."""
     parser = unicode5.BasicParser(ul("23p"))
     # minimum of 0 digits
     self.assertTrue(parser.parse_digits(0) == ul("23"))
     self.assertTrue(parser.pos == 2)
     # asking for at least 2 digits with only one left must fail...
     parser.setpos(1)
     self.assertTrue(parser.parse_digits(2) is None)
     # ...without moving the parser
     self.assertTrue(parser.pos == 1)
     # a negative minimum is an error...
     try:
         parser.parse_digits(-1)
     except ValueError:
         # ...that leaves the parser where it was
         self.assertTrue(parser.pos == 1)
     else:
         self.fail("min=-1 didn't raise exception")
     # a minimum above the maximum is an error too...
     try:
         parser.parse_digits(3, 1)
     except ValueError:
         # ...again without moving the parser
         self.assertTrue(parser.pos == 1)
     else:
         self.fail("min > max didn't raise exception")
     # input far longer than an ordinary machine integer
     long_digits = ul("123456789" + "0" * 256)
     parser = unicode5.BasicParser(long_digits)
     self.assertTrue(len(parser.parse_digits(0, 256)) == 256)
     # the nine digits left over are zeros and are returned as they are
     self.assertTrue(parser.parse_digits(0, 256) == ul("000000000"))
     # Arabic digits must not be parsed
     parser = unicode5.BasicParser(
         u8(b'\xd9\xa0\xd9\xa1\xd9\xa2\xd9\xa3\xd9\xa4\xd9\xa5'
            b'\xd9\xa6\xd9\xa7\xd9\xa8\xd9\xa9'))
     for _ in range3(10):
         self.assertTrue(parser.parse_digits(1) is None)
         parser.next_char()
     # binary input
     parser = unicode5.BasicParser(b"234p")
     # a single digit still comes back as a string
     self.assertTrue(parser.parse_digits(1, 1) == b"2")
     self.assertTrue(parser.parse_digits(1) == b"34")
     parser.next_char()
     self.assertTrue(parser.parse_digits(1) is None)
     self.assertTrue(parser.parse_digits(0) == b"")
示例#23
0
 def test_parse_digits(self):
     """Exercise parse_digits limits on text and binary input."""
     parser = unicode5.BasicParser(ul("23p"))
     # minimum of 0 digits
     self.assertTrue(parser.parse_digits(0) == ul("23"))
     self.assertTrue(parser.pos == 2)
     # asking for at least 2 digits with only one left must fail...
     parser.setpos(1)
     self.assertTrue(parser.parse_digits(2) is None)
     # ...without moving the parser
     self.assertTrue(parser.pos == 1)
     # a negative minimum is an error...
     try:
         parser.parse_digits(-1)
     except ValueError:
         # ...that leaves the parser where it was
         self.assertTrue(parser.pos == 1)
     else:
         self.fail("min=-1 didn't raise exception")
     # a minimum above the maximum is an error too...
     try:
         parser.parse_digits(3, 1)
     except ValueError:
         # ...again without moving the parser
         self.assertTrue(parser.pos == 1)
     else:
         self.fail("min > max didn't raise exception")
     # input far longer than an ordinary machine integer
     long_digits = ul("123456789" + "0" * 256)
     parser = unicode5.BasicParser(long_digits)
     self.assertTrue(len(parser.parse_digits(0, 256)) == 256)
     # the nine digits left over are zeros and are returned as they are
     self.assertTrue(parser.parse_digits(0, 256) == ul("000000000"))
     # Arabic digits must not be parsed
     parser = unicode5.BasicParser(
         u8(b'\xd9\xa0\xd9\xa1\xd9\xa2\xd9\xa3\xd9\xa4\xd9\xa5'
            b'\xd9\xa6\xd9\xa7\xd9\xa8\xd9\xa9'))
     for _ in range3(10):
         self.assertTrue(parser.parse_digits(1) is None)
         parser.next_char()
     # binary input
     parser = unicode5.BasicParser(b"234p")
     # a single digit still comes back as a string
     self.assertTrue(parser.parse_digits(1, 1) == b"2")
     self.assertTrue(parser.parse_digits(1) == b"34")
     parser.next_char()
     self.assertTrue(parser.parse_digits(1) is None)
     self.assertTrue(parser.parse_digits(0) == b"")
示例#24
0
 def test_parse_digit_value(self):
     """Exercise parse_digit_value on text and binary input."""
     parser = unicode5.BasicParser(ul("2p"))
     # the digit is returned as a number and consumed
     self.assertTrue(parser.parse_digit_value() == 2)
     self.assertTrue(parser.pos == 1)
     # a letter yields nothing, and neither does the end of input
     self.assertTrue(parser.parse_digit_value() is None)
     parser.next_char()
     self.assertTrue(parser.parse_digit_value() is None)
     # Arabic digits must not be parsed
     parser = unicode5.BasicParser(
         u8(b'\xd9\xa0\xd9\xa1\xd9\xa2\xd9\xa3\xd9\xa4\xd9\xa5'
            b'\xd9\xa6\xd9\xa7\xd9\xa8\xd9\xa9'))
     for _ in range3(10):
         self.assertTrue(parser.parse_digit_value() is None)
         parser.next_char()
     # binary input
     parser = unicode5.BasicParser(b"2p")
     self.assertTrue(parser.parse_digit_value() == 2)
     self.assertTrue(parser.parse_digit_value() is None)
     parser.next_char()
     self.assertTrue(parser.parse_digit_value() is None)
示例#25
0
 def test_match_digit(self):
     """Exercise match_digit on text and binary input."""
     parser = unicode5.BasicParser(ul("2p"))
     # matches on the digit, then fails on the letter and at the end
     self.assertTrue(parser.match_digit())
     parser.next_char()
     self.assertFalse(parser.match_digit())
     parser.next_char()
     self.assertFalse(parser.match_digit())
     # Arabic digits must not match
     parser = unicode5.BasicParser(
         u8(b'\xd9\xa0\xd9\xa1\xd9\xa2\xd9\xa3\xd9\xa4\xd9\xa5'
            b'\xd9\xa6\xd9\xa7\xd9\xa8\xd9\xa9'))
     for _ in range3(10):
         self.assertFalse(parser.match_digit())
         parser.next_char()
     # the same sequence over binary input
     parser = unicode5.BasicParser(b"2p")
     self.assertTrue(parser.match_digit())
     parser.next_char()
     self.assertFalse(parser.match_digit())
     parser.next_char()
     self.assertFalse(parser.match_digit())
示例#26
0
 def test_match_digit(self):
     """Exercise match_digit on text and binary input."""
     parser = unicode5.BasicParser(ul("2p"))
     # matches on the digit, then fails on the letter and at the end
     self.assertTrue(parser.match_digit())
     parser.next_char()
     self.assertFalse(parser.match_digit())
     parser.next_char()
     self.assertFalse(parser.match_digit())
     # Arabic digits must not match
     parser = unicode5.BasicParser(
         u8(b'\xd9\xa0\xd9\xa1\xd9\xa2\xd9\xa3\xd9\xa4\xd9\xa5'
            b'\xd9\xa6\xd9\xa7\xd9\xa8\xd9\xa9'))
     for _ in range3(10):
         self.assertFalse(parser.match_digit())
         parser.next_char()
     # the same sequence over binary input
     parser = unicode5.BasicParser(b"2p")
     self.assertTrue(parser.match_digit())
     parser.next_char()
     self.assertFalse(parser.match_digit())
     parser.next_char()
     self.assertFalse(parser.match_digit())
示例#27
0
 def test_parse_digit_value(self):
     """Exercise parse_digit_value on text and binary input."""
     parser = unicode5.BasicParser(ul("2p"))
     # the digit is returned as a number and consumed
     self.assertTrue(parser.parse_digit_value() == 2)
     self.assertTrue(parser.pos == 1)
     # a letter yields nothing, and neither does the end of input
     self.assertTrue(parser.parse_digit_value() is None)
     parser.next_char()
     self.assertTrue(parser.parse_digit_value() is None)
     # Arabic digits must not be parsed
     parser = unicode5.BasicParser(
         u8(b'\xd9\xa0\xd9\xa1\xd9\xa2\xd9\xa3\xd9\xa4\xd9\xa5'
            b'\xd9\xa6\xd9\xa7\xd9\xa8\xd9\xa9'))
     for _ in range3(10):
         self.assertTrue(parser.parse_digit_value() is None)
         parser.next_char()
     # binary input
     parser = unicode5.BasicParser(b"2p")
     self.assertTrue(parser.parse_digit_value() == 2)
     self.assertTrue(parser.parse_digit_value() is None)
     parser.next_char()
     self.assertTrue(parser.parse_digit_value() is None)
示例#28
0
 def test_char_class(self):
     """Check parsing of character classes and what they match::

     charClass ::= charClassEsc | charClassExpr | WildcardEsc """
     # class expression -> (characters it matches, characters it rejects)
     samples = {
         '\\P{S}': ("(){}", u8(b'+<=>\xe2\x81\x84\xe2\x82\xac')),
         # the backslashes are doubled because '\[' and '\]' are not
         # valid escape sequences in an ordinary string literal; the
         # resulting expression is unchanged
         '[A-z-[\\[-\\]]]': ("AZaz^_`", "[\\]@{-"),
         '.': ("abcABC ", "\x0a\x0d"),
     }
     for expr in dict_keys(samples):
         parser = xsi.RegularExpressionParser(expr)
         try:
             char_class = parser.require_char_class()
         except xsi.RegularExpressionError:
             # record which expression failed, then let the error through
             logging.debug("Failed to parse %s", repr(expr))
             raise
         # nothing may be left unparsed
         self.assertTrue(parser.the_char is None)
         accepted, rejected = samples[expr]
         for ch in accepted:
             self.assertTrue(char_class.test(ch),
                             "%s not in %s" % (repr(ch), expr))
         for ch in rejected:
             self.assertFalse(char_class.test(ch),
                              "%s in %s" % (repr(ch), expr))
示例#29
0
 def test_char_class(self):
     """::
     charClass ::= charClassEsc | charClassExpr | WildcardEsc

     Each pattern in the table is parsed with require_char_class and
     the resulting class is tested against characters that must be
     accepted and characters that must be rejected."""
     tests = {
         # pattern: (characters that must match, characters that must not)
         '\\P{S}': ("(){}", u8(b'+<=>\xe2\x81\x84\xe2\x82\xac')),
         # backslashes are doubled: "\[" and "\]" are not valid Python
         # escape sequences; the resulting string value is unchanged
         '[A-z-[\\[-\\]]]': ("AZaz^_`", "[\\]@{-"),
         '.': ("abcABC ", "\x0a\x0d")
     }
     for b in dict_keys(tests):
         p = xsi.RegularExpressionParser(b)
         try:
             cclass = p.require_char_class()
         except xsi.RegularExpressionError:
             # record which pattern failed before re-raising
             logging.debug("Failed to parse %s", repr(b))
             raise
         # no current character should remain once the class is parsed
         self.assertTrue(p.the_char is None)
         t1, t2 = tests[b]
         for c in t1:
             self.assertTrue(cclass.test(c), "%s not in %s" % (repr(c), b))
         for c in t2:
             self.assertFalse(cclass.test(c), "%s in %s" % (repr(c), b))
示例#30
0
 def test_literals(self):
     """Check py2.u8 and py2.ul on byte literals and on common misuse.

     The first part feeds correctly written byte literals to both
     helpers and checks the value and type of the result.  The numbered
     cases then cover literals written without the ``b`` prefix or with
     a stray ``u`` prefix; the outcome expected for some of them depends
     on the major Python version."""
     data1 = "hello"
     if sys.version_info[0] < 3:
         target_type = types.UnicodeType
     else:
         target_type = str
     self.assertTrue(py2.u8(b"hello") == data1)
     self.assertTrue(isinstance(py2.u8(b"hello"), target_type))
     self.assertTrue(py2.ul(b"hello") == data1)
     self.assertTrue(isinstance(py2.ul(b"hello"), target_type))
     data2 = b'Caf\xc3\xa9'.decode('utf-8')
     self.assertTrue(py2.u8(b'Caf\xc3\xa9') == data2)
     self.assertTrue(py2.ul(b'Caf\xe9') == data2)
     data3 = b'\xe8\x8b\xb1\xe5\x9b\xbd'.decode('utf-8')
     self.assertTrue(py2.u8(b'\xe8\x8b\xb1\xe5\x9b\xbd') == data3)
     # Catch common errors
     # 1: missing b in literal, OK for ASCII text
     self.assertTrue(py2.u8("hello") == data1)
     self.assertTrue(py2.ul("hello") == data1)
     # 2: missing b, u8 fails for 8-bit character
     try:
         py2.u8('Caf\xe9')
         self.fail('8-bit unqualified literal (bad UTF-8)')
     except UnicodeDecodeError:
         # must be listed before ValueError, which is its base class
         self.fail('8-bit unqualified literal decoded as utf-8')
     except ValueError:
         pass
     # ... but in Python 2 we can't catch valid utf-8 sequences
     # pretending to be unicode strings
     try:
         py2.u8('Caf\xc3\xa9')
         self.assertTrue(sys.version_info[0] < 3,
                         '8-bit unqualified literal (good UTF-8)')
     except ValueError:
         pass
     # 3: missing b, ul accepted with 8-bit character
     # (the comparison belongs inside the assertion; outside it the
     # result of == is discarded and only truthiness is checked)
     self.assertTrue(py2.ul('Caf\xe9') == data2)
     # 4: missing b, u8 fails for 16-bit character
     try:
         # in python 2 we can't catch this but it was probably a bug
         # before anyway due to the missing 'u'
         result = py2.u8('\u82f1\u56fd')
         self.assertTrue(sys.version_info[0] < 3,
                         '16-bit unqualified literal')
         self.assertTrue(result == '\\u82f1\\u56fd')
     except ValueError:
         self.assertFalse(sys.version_info[0] < 3,
                          '16-bit unqualified literal')
     # 5: missing b, ul fails for 16-bit character
     try:
         result = py2.ul('\u82f1\u56fd')
         self.assertTrue(sys.version_info[0] < 3,
                         '16-bit unqualified literal')
         self.assertTrue(result == '\\u82f1\\u56fd')
     except ValueError:
         self.assertFalse(sys.version_info[0] < 3,
                          '16-bit unqualified literal')
     # 6: input already qualified with 'u', benign for ASCII
     self.assertTrue(py2.u8(u"hello") == data1)
     self.assertTrue(py2.ul(u"hello") == data1)
     # ...u8 fails for 8-bit character
     try:
         py2.u8(u'Caf\xe9')
         self.fail('8-bit qualified literal')
     except UnicodeEncodeError:
         self.fail('8-bit qualified literal uncaught encode error')
     except ValueError:
         pass
     # ...ul accepted with 8-bit character
     self.assertTrue(py2.ul(u'Caf\xe9') == data2)
     # ...u8 fails for 16-bit character
     try:
         py2.u8(u'\u82f1\u56fd')
         self.fail('16-bit qualified literal')
     except UnicodeEncodeError:
         self.fail('16-bit qualified literal uncaught encode error')
     except ValueError:
         pass
     # ...ul fails for 16-bit character
     try:
         py2.ul(u'\u82f1\u56fd')
         self.fail('16-bit qualified literal')
     except UnicodeEncodeError:
         self.fail('16-bit qualified literal uncaught encode error')
     except ValueError:
         pass
示例#31
0
 def test_literals(self):
     """Check py2.u8 and py2.ul on byte literals and on common misuse.

     The first part feeds correctly written byte literals to both
     helpers and checks the value and type of the result.  The numbered
     cases then cover literals written without the ``b`` prefix or with
     a stray ``u`` prefix; the outcome expected for some of them depends
     on the major Python version."""
     data1 = "hello"
     if sys.version_info[0] < 3:
         target_type = types.UnicodeType
     else:
         target_type = str
     self.assertTrue(py2.u8(b"hello") == data1)
     self.assertTrue(isinstance(py2.u8(b"hello"), target_type))
     self.assertTrue(py2.ul(b"hello") == data1)
     self.assertTrue(isinstance(py2.ul(b"hello"), target_type))
     data2 = b'Caf\xc3\xa9'.decode('utf-8')
     self.assertTrue(py2.u8(b'Caf\xc3\xa9') == data2)
     self.assertTrue(py2.ul(b'Caf\xe9') == data2)
     data3 = b'\xe8\x8b\xb1\xe5\x9b\xbd'.decode('utf-8')
     self.assertTrue(py2.u8(b'\xe8\x8b\xb1\xe5\x9b\xbd') == data3)
     # Catch common errors
     # 1: missing b in literal, OK for ASCII text
     self.assertTrue(py2.u8("hello") == data1)
     self.assertTrue(py2.ul("hello") == data1)
     # 2: missing b, u8 fails for 8-bit character
     try:
         py2.u8('Caf\xe9')
         self.fail('8-bit unqualified literal (bad UTF-8)')
     except UnicodeDecodeError:
         # must be listed before ValueError, which is its base class
         self.fail('8-bit unqualified literal decoded as utf-8')
     except ValueError:
         pass
     # ... but in Python 2 we can't catch valid utf-8 sequences
     # pretending to be unicode strings
     try:
         py2.u8('Caf\xc3\xa9')
         self.assertTrue(sys.version_info[0] < 3,
                         '8-bit unqualified literal (good UTF-8)')
     except ValueError:
         pass
     # 3: missing b, ul accepted with 8-bit character
     # (the comparison belongs inside the assertion; outside it the
     # result of == is discarded and only truthiness is checked)
     self.assertTrue(py2.ul('Caf\xe9') == data2)
     # 4: missing b, u8 fails for 16-bit character
     try:
         # in python 2 we can't catch this but it was probably a bug
         # before anyway due to the missing 'u'
         result = py2.u8('\u82f1\u56fd')
         self.assertTrue(sys.version_info[0] < 3,
                         '16-bit unqualified literal')
         self.assertTrue(result == '\\u82f1\\u56fd')
     except ValueError:
         self.assertFalse(sys.version_info[0] < 3,
                          '16-bit unqualified literal')
     # 5: missing b, ul fails for 16-bit character
     try:
         result = py2.ul('\u82f1\u56fd')
         self.assertTrue(sys.version_info[0] < 3,
                         '16-bit unqualified literal')
         self.assertTrue(result == '\\u82f1\\u56fd')
     except ValueError:
         self.assertFalse(sys.version_info[0] < 3,
                          '16-bit unqualified literal')
     # 6: input already qualified with 'u', benign for ASCII
     self.assertTrue(py2.u8(u"hello") == data1)
     self.assertTrue(py2.ul(u"hello") == data1)
     # ...u8 fails for 8-bit character
     try:
         py2.u8(u'Caf\xe9')
         self.fail('8-bit qualified literal')
     except UnicodeEncodeError:
         self.fail('8-bit qualified literal uncaught encode error')
     except ValueError:
         pass
     # ...ul accepted with 8-bit character
     self.assertTrue(py2.ul(u'Caf\xe9') == data2)
     # ...u8 fails for 16-bit character
     try:
         py2.u8(u'\u82f1\u56fd')
         self.fail('16-bit qualified literal')
     except UnicodeEncodeError:
         self.fail('16-bit qualified literal uncaught encode error')
     except ValueError:
         pass
     # ...ul fails for 16-bit character
     try:
         py2.ul(u'\u82f1\u56fd')
         self.fail('16-bit qualified literal')
     except UnicodeEncodeError:
         self.fail('16-bit qualified literal uncaught encode error')
     except ValueError:
         pass
示例#32
0
import codecs
import logging
import unittest

from sys import maxunicode

import pyslet.unicode5 as unicode5

from pyslet.py2 import (byte, character, is_text, join_bytes, py2, range3, u8,
                        ul)

# Upper bound on code points used by the tests: 0x10FFFF, lowered to
# sys.maxunicode when the interpreter reports a smaller maximum.
MAX_CHAR = min(0x10FFFF, maxunicode)

# Two-character sample (U+82F1, U+56FD) supplied as UTF-8 encoded bytes.
CHINESE_TEST = u8(b'\xe8\x8b\xb1\xe5\x9b\xbd')


def suite():
    """Return a TestSuite holding the tests of every TestCase class here.

    Tests are collected from EncodingTests, CharClassTests, UCDTests and
    ParserTests, in that order, using a fresh TestLoader whose default
    method prefix is 'test'.  TestLoader.loadTestsFromTestCase replaces
    unittest.makeSuite, which was deprecated in Python 3.11 and removed
    in Python 3.13.
    """
    loader = unittest.TestLoader()
    return unittest.TestSuite([
        loader.loadTestsFromTestCase(case)
        for case in (EncodingTests, CharClassTests, UCDTests, ParserTests)
    ])


class EncodingTests(unittest.TestCase):
    def test_detection(self):
        test_string = u"Caf\xe9"
        for codec, bom in (('utf_8', codecs.BOM_UTF8), ('utf_32_be',
                                                        codecs.BOM_UTF32_BE),
示例#33
0
 def test_parse_hex_digits(self):
     """Check parse_hex_digits on parallel text and binary parsers.

     Covers rejected bounds, scans of a mixed source with different
     min/max settings, a very long digit run and Arabic digits."""
     src = ul("23.FG.fg.0.00.abcdefABCDEF0123456789")
     p = unicode5.BasicParser(src)
     pb = unicode5.BasicParser(src.encode('ascii'))

     def scan(expected, *bounds):
         # step both parsers through the source together, comparing
         # each run of hex digits found with the next expected value
         count = 0
         while p.the_char is not None:
             text_run = p.parse_hex_digits(*bounds)
             binary_run = pb.parse_hex_digits(*bounds)
             if text_run is not None:
                 want = expected[count]
                 self.assertTrue(text_run == want, text_run)
                 self.assertTrue(binary_run == want.encode('ascii'),
                                 binary_run)
                 count += 1
             p.next_char()
             pb.next_char()
         self.assertTrue(count == len(expected))

     # a negative min, or min > max, should throw an error
     for bad_bounds, complaint in (
             ((-1,), "min=-1 didn't raise exception"),
             ((3, 1), "min > max didn't raise exception")):
         try:
             p.parse_hex_digits(*bad_bounds)
         except ValueError:
             # and it shouldn't move the parser
             self.assertTrue(p.pos == 0)
         else:
             self.fail(complaint)
     # check min value of 1
     scan([ul("23"), ul("F"), ul("f"), ul("0"), ul("00"),
           ul("abcdefABCDEF0123456789")], 1)
     # min value of 2
     p.setpos(0)
     pb.setpos(0)
     scan([ul("23"), ul("00"), ul("abcdefABCDEF0123456789")], 2)
     # min value of 2 with a max value of 5
     p.setpos(0)
     pb.setpos(0)
     scan([ul("23"), ul("00"), ul("abcde"), ul("ABCDE"), ul("01234"),
           ul("6789")], 2, 5)
     # check we can exceed ordinary integer sizes
     big = unicode5.BasicParser(ul("123456789aBcDeF" + "0" * 256))
     self.assertTrue(len(big.parse_hex_digits(1, 256)) == 256)
     # and check that runs of 0 don't mean a thing
     self.assertTrue(big.parse_hex_digits(1, 256) == ul("000000000000000"))
     # test Arabic digits, should not parse!
     arabic = unicode5.BasicParser(
         u8(b'\xd9\xa0\xd9\xa1\xd9\xa2\xd9\xa3\xd9\xa4\xd9\xa5'
            b'\xd9\xa6\xd9\xa7\xd9\xa8\xd9\xa9'))
     for _ in range3(10):
         self.assertTrue(arabic.parse_hex_digits(1) is None)
         arabic.next_char()
示例#34
0
    def test_nss(self):
        """Syntax for URN char::

            <trans> | "%" <hex> <hex>

        Translation is done by encoding each character outside the URN
        character set as a sequence of one to six octets using UTF-8
        encoding [5], and the encoding of each of those octets as "%"
        followed by two characters from the <hex> character set above.

        the character [%] used in a literal sense MUST be encoded

        a character MUST NOT be "%"-encoded if the character is not a
        reserved character

        SHOULD NOT use [other reserved characters] characters in
        unencoded form

        each character outside the URN character set [is encoded] as a
        sequence of one to six octets using UTF-8 encoding

        The presence of an "%" character in an URN MUST be followed by
        two characters from the <hex> character set

        In addition, octet 0 (0 hex) should NEVER be used, in either
        unencoded or %-encoded form."""
        # maps each untranslated string to its expected URN-char form
        trans_tests = {
            ul('\x01\x02\x03\x04\x05\x06\x07\x08\t\n\x0b\x0c\r\x0e\x0f\x10'
               '\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f '
               '!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\'
               ']^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f'):
            '%01%02%03%04%05%06%07%08%09%0A%0B%0C%0D%0E%0F%10'
            '%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%20'
            '!%22%23$%25%26\'()*+,-.%2F0123456789:;%3C=%3E%3F@ABCDEFGHIJKLMN'
            'OPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D'
            '%7E%7F',
            u8(b'\xe8\x8b\xb1\xe5\x9b\xbd'): '%E8%8B%B1%E5%9B%BD',
            ul('Caf\xe9'): 'Caf%C3%A9'
            }
        report = "%s -> \n%s, expected \n%s"
        for src, dst in dict_items(trans_tests):
            # the translation must round-trip in both directions
            encoded = urn.translate_to_urnchar(src)
            self.assertTrue(
                encoded == dst,
                report % (repr(src), repr(encoded), repr(dst)))
            decoded = urn.translate_from_urnchar(dst)
            self.assertTrue(
                decoded == src,
                report % (repr(dst), repr(decoded), repr(src)))
            # the nss survives both direct construction and parsing
            built = urn.URN(nid='foo', nss=dst)
            self.assertTrue(built.nss == dst)
            parsed = uri.URI.from_octets('urn:foo:%s' % dst)
            self.assertTrue(parsed.nss == dst)
        for wrong in ("100% wrong", "Zero%00"):
            try:
                urn.translate_from_urnchar(wrong)
            except ValueError:
                pass
            else:
                self.fail("%s test in URN" % repr(wrong))
        try:
            urn.translate_to_urnchar("Zero\x00Byte")
        except ValueError:
            pass
        else:
            self.fail("Zero byte test in URN")
        # let's invent a scheme whereby the reserved characters
        # include . which is reserved for special meaning and
        # / is used unencoded as a path separator (even though
        # it is reserved and *SHOULD* be encoded

        def is_dot(c):
            return "." == c

        original = uri.URI.from_octets(
            "urn:path:.steve/file%2Ename/easy_come%2Feasy_go")
        expanded = original.nss.replace('.', 'users/')
        segments = [urn.translate_from_urnchar(part)
                    for part in expanded.split('/')]
        self.assertTrue(
            segments == ['users', 'steve', 'file.name', 'easy_come/easy_go'],
            "Parsed: %s" % repr(segments))
        # / is always reserved so we don't need to call this out
        reencoded = [urn.translate_to_urnchar(part, is_dot)
                     for part in segments[1:]]
        # add the newly reserved characters after translation...
        rebuilt = urn.URN(nid='path', nss='.' + '/'.join(reencoded))
        self.assertTrue(original == rebuilt)
        self.assertTrue(str(original) == str(rebuilt))
示例#35
0
import logging
import unittest

from sys import maxunicode

import pyslet.unicode5 as unicode5

from pyslet.py2 import byte, character, is_text, join_bytes, u8, ul
from pyslet.py2 import py2, range3


# Upper bound on code points used by the tests: 0x10FFFF, lowered to
# sys.maxunicode when the interpreter reports a smaller maximum.
MAX_CHAR = min(0x10FFFF, maxunicode)

# Two-character sample (U+82F1, U+56FD) supplied as UTF-8 encoded bytes.
CHINESE_TEST = u8(b'\xe8\x8b\xb1\xe5\x9b\xbd')


def suite():
    """Return a TestSuite holding the tests of every TestCase class here.

    Tests are collected from EncodingTests, CharClassTests, UCDTests and
    ParserTests, in that order, using a fresh TestLoader whose default
    method prefix is 'test'.  TestLoader.loadTestsFromTestCase replaces
    unittest.makeSuite, which was deprecated in Python 3.11 and removed
    in Python 3.13.
    """
    loader = unittest.TestLoader()
    return unittest.TestSuite([
        loader.loadTestsFromTestCase(case)
        for case in (EncodingTests, CharClassTests, UCDTests, ParserTests)
    ])


class EncodingTests(unittest.TestCase):

    def test_detection(self):
        test_string = u"Caf\xe9"
示例#36
0
 def test_parse_hex_digits(self):
     """Check parse_hex_digits on parallel text and binary parsers.

     Covers rejected bounds, scans of a mixed source with different
     min/max settings, a very long digit run and Arabic digits."""
     src = ul("23.FG.fg.0.00.abcdefABCDEF0123456789")
     p = unicode5.BasicParser(src)
     pb = unicode5.BasicParser(src.encode('ascii'))

     def scan(expected, *bounds):
         # step both parsers through the source together, comparing
         # each run of hex digits found with the next expected value
         count = 0
         while p.the_char is not None:
             text_run = p.parse_hex_digits(*bounds)
             binary_run = pb.parse_hex_digits(*bounds)
             if text_run is not None:
                 want = expected[count]
                 self.assertTrue(text_run == want, text_run)
                 self.assertTrue(binary_run == want.encode('ascii'),
                                 binary_run)
                 count += 1
             p.next_char()
             pb.next_char()
         self.assertTrue(count == len(expected))

     # a negative min, or min > max, should throw an error
     for bad_bounds, complaint in (
             ((-1,), "min=-1 didn't raise exception"),
             ((3, 1), "min > max didn't raise exception")):
         try:
             p.parse_hex_digits(*bad_bounds)
         except ValueError:
             # and it shouldn't move the parser
             self.assertTrue(p.pos == 0)
         else:
             self.fail(complaint)
     # check min value of 1
     scan([ul("23"), ul("F"), ul("f"), ul("0"), ul("00"),
           ul("abcdefABCDEF0123456789")], 1)
     # min value of 2
     p.setpos(0)
     pb.setpos(0)
     scan([ul("23"), ul("00"), ul("abcdefABCDEF0123456789")], 2)
     # min value of 2 with a max value of 5
     p.setpos(0)
     pb.setpos(0)
     scan([ul("23"), ul("00"), ul("abcde"), ul("ABCDE"), ul("01234"),
           ul("6789")], 2, 5)
     # check we can exceed ordinary integer sizes
     big = unicode5.BasicParser(ul("123456789aBcDeF" + "0" * 256))
     self.assertTrue(len(big.parse_hex_digits(1, 256)) == 256)
     # and check that runs of 0 don't mean a thing
     self.assertTrue(big.parse_hex_digits(1, 256) == ul("000000000000000"))
     # test Arabic digits, should not parse!
     arabic = unicode5.BasicParser(
         u8(b'\xd9\xa0\xd9\xa1\xd9\xa2\xd9\xa3\xd9\xa4\xd9\xa5'
            b'\xd9\xa6\xd9\xa7\xd9\xa8\xd9\xa9'))
     for _ in range3(10):
         self.assertTrue(arabic.parse_hex_digits(1) is None)
         arabic.next_char()