示例#1
0
def test_general_category():
    from rpython.rlib.unicodedata import unicodedb

    for cat, positive, negative in [('L', u'aZ\xe9', u'. ?'),
                                    ('P', u'.?', u'aZ\xe9 ')]:
        pat_pos = [70, ord(cat), 0]
        pat_neg = [70, ord(cat) | 0x80, 0]
        for c in positive:
            assert unicodedb.category(ord(c)).startswith(cat)
            assert rsre_char.check_charset(Ctx(pat_pos), 0, ord(c))
            assert not rsre_char.check_charset(Ctx(pat_neg), 0, ord(c))
        for c in negative:
            assert not unicodedb.category(ord(c)).startswith(cat)
            assert not rsre_char.check_charset(Ctx(pat_pos), 0, ord(c))
            assert rsre_char.check_charset(Ctx(pat_neg), 0, ord(c))

    def cat2num(cat):
        return ord(cat[0]) | (ord(cat[1]) << 8)

    for cat, positive, negative in [('Lu', u'A', u'z\xe9 '),
                                    ('Ll', u'z\xe9', u'A \n')]:
        pat_pos = [70, cat2num(cat), 0]
        pat_neg = [70, cat2num(cat) | 0x80, 0]
        for c in positive:
            assert unicodedb.category(ord(c)) == cat
            assert rsre_char.check_charset(Ctx(pat_pos), 0, ord(c))
            assert not rsre_char.check_charset(Ctx(pat_neg), 0, ord(c))
        for c in negative:
            assert unicodedb.category(ord(c)) != cat
            assert not rsre_char.check_charset(Ctx(pat_pos), 0, ord(c))
            assert rsre_char.check_charset(Ctx(pat_neg), 0, ord(c))

    # test for how the common 'L&' pattern might be compiled
    pat = [70, cat2num('Lu'), 70, cat2num('Ll'), 70, cat2num('Lt'), 0]
    assert rsre_char.check_charset(Ctx(pat), 0, 65)    # Lu
    assert rsre_char.check_charset(Ctx(pat), 0, 99)    # Ll
    assert rsre_char.check_charset(Ctx(pat), 0, 453)   # Lt
    assert not rsre_char.check_charset(Ctx(pat), 0, 688)    # Lm
    assert not rsre_char.check_charset(Ctx(pat), 0, 5870)   # Nl
示例#2
0
def test_general_category():
    from rpython.rlib.unicodedata import unicodedb

    for cat, positive, negative in [('L', u'aZ\xe9', u'. ?'),
                                    ('P', u'.?', u'aZ\xe9 ')]:
        pat_pos = [70, ord(cat), 0]
        pat_neg = [70, ord(cat) | 0x80, 0]
        for c in positive:
            assert unicodedb.category(ord(c)).startswith(cat)
            assert rsre_char.check_charset(Ctx(pat_pos), 0, ord(c))
            assert not rsre_char.check_charset(Ctx(pat_neg), 0, ord(c))
        for c in negative:
            assert not unicodedb.category(ord(c)).startswith(cat)
            assert not rsre_char.check_charset(Ctx(pat_pos), 0, ord(c))
            assert rsre_char.check_charset(Ctx(pat_neg), 0, ord(c))

    def cat2num(cat):
        return ord(cat[0]) | (ord(cat[1]) << 8)

    for cat, positive, negative in [('Lu', u'A', u'z\xe9 '),
                                    ('Ll', u'z\xe9', u'A \n')]:
        pat_pos = [70, cat2num(cat), 0]
        pat_neg = [70, cat2num(cat) | 0x80, 0]
        for c in positive:
            assert unicodedb.category(ord(c)) == cat
            assert rsre_char.check_charset(Ctx(pat_pos), 0, ord(c))
            assert not rsre_char.check_charset(Ctx(pat_neg), 0, ord(c))
        for c in negative:
            assert unicodedb.category(ord(c)) != cat
            assert not rsre_char.check_charset(Ctx(pat_pos), 0, ord(c))
            assert rsre_char.check_charset(Ctx(pat_neg), 0, ord(c))

    # test for how the common 'L&' pattern might be compiled
    pat = [70, cat2num('Lu'), 70, cat2num('Ll'), 70, cat2num('Lt'), 0]
    assert rsre_char.check_charset(Ctx(pat), 0, 65)  # Lu
    assert rsre_char.check_charset(Ctx(pat), 0, 99)  # Ll
    assert rsre_char.check_charset(Ctx(pat), 0, 453)  # Lt
    assert not rsre_char.check_charset(Ctx(pat), 0, 688)  # Lm
    assert not rsre_char.check_charset(Ctx(pat), 0, 5870)  # Nl