Пример #1
0
def str_replace__Rope_Rope_Rope_ANY(space, w_self, w_sub, w_by, w_maxsplit=-1):
    """Implement replace(sub, by[, maxsplit]) for rope-backed strings.

    Returns w_self.create_if_subclassed() when nothing has to be replaced
    (a count of 0, or rope.split() producing no pieces); otherwise a new
    W_RopeObject is built.  An OverflowError raised while assembling the
    result is turned into an app-level OverflowError("string too long").
    """
    source = w_self._node
    total = source.length()
    pattern = w_sub._node
    replacement = w_by._node
    limit = space.int_w(w_maxsplit)
    if limit == 0:
        return w_self.create_if_subclassed()

    if pattern.length():
        # Non-empty pattern: cut the source at the matches and glue the
        # pieces back together with the replacement in between.
        pieces = rope.split(source, pattern, limit)
        if not pieces:
            return w_self.create_if_subclassed()
        try:
            return W_RopeObject(rope.join(replacement, pieces))
        except OverflowError:
            raise OperationError(space.w_OverflowError,
                                 space.wrap("string too long"))

    # Empty pattern: the replacement goes in front of the string and after
    # each of the first `count` characters; the rest is copied unchanged.
    count = source.length()
    if 0 < limit < count + 2:
        count = limit - 1
        assert count >= 0
    pieces = [replacement]
    chars = rope.ItemIterator(source)
    for _ in range(count):
        pieces.append(chars.nextrope())
        pieces.append(replacement)
    pieces.append(rope.getslice_one(source, count, total))
    try:
        return W_RopeObject(rope.rebalance(pieces))
    except OverflowError:
        raise OperationError(space.w_OverflowError,
                             space.wrap("string too long"))
Пример #2
0
def unicode_translate__RopeUnicode_ANY(space, w_self, w_table):
    """Map every character of a unicode rope through w_table.

    For each character, w_table is indexed with the character's integer
    code (wrapped for the object space):

    * lookup fails with an app-level LookupError: the character is kept;
    * the entry is None: the character is dropped;
    * the entry is an int: it must lie in 0..sys.maxunicode and is
      converted to a one-character rope, else TypeError is raised;
    * the entry is a unicode object: its rope is appended as-is;
    * anything else: TypeError.

    NOTE(review): the visible part of this function stops after the loop
    body and never uses or returns `result`; the closing statement(s)
    appear to be missing from this excerpt -- confirm against the full
    file.
    """
    self = w_self._node
    w_sys = space.getbuiltinmodule('sys')
    # Upper bound for integer table entries, read from the app-level sys.
    maxunicode = space.int_w(space.getattr(w_sys, space.wrap("maxunicode")))
    result = []
    iter = rope.ItemIterator(self)
    for i in range(self.length()):
        # Keep the one-character rope around so that unmapped characters
        # can be appended without rebuilding them.
        crope = iter.nextrope()
        char = crope.getint(0)
        try:
            w_newval = space.getitem(w_table, space.wrap(char))
        except OperationError, e:
            # Only a missing table entry means "leave unchanged"; every
            # other app-level error propagates.
            if e.match(space, space.w_LookupError):
                result.append(crope)
            else:
                raise
        else:
            if space.is_w(w_newval, space.w_None):
                # None deletes the character.
                continue
            elif space.isinstance_w(w_newval, space.w_int):
                newval = space.int_w(w_newval)
                if newval < 0 or newval > maxunicode:
                    raise OperationError(
                        space.w_TypeError,
                        space.wrap("character mapping must be in range(0x%x)" %
                                   (maxunicode + 1, )))
                result.append(rope.rope_from_unichar(unichr(newval)))
            elif space.isinstance_w(w_newval, space.w_unicode):
                result.append(ropeunicode_w(space, w_newval))
            else:
                raise OperationError(
                    space.w_TypeError,
                    space.wrap(
                        "character mapping must return integer, None or unicode"
                    ))
Пример #3
0
def unicode_to_decimal_w(space, w_unistr):
    """Turn a rope unicode object into a plain string of decimal text.

    Characters that unicodedb.isspace() accepts become ' ', characters
    with a decimal value (unicodedb.decimal()) become the matching ASCII
    digit, and remaining code points in 1..255 are copied via chr().
    Anything else raises an app-level UnicodeEncodeError; a non-rope
    argument raises TypeError("expected unicode").
    """
    if not isinstance(w_unistr, W_RopeUnicodeObject):
        raise OperationError(space.w_TypeError, space.wrap("expected unicode"))
    node = w_unistr._node
    size = node.length()
    out = ['\0'] * size
    digit_chars = "0123456789"
    codes = rope.ItemIterator(node)
    for pos in range(size):
        code = codes.nextint()
        if unicodedb.isspace(code):
            out[pos] = ' '
        else:
            try:
                out[pos] = digit_chars[unicodedb.decimal(code)]
            except KeyError:
                # No decimal value: only non-NUL Latin-1 passes through.
                if not (0 < code < 256):
                    raise OperationError(
                        space.w_UnicodeEncodeError,
                        space.newtuple([
                            space.wrap('decimal'),
                            w_unistr,
                            space.wrap(pos),
                            space.wrap(pos + 1),
                            space.wrap('invalid decimal Unicode string'),
                        ]))
                out[pos] = chr(code)
    return ''.join(out)
Пример #4
0
def str_translate__Rope_ANY_ANY(space, w_string, w_table, w_deletechars=''):
    """Character filter; unicode handling is not implemented.

    Return a copy of the string in which every character contained in
    w_deletechars has been removed and each remaining character has been
    replaced by its entry in the translation table.  The table must be a
    string of exactly 256 characters, otherwise ValueError is raised.
    """
    # XXX CPython accepts buffers, too, not sure what we should do
    table = space.str_w(w_table)
    if len(table) != 256:
        raise OperationError(
            space.w_ValueError,
            space.wrap("translation table must be 256 characters long"))

    kept = []
    chars = rope.ItemIterator(w_string._node)
    while True:
        try:
            ch = chars.nextchar()
            code = ord(ch)
            # Membership is tested at the object-space level using the
            # prebuilt one-character string for this code.
            w_ch = W_RopeObject.PREBUILT[code]
            if space.is_true(space.contains(w_deletechars, w_ch)):
                continue
            kept.append(table[code])
        except StopIteration:
            break
    return W_RopeObject(rope.rope_from_charlist(kept))
Пример #5
0
def _is_generic(space, w_self, fun):
    """Return space.w_True if the string is non-empty and fun(ch) is true
    for every character; space.w_False otherwise."""
    node = w_self._node
    remaining = node.length()
    if remaining == 0:
        return space.w_False
    chars = rope.ItemIterator(w_self._node)
    while remaining > 0:
        if not fun(chars.nextchar()):
            return space.w_False
        remaining -= 1
    return space.w_True
Пример #6
0
 def func(space, w_self):
     """Return space.w_True if the string is non-empty and the unicodedb
     predicate named by `funcname` holds for every code point."""
     rope_node = w_self._node
     size = rope_node.length()
     if size == 0:
         return space.w_False
     # `funcname` comes from the enclosing scope; look the predicate up
     # once rather than once per character.
     predicate = getattr(unicodedb, funcname)
     codes = rope.ItemIterator(rope_node)
     for _ in range(size):
         if not predicate(codes.nextint()):
             return space.w_False
     return space.w_True
Пример #7
0
def _local_transform(node, transform):
    """Apply transform() to each character of node, in order, and return
    the results as a new W_RopeObject."""
    size = node.length()
    chars = rope.ItemIterator(node)
    mapped = []
    for _ in range(size):
        mapped.append(transform(chars.nextchar()))
    return W_RopeObject(rope.rope_from_charlist(mapped))
Пример #8
0
def buffer__RopeUnicode(space, w_unicode):
    """Return a wrapped StringBuffer holding the packed form of the rope.

    Each code point is converted with unichr() and handed to
    pack_unichar() together with a shared list; the list is then joined
    into the buffer's string (presumably pack_unichar appends the packed
    characters to it -- confirm in pypy.rlib.rstruct.unichar).
    """
    from pypy.rlib.rstruct.unichar import pack_unichar
    rope_node = w_unicode._node
    packed = []
    codes = rope.ItemIterator(rope_node)
    for _ in range(rope_node.length()):
        pack_unichar(unichr(codes.nextint()), packed)
    from pypy.interpreter.buffer import StringBuffer
    return space.wrap(StringBuffer(''.join(packed)))
Пример #9
0
def buffer__RopeUnicode(space, w_unicode):
    """Return a wrapped StringBuffer holding the packed form of the rope.

    A StringBuilder is created with an initial size of
    length * UNICODE_SIZE; every code point is converted with unichr()
    and passed to pack_unichar() along with the builder, whose build()
    result backs the buffer.
    """
    from pypy.rlib.rstruct.unichar import pack_unichar, UNICODE_SIZE
    rope_node = w_unicode._node
    codes = rope.ItemIterator(rope_node)
    size = rope_node.length()
    out = StringBuilder(size * UNICODE_SIZE)
    for _ in range(size):
        pack_unichar(unichr(codes.nextint()), out)
    from pypy.interpreter.buffer import StringBuffer
    return space.wrap(StringBuffer(out.build()))
Пример #10
0
def unicode_capitalize__RopeUnicode(space, w_self):
    """Return a copy with the first character upper-cased and all others
    lower-cased (via unicodedb); an empty string is returned as-is."""
    node = w_self._node
    size = node.length()
    if size == 0:
        return w_self
    codes = rope.ItemIterator(node)
    chars = [unichr(unicodedb.toupper(codes.nextint()))]
    for _ in range(size - 1):
        chars.append(unichr(unicodedb.tolower(codes.nextint())))
    return W_RopeUnicodeObject(rope.rope_from_unicharlist(chars))
Пример #11
0
def unicode_isupper__RopeUnicode(space, w_unicode):
    """Return true if the string contains at least one uppercase character
    and no lowercase or titlecase character (as judged by unicodedb)."""
    found_upper = False
    codes = rope.ItemIterator(w_unicode._node)
    while True:
        try:
            code = codes.nextint()
        except StopIteration:
            break
        if unicodedb.islower(code) or unicodedb.istitle(code):
            return space.w_False
        if not found_upper and unicodedb.isupper(code):
            found_upper = True
    return space.newbool(found_upper)
Пример #12
0
def repr__Rope(space, w_str):
    """Build the repr() of a rope string as a new W_RopeObject.

    The text is wrapped in single quotes, or in double quotes when it
    contains a single quote but no double quote.  Backslash and the
    chosen quote are backslash-escaped, tab/CR/LF become \\t, \\r, \\n,
    and every other character outside '\\x20'..'\\x7e' is written as a
    two-digit \\x escape.
    """
    hexdigits = "0123456789abcdef"
    node = w_str._node

    quote = "'"
    if (rope.find_int(node, ord(quote)) != -1
            and rope.find_int(node, ord('"')) == -1):
        quote = '"'

    out = [quote]
    chars = rope.ItemIterator(node)
    while True:
        try:
            ch = chars.nextchar()
        except StopIteration:
            break

        if ch == '\\' or ch == quote:
            out.append('\\')
            out.append(ch)
        elif ch == '\t':
            out.append('\\')
            out.append('t')
        elif ch == '\r':
            out.append('\\')
            out.append('r')
        elif ch == '\n':
            out.append('\\')
            out.append('n')
        elif not '\x20' <= ch < '\x7f':
            # Non-printable: emit as \xNN.
            code = ord(ch)
            out.append('\\')
            out.append('x')
            out.append(hexdigits[code >> 4])
            out.append(hexdigits[code & 0xF])
        else:
            out.append(ch)

    out.append(quote)
    return W_RopeObject(rope.rope_from_charlist(out))
Пример #13
0
def str_isupper__Rope(space, w_self):
    """Return True if S has at least one cased character and none of its
    characters is lowercase, False otherwise (including for empty S)."""
    node = w_self._node
    size = node.length()
    if size == 0:
        return space.w_False

    saw_upper = False
    chars = rope.ItemIterator(w_self._node)
    for _ in range(size):
        ch = chars.nextchar()
        if ch.islower():
            return space.w_False
        if not saw_upper and ch.isupper():
            saw_upper = True
    return space.newbool(saw_upper)
Пример #14
0
def unicode_title__RopeUnicode(space, w_self):
    """Return a titlecased copy: a character following a cased character
    is lower-cased, every other character is title-cased (via unicodedb).
    An empty string is returned as-is."""
    node = w_self._node
    size = node.length()
    if size == 0:
        return w_self

    titled = []
    codes = rope.ItemIterator(node)
    after_cased = False
    for _ in range(size):
        code = codes.nextint()
        if after_cased:
            mapped = unicodedb.tolower(code)
        else:
            mapped = unicodedb.totitle(code)
        titled.append(unichr(mapped))
        # The decision for the next character depends on the ORIGINAL
        # code point, not on the converted one.
        after_cased = unicodedb.iscased(code)
    return W_RopeUnicodeObject(rope.rope_from_unicharlist(titled))
Пример #15
0
def str_title__Rope(space, w_self):
    """Return a titlecased copy: a character is passed through _upper()
    when the previously emitted character is not alphabetic, and through
    _lower() otherwise."""
    node = w_self._node
    size = node.length()
    out = []
    # Start as if preceded by a blank so the first character is upper-cased.
    previous = ' '

    chars = rope.ItemIterator(node)
    for _ in range(size):
        ch = chars.nextchar()
        if previous.isalpha():
            converted = _lower(ch)
        else:
            converted = _upper(ch)
        out.append(converted)
        previous = converted

    return W_RopeObject(rope.rope_from_charlist(out))
Пример #16
0
def unicode_istitle__RopeUnicode(space, w_unicode):
    """Return true if the string is titlecased: uppercase/titlecase
    characters only after uncased ones, lowercase characters only after
    cased ones, and at least one cased character overall."""
    seen_cased = False
    prev_cased = False
    codes = rope.ItemIterator(w_unicode._node)
    while True:
        try:
            code = codes.nextint()
        except StopIteration:
            break
        if unicodedb.isupper(code) or unicodedb.istitle(code):
            # A word-initial form must not directly follow a cased char.
            if prev_cased:
                return space.w_False
            prev_cased = True
            seen_cased = True
        elif unicodedb.islower(code):
            # Lowercase is only valid inside a word.
            if not prev_cased:
                return space.w_False
            prev_cased = True
            seen_cased = True
        else:
            prev_cased = False
    return space.newbool(seen_cased)
Пример #17
0
def str_istitle__Rope(space, w_self):
    """Return True if S is titlecased and contains at least one cased
    character: uppercase characters may only follow uncased characters
    and lowercase characters may only follow cased ones.  Return False
    otherwise."""
    node = w_self._node
    seen_cased = False
    prev_cased = False

    chars = rope.ItemIterator(node)
    for _ in range(node.length()):
        ch = chars.nextchar()
        if ch.isupper():
            if prev_cased:
                return space.w_False
            prev_cased = True
            seen_cased = True
        elif ch.islower():
            if not prev_cased:
                return space.w_False
            # prev_cased is already True on this path.
            seen_cased = True
        else:
            prev_cased = False

    return space.newbool(seen_cased)
Пример #18
0
def str_capitalize__Rope(space, w_self):
    """Return a copy with the first character upper-cased and the rest
    lower-cased; an empty string yields W_RopeObject.EMPTY.

    Case conversion is done by shifting the character code by 32 for
    characters that islower()/isupper() report as cased.
    """
    node = w_self._node
    size = node.length()
    if size <= 0:
        return W_RopeObject.EMPTY

    chars = rope.ItemIterator(node)
    first = chars.nextchar()
    if first.islower():
        first = chr(ord(first) - 32)
    out = [first]

    for _ in range(size - 1):
        ch = chars.nextchar()
        if ch.isupper():
            ch = chr(ord(ch) + 32)
        out.append(ch)

    return W_RopeObject(rope.rope_from_charlist(out))
Пример #19
0
def _repr_unicode_hex_escape(result, marker, code, ndigits):
    """Append a backslash, `marker` and the `ndigits` lowest hex digits of
    `code` (lowercase, most significant first) to the list `result`."""
    hexdigits = "0123456789abcdef"
    result.append('\\')
    result.append(marker)
    shift = (ndigits - 1) * 4
    while shift >= 0:
        result.append(hexdigits[(code >> shift) & 0xf])
        shift -= 4


def repr__RopeUnicode(space, w_unicode):
    """Build the repr() of a unicode rope as a new W_RopeObject.

    The result starts with 'u' and is quoted with single quotes, or with
    double quotes when the text contains a single quote but no double
    quote.  Escaping rules, in order of precedence:

    * code points >= 0x10000, and a high surrogate immediately followed
      by a low surrogate, are written as one \\UXXXXXXXX escape;
    * other code points >= 0x100 (including unpaired surrogates) are
      written as \\uXXXX;
    * backslash and the chosen quote are backslash-escaped;
    * tab, CR and LF become \\t, \\r and \\n;
    * remaining code points below 0x20 or >= 0x7f become \\xXX;
    * everything else is copied unchanged.
    """
    node = w_unicode._node
    size = node.length()

    # First pass: find out which quote characters occur in the text.
    singlequote = doublequote = False
    iter = rope.ItemIterator(node)
    for i in range(size):
        c = iter.nextunichar()
        if singlequote and doublequote:
            break
        if c == u'\'':
            singlequote = True
        elif c == u'"':
            doublequote = True
    if singlequote and not doublequote:
        quote = '"'
    else:
        quote = '\''

    result = ['u', quote]
    iter = rope.ItemIterator(node)
    # `pending` holds a code unit that was read while looking for the
    # second half of a surrogate pair but turned out not to be a low
    # surrogate.  It is emitted by the next iteration instead of being
    # dropped; -1 means nothing is pending.
    pending = -1
    j = 0  # number of code units fully handled so far
    while j < size:
        if pending >= 0:
            code = pending
            pending = -1
        else:
            code = iter.nextint()
        j += 1

        if code >= 0x10000:
            _repr_unicode_hex_escape(result, 'U', code, 8)
            continue

        if 0xD800 <= code < 0xDC00 and j < size:
            # High surrogate with at least one more code unit available.
            code2 = iter.nextint()
            if 0xDC00 <= code2 <= 0xDFFF:
                code = (((code & 0x03FF) << 10) |
                        (code2 & 0x03FF)) + 0x00010000
                _repr_unicode_hex_escape(result, 'U', code, 8)
                j += 1  # the low surrogate has been consumed as well
                continue
            # Not a valid pair: emit the lone high surrogate below and
            # keep the look-ahead for the next round.
            pending = code2

        if code >= 0x100:
            _repr_unicode_hex_escape(result, 'u', code, 4)
        elif code == ord('\\') or code == ord(quote):
            result.append('\\')
            result.append(chr(code))
        elif code == ord('\t'):
            result.append('\\')
            result.append('t')
        elif code == ord('\r'):
            result.append('\\')
            result.append('r')
        elif code == ord('\n'):
            result.append('\\')
            result.append('n')
        elif code < ord(' ') or code >= 0x7f:
            _repr_unicode_hex_escape(result, 'x', code, 2)
        else:
            result.append(chr(code))
    result.append(quote)
    return W_RopeObject(rope.rope_from_charlist(result))
Пример #20
0
 def __init__(w_self, w_rope, index=0):
     """Store the rope node of w_rope, a new rope.ItemIterator over that
     node, and the given start index on the instance."""
     rope_node = w_rope._node
     w_self.node = rope_node
     w_self.item_iter = rope.ItemIterator(rope_node)
     w_self.index = index