示例#1
0
def charmap_decode(space, s, errors="strict", w_mapping=None):
    """Decode the string `s` through the character map `w_mapping`.

    When `w_mapping` is space.w_None the work is delegated to
    latin_1_decode().  Otherwise each character of `s` is looked up with
    _extract_from_mapping(); whenever no entry is found, or
    _append_unicode() reports failure, the decode error handler selected
    by `errors` supplies the replacement text and the resume position.

    Returns a wrapped 2-tuple (decoded unicode, len(s)).
    """
    size = len(s)
    # Default to Latin-1
    if space.is_true(space.is_(w_mapping, space.w_None)):
        return latin_1_decode(space, s, errors, space.w_False)

    if size == 0:
        return space.newtuple([space.wrap(u''), space.wrap(0)])

    # fast path for all the stuff in the encodings module
    mapping_w = None
    if space.is_true(space.isinstance(w_mapping, space.w_tuple)):
        mapping_w = space.fixedview(w_mapping)

    out = UnicodeBuilder(size)
    pos = 0
    while pos < size:
        # Get the mapping entry (char ordinal -> integer, Unicode char
        # or None) for the character at `pos`.
        w_entry = _extract_from_mapping(space, mapping_w, w_mapping, s[pos])
        if w_entry is None or not _append_unicode(space, out, w_entry):
            # Undefined mapping: the handler returns the text to insert
            # and the input position to continue from.
            codec_state = space.fromcache(CodecState)
            replacement, pos = codec_state.decode_error_handler(
                errors, "charmap", "character maps to <undefined>", s, pos,
                pos + 1)
            out.append(replacement)
        else:
            pos += 1
    decoded = out.build()
    return space.newtuple([space.wrap(decoded), space.wrap(size)])
示例#2
0
class W_UnicodeBuilder(Wrappable):
    """Wrapper object around a UnicodeBuilder.

    After descr_build() has been called the instance is flagged as done;
    any later append/append_slice/build call raises an OperationError
    carrying space.w_ValueError.
    """

    def __init__(self, space, size):
        # A negative size selects the builder's default constructor.
        if size >= 0:
            self.builder = UnicodeBuilder(size)
        else:
            self.builder = UnicodeBuilder()
        self.done = False

    def _check_done(self, space):
        """Raise a ValueError OperationError once the builder is done."""
        if not self.done:
            return
        raise OperationError(space.w_ValueError, space.wrap("Can't operate on a done builder"))

    @unwrap_spec(size=int)
    def descr__new__(space, w_subtype, size=-1):
        """Create a new W_UnicodeBuilder; `size` defaults to -1."""
        return W_UnicodeBuilder(space, size)

    @unwrap_spec(s=unicode)
    def descr_append(self, space, s):
        """Append the unicode string `s` to the underlying builder."""
        self._check_done(space)
        self.builder.append(s)

    @unwrap_spec(s=unicode, start=int, end=int)
    def descr_append_slice(self, space, s, start, end):
        """Append s[start:end]; the bounds must satisfy
        0 <= start <= end <= len(s), otherwise ValueError is raised."""
        self._check_done(space)
        if not 0 <= start <= end <= len(s):
            raise OperationError(space.w_ValueError, space.wrap("bad start/stop"))
        self.builder.append_slice(s, start, end)

    def descr_build(self, space):
        """Return the wrapped result and mark this builder as done."""
        self._check_done(space)
        built = self.builder.build()
        w_result = space.wrap(built)
        self.done = True
        return w_result
示例#3
0
def charmap_decode(space, s, errors="strict", w_mapping=None):
    """Decode the string `s` through the character map `w_mapping`.

    When `w_mapping` is space.w_None the work is delegated to
    latin_1_decode().  Otherwise each character of `s` is looked up with
    _extract_from_mapping(); whenever no entry is found, or
    _append_unicode() reports failure, the decode error handler selected
    by `errors` supplies the replacement text and the resume position.

    Returns a wrapped 2-tuple (decoded unicode, len(s)).
    """
    size = len(s)
    # Default to Latin-1
    if space.is_true(space.is_(w_mapping, space.w_None)):
        return latin_1_decode(space, s, errors, space.w_False)

    if size == 0:
        return space.newtuple([space.wrap(u''), space.wrap(0)])

    # fast path for all the stuff in the encodings module
    mapping_w = None
    if space.is_true(space.isinstance(w_mapping, space.w_tuple)):
        mapping_w = space.fixedview(w_mapping)

    out = UnicodeBuilder(size)
    pos = 0
    while pos < size:
        # Get the mapping entry (char ordinal -> integer, Unicode char
        # or None) for the character at `pos`.
        w_entry = _extract_from_mapping(space, mapping_w, w_mapping, s[pos])
        if w_entry is None or not _append_unicode(space, out, w_entry):
            # Undefined mapping: the handler returns the text to insert
            # and the input position to continue from.
            codec_state = space.fromcache(CodecState)
            replacement, pos = codec_state.decode_error_handler(
                errors, "charmap", "character maps to <undefined>", s, pos,
                pos + 1)
            out.append(replacement)
        else:
            pos += 1
    decoded = out.build()
    return space.newtuple([space.wrap(decoded), space.wrap(size)])
示例#4
0
def test_unicode_builder():
    """Exercise append, append_slice and append_multiple_char on a
    UnicodeBuilder and check the built result and its type."""
    s = UnicodeBuilder()
    s.append(u'a')
    s.append(u'abc')
    s.append_slice(u'abcdef', 1, 2)
    # Feed a unicode character (not a byte string) to the unicode builder,
    # like every other call in this test does.
    s.append_multiple_char(u'd', 4)
    assert s.build() == u'aabcbdddd'
    # build() is deliberately called a second time here.
    assert isinstance(s.build(), unicode)
示例#5
0
def test_unicode_builder():
    """Exercise append, append_slice and append_multiple_char on a
    UnicodeBuilder and check the built result and its type."""
    s = UnicodeBuilder()
    s.append(u'a')
    s.append(u'abc')
    s.append_slice(u'abcdef', 1, 2)
    # Feed a unicode character (not a byte string) to the unicode builder,
    # like every other call in this test does.
    s.append_multiple_char(u'd', 4)
    assert s.build() == u'aabcbdddd'
    # build() is deliberately called a second time here.
    assert isinstance(s.build(), unicode)
示例#6
0
def unicode_capitalize__Unicode(space, w_self):
    """Return a W_UnicodeObject whose first character is upper-cased and
    whose remaining characters are lower-cased (via unicodedb).

    For an empty value W_UnicodeObject.EMPTY is returned.
    """
    value = w_self._value
    length = len(value)
    if length == 0:
        return W_UnicodeObject.EMPTY
    out = UnicodeBuilder(length)
    first_code = ord(value[0])
    out.append(unichr(unicodedb.toupper(first_code)))
    for idx in range(1, length):
        code = ord(value[idx])
        out.append(unichr(unicodedb.tolower(code)))
    return W_UnicodeObject(out.build())
示例#7
0
def unicode_title__Unicode(space, w_self):
    """Return a title-cased copy of `w_self` as a W_UnicodeObject.

    A character that follows a cased character is lower-cased; any other
    character is converted with unicodedb.totitle().  An empty value
    returns `w_self` itself.
    """
    value = w_self._value
    if len(value) == 0:
        return w_self
    out = UnicodeBuilder(len(value))

    previous_is_cased = False
    for ch in value:
        code = ord(ch)
        if previous_is_cased:
            converted = unicodedb.tolower(code)
        else:
            converted = unicodedb.totitle(code)
        out.append(unichr(converted))
        # Casedness is judged on the original character, not the result.
        previous_is_cased = unicodedb.iscased(code)
    return W_UnicodeObject(out.build())
示例#8
0
def backslashreplace_errors(space, w_exc):
    """Error callback replacing each offending character by an escape.

    For a UnicodeEncodeError, every character of exc.object[start:end]
    is rendered as \\xNN, \\uNNNN or \\UNNNNNNNN (chosen by its ordinal)
    and a wrapped tuple (replacement text, exc.end) is returned.  Any
    other exception type raises a TypeError.
    """
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        typename = space.type(w_exc).getname(space, '?')
        raise operationerrfmt(space.w_TypeError,
            "don't know how to handle %s in error callback", typename)

    text = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
    pos = space.int_w(space.getattr(w_exc, space.wrap('start')))
    w_end = space.getattr(w_exc, space.wrap('end'))
    stop = space.int_w(w_end)
    out = UnicodeBuilder()
    while pos < stop:
        code = ord(text[pos])
        # Pick the escape prefix and its number of hex digits.
        if code < 0x100:
            out.append(u"\\x")
            width = 2
        elif code < 0x10000:
            out.append(u"\\u")
            width = 4
        else:
            out.append(u"\\U")
            width = 8
        digits = hex(code)  # starts with '0x'
        ndigits = len(digits)
        padding = width + 2 - ndigits
        if padding > 0:
            out.append_multiple_char(u'0', padding)
        # Skip the leading '0x' when copying the digits.
        out.append_slice(unicode(digits), 2, ndigits)
        pos += 1
    return space.newtuple([space.wrap(out.build()), w_end])
示例#9
0
def backslashreplace_errors(space, w_exc):
    """Error callback replacing each offending character by an escape.

    For a UnicodeEncodeError, every character of exc.object[start:end]
    is rendered as \\xNN, \\uNNNN or \\UNNNNNNNN (chosen by its ordinal)
    and a wrapped tuple (replacement text, exc.end) is returned.  Any
    other exception type raises a TypeError.
    """
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        typename = space.type(w_exc).getname(space)
        raise operationerrfmt(space.w_TypeError,
            "don't know how to handle %s in error callback", typename)

    text = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
    pos = space.int_w(space.getattr(w_exc, space.wrap('start')))
    w_end = space.getattr(w_exc, space.wrap('end'))
    stop = space.int_w(w_end)
    out = UnicodeBuilder()
    while pos < stop:
        code = ord(text[pos])
        # Pick the escape prefix and its number of hex digits.
        if code < 0x100:
            out.append(u"\\x")
            width = 2
        elif code < 0x10000:
            out.append(u"\\u")
            width = 4
        else:
            out.append(u"\\U")
            width = 8
        digits = hex(code)  # starts with '0x'
        ndigits = len(digits)
        padding = width + 2 - ndigits
        if padding > 0:
            out.append_multiple_char(u'0', padding)
        # Skip the leading '0x' when copying the digits.
        out.append_slice(unicode(digits), 2, ndigits)
        pos += 1
    return space.newtuple([space.wrap(out.build()), w_end])
示例#10
0
 def func():
     """Fill a UnicodeBuilder through append, append_slice and
     append_multiple_char, then return the built string."""
     builder = UnicodeBuilder()
     builder.append(u'a')
     builder.append(u'abc')
     builder.append(u'abcdef')
     builder.append_slice(u'abc', 1, 2)
     builder.append_multiple_char(u'u', 4)
     return builder.build()
示例#11
0
 def func():
     """Fill a UnicodeBuilder through append, append_slice and
     append_multiple_char, then return the built string."""
     builder = UnicodeBuilder()
     builder.append(u'a')
     builder.append(u'abc')
     builder.append(u'abcdef')
     builder.append_slice(u'abc', 1, 2)
     builder.append_multiple_char(u'u', 4)
     return builder.build()
示例#12
0
def unicode_swapcase__Unicode(space, w_self):
    """Return a W_UnicodeObject with the case of each character swapped.

    Lowercase characters are upper-cased, uppercase characters are
    lower-cased, and everything else is copied unchanged.
    """
    value = w_self._value
    out = UnicodeBuilder(len(value))
    for ch in value:
        code = ord(ch)
        if unicodedb.islower(code):
            out.append(unichr(unicodedb.toupper(code)))
        elif unicodedb.isupper(code):
            out.append(unichr(unicodedb.tolower(code)))
        else:
            out.append(ch)
    return W_UnicodeObject(out.build())
示例#13
0
def _unicode_join_many_items(space, w_self, list_w, size):
    self = w_self._value
    sb = UnicodeBuilder()
    for i in range(size):
        if self and i != 0:
            sb.append(self)
        w_s = list_w[i]
        if isinstance(w_s, W_UnicodeObject):
            # shortcut for performance
            sb.append(w_s._value)
        else:
            try:
                sb.append(space.unicode_w(w_s))
            except OperationError, e:
                if not e.match(space, space.w_TypeError):
                    raise
                raise operationerrfmt(space.w_TypeError, "sequence item %d: expected string or Unicode", i)
示例#14
0
def _unicode_join_many_items(space, w_self, list_w, size):
    self = w_self._value
    sb = UnicodeBuilder()
    for i in range(size):
        if self and i != 0:
            sb.append(self)
        w_s = list_w[i]
        if isinstance(w_s, W_UnicodeObject):
            # shortcut for performance
            sb.append(w_s._value)
        else:
            try:
                sb.append(space.unicode_w(w_s))
            except OperationError, e:
                if not e.match(space, space.w_TypeError):
                    raise
                raise operationerrfmt(space.w_TypeError,
                    "sequence item %d: expected string or Unicode", i)
示例#15
0
def xmlcharrefreplace_errors(space, w_exc):
    """Error callback replacing each offending character by "&#N;".

    For a UnicodeEncodeError, every character of exc.object[start:end]
    becomes "&#" + decimal ordinal + ";" and a wrapped tuple
    (replacement text, exc.end) is returned.  Any other exception type
    raises a TypeError.
    """
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        typename = space.type(w_exc).getname(space)
        raise operationerrfmt(space.w_TypeError, "don't know how to handle %s in error callback", typename)

    text = space.realunicode_w(space.getattr(w_exc, space.wrap("object")))
    pos = space.int_w(space.getattr(w_exc, space.wrap("start")))
    w_end = space.getattr(w_exc, space.wrap("end"))
    stop = space.int_w(w_end)
    out = UnicodeBuilder()
    while pos < stop:
        code = ord(text[pos])
        out.append(u"&#")
        out.append(unicode(str(code)))
        out.append(u";")
        pos += 1
    return space.newtuple([space.wrap(out.build()), w_end])
示例#16
0
def xmlcharrefreplace_errors(space, w_exc):
    """Error callback replacing each offending character by "&#N;".

    For a UnicodeEncodeError, every character of exc.object[start:end]
    becomes "&#" + decimal ordinal + ";" and a wrapped tuple
    (replacement text, exc.end) is returned.  Any other exception type
    raises a TypeError.
    """
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        typename = space.type(w_exc).getname(space)
        raise operationerrfmt(space.w_TypeError,
            "don't know how to handle %s in error callback", typename)

    text = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
    pos = space.int_w(space.getattr(w_exc, space.wrap('start')))
    w_end = space.getattr(w_exc, space.wrap('end'))
    stop = space.int_w(w_end)
    out = UnicodeBuilder()
    while pos < stop:
        code = ord(text[pos])
        out.append(u"&#")
        out.append(unicode(str(code)))
        out.append(u";")
        pos += 1
    return space.newtuple([space.wrap(out.build()), w_end])
示例#17
0
 def func():
     """Append two strings to a UnicodeBuilder and return getlength()."""
     builder = UnicodeBuilder()
     builder.append(u"a")
     builder.append(u"abc")
     return builder.getlength()
示例#18
0
def unicode_upper__Unicode(space, w_self):
    """Return a W_UnicodeObject with every character of `w_self` mapped
    through unicodedb.toupper()."""
    value = w_self._value
    out = UnicodeBuilder(len(value))
    for ch in value:
        upper_code = unicodedb.toupper(ord(ch))
        out.append(unichr(upper_code))
    return W_UnicodeObject(out.build())
示例#19
0
 def func():
     """Append two strings to a UnicodeBuilder and return getlength()."""
     builder = UnicodeBuilder()
     builder.append(u"a")
     builder.append(u"abc")
     return builder.getlength()
示例#20
0
                     jit.loop_unrolling_heuristic(list_w, size))
def _unicode_join_many_items(space, w_self, list_w, size):
    self = w_self._value
    prealloc_size = len(self) * (size - 1)
    for i in range(size):
        try:
            prealloc_size += len(space.unicode_w(list_w[i]))
        except OperationError, e:
            if not e.match(space, space.w_TypeError):
                raise
            raise operationerrfmt(space.w_TypeError,
                        "sequence item %d: expected string or Unicode", i)
    sb = UnicodeBuilder(prealloc_size)
    for i in range(size):
        if self and i != 0:
            sb.append(self)
        w_s = list_w[i]
        sb.append(space.unicode_w(w_s))
    return space.wrap(sb.build())

def hash__Unicode(space, w_uni):
    s = w_uni._value
    if space.config.objspace.std.withrope:
        # be compatible with the special ropes hash
        # XXX no caching
        if len(s) == 0:
            return space.wrap(0)
        x = 0
        for c in s:
            x = intmask((1000003 * x) + ord(c))
        x <<= 1