Пример #1
0
def backslashreplace_errors(space, w_exc):
    check_exception(space, w_exc)
    if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        obj = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
        start = space.int_w(space.getattr(w_exc, space.wrap('start')))
        w_end = space.getattr(w_exc, space.wrap('end'))
        end = space.int_w(w_end)
        builder = UnicodeBuilder()
        pos = start
        while pos < end:
            oc = ord(obj[pos])
            num = hex(oc)
            if (oc >= 0x10000):
                builder.append(u"\\U")
                zeros = 8
            elif (oc >= 0x100):
                builder.append(u"\\u")
                zeros = 4
            else:
                builder.append(u"\\x")
                zeros = 2
            lnum = len(num)
            nb = zeros + 2 - lnum # num starts with '0x'
            if nb > 0:
                builder.append_multiple_char(u'0', nb)
            builder.append_slice(unicode(num), 2, lnum)
            pos += 1
        return space.newtuple([space.wrap(builder.build()), w_end])
    else:
        raise oefmt(space.w_TypeError,
                    "don't know how to handle %T in error callback", w_exc)
Пример #2
0
class W_UnicodeBuilder(W_Root):
    def __init__(self, space, size):
        if size < 0:
            self.builder = UnicodeBuilder()
        else:
            self.builder = UnicodeBuilder(size)

    @unwrap_spec(size=int)
    def descr__new__(space, w_subtype, size=-1):
        return W_UnicodeBuilder(space, size)

    @unwrap_spec(s=unicode)
    def descr_append(self, space, s):
        self.builder.append(s)

    @unwrap_spec(s=unicode, start=int, end=int)
    def descr_append_slice(self, space, s, start, end):
        if not 0 <= start <= end <= len(s):
            raise oefmt(space.w_ValueError, "bad start/stop")
        self.builder.append_slice(s, start, end)

    def descr_build(self, space):
        w_s = space.newunicode(self.builder.build())
        # after build(), we can continue to append more strings
        # to the same builder.  This is supported since
        # 2ff5087aca28 in RPython.
        return w_s

    def descr_len(self, space):
        if self.builder is None:
            raise oefmt(space.w_ValueError, "no length of built builder")
        return space.newint(self.builder.getlength())
Пример #3
0
def backslashreplace_errors(space, w_exc):
    check_exception(space, w_exc)
    if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        obj = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
        start = space.int_w(space.getattr(w_exc, space.wrap('start')))
        w_end = space.getattr(w_exc, space.wrap('end'))
        end = space.int_w(w_end)
        builder = UnicodeBuilder()
        pos = start
        while pos < end:
            oc = ord(obj[pos])
            num = hex(oc)
            if (oc >= 0x10000):
                builder.append(u"\\U")
                zeros = 8
            elif (oc >= 0x100):
                builder.append(u"\\u")
                zeros = 4
            else:
                builder.append(u"\\x")
                zeros = 2
            lnum = len(num)
            nb = zeros + 2 - lnum  # num starts with '0x'
            if nb > 0:
                builder.append_multiple_char(u'0', nb)
            builder.append_slice(unicode(num), 2, lnum)
            pos += 1
        return space.newtuple([space.wrap(builder.build()), w_end])
    else:
        raise oefmt(space.w_TypeError,
                    "don't know how to handle %T in error callback", w_exc)
Пример #4
0
 def func():
     s = UnicodeBuilder(32)
     s.append(u'a')
     s.append(u'abc')
     s.append(u'abcdef')
     s.append_slice(u'abc', 1, 2)
     s.append_multiple_char(u'u', 40)
     return s.build()
Пример #5
0
 def func():
     s = UnicodeBuilder(32)
     s.append(u'a')
     s.append(u'abc')
     s.append(u'abcdef')
     s.append_slice(u'abc', 1, 2)
     s.append_multiple_char(u'u', 40)
     return s.build()
Пример #6
0
def test_unicode_builder():
    s = UnicodeBuilder()
    s.append(u'a')
    s.append(u'abc')
    s.append_slice(u'abcdef', 1, 2)
    assert s.getlength() == len('aabcb')
    s.append_multiple_char(u'd', 4)
    assert s.build() == 'aabcbdddd'
    assert isinstance(s.build(), unicode)
Пример #7
0
 def f(n):
     while n > 0:
         jitdriver.jit_merge_point(n=n)
         sb = UnicodeBuilder()
         sb.append_slice(u"abcdefghij", 1, n)
         sb.append_slice(u"abcdefghij", 0, n)
         s = sb.build()
         if len(s) != 2 * n - 1: raise ValueError
         n -= 1
     return n
Пример #8
0
def test_unicode_builder():
    s = UnicodeBuilder()
    s.append(u'a')
    s.append(u'abc')
    s.append_slice(u'abcdef', 1, 2)
    assert s.getlength() == len('aabcb')
    s.append_multiple_char(u'd', 4)
    result = s.build()
    assert result == 'aabcbdddd'
    assert isinstance(result, unicode)
Пример #9
0
def test_unicode_builder():
    s = UnicodeBuilder()
    s.append(u"a")
    s.append(u"abc")
    s.append_slice(u"abcdef", 1, 2)
    assert s.getlength() == len("aabcb")
    s.append_multiple_char(u"d", 4)
    result = s.build()
    assert result == "aabcbdddd"
    assert isinstance(result, unicode)
Пример #10
0
 def f(n):
     while n > 0:
         jitdriver.jit_merge_point(n=n)
         sb = UnicodeBuilder()
         sb.append_slice(u"fOo!", 1, 3)
         s = sb.build()
         if len(s) != 2: raise ValueError
         if s[0] != u"O": raise ValueError
         if s[1] != u"o": raise ValueError
         n -= 1
     return n
Пример #11
0
        def format(self):
            lgt = len(self.fmt) + 4 * len(self.values_w) + 10
            if do_unicode:
                result = UnicodeBuilder(lgt)
            else:
                result = StringBuilder(lgt)
            self.result = result
            while True:
                # fast path: consume as many characters as possible
                fmt = self.fmt
                i = i0 = self.fmtpos
                while i < len(fmt):
                    if fmt[i] == '%':
                        break
                    i += 1
                else:
                    result.append_slice(fmt, i0, len(fmt))
                    break  # end of 'fmt' string
                result.append_slice(fmt, i0, i)
                self.fmtpos = i + 1

                # interpret the next formatter
                w_value = self.parse_fmt()
                c = self.peekchr()
                self.forward()
                if c == '%':
                    self.std_wp(const('%'))
                    continue
                if w_value is None:
                    w_value = self.nextinputvalue()

                # dispatch on the formatter
                # (this turns into a switch after translation)
                for c1 in FORMATTER_CHARS:
                    if c == c1:
                        # 'c1' is an annotation constant here,
                        # so this getattr() is ok
                        do_fmt = getattr(self, 'fmt_' + c1)
                        do_fmt(w_value)
                        break
                else:
                    self.unknown_fmtchar()

            self.checkconsumed()
            return result.build()
Пример #12
0
        def format(self):
            lgt = len(self.fmt) + 4 * len(self.values_w) + 10
            if do_unicode:
                result = UnicodeBuilder(lgt)
            else:
                result = StringBuilder(lgt)
            self.result = result
            while True:
                # fast path: consume as many characters as possible
                fmt = self.fmt
                i = i0 = self.fmtpos
                while i < len(fmt):
                    if fmt[i] == '%':
                        break
                    i += 1
                else:
                    result.append_slice(fmt, i0, len(fmt))
                    break     # end of 'fmt' string
                result.append_slice(fmt, i0, i)
                self.fmtpos = i + 1

                # interpret the next formatter
                w_value = self.parse_fmt()
                c = self.peekchr()
                self.forward()
                if c == '%':
                    self.std_wp(const('%'))
                    continue
                if w_value is None:
                    w_value = self.nextinputvalue()

                # dispatch on the formatter
                # (this turns into a switch after translation)
                for c1 in FORMATTER_CHARS:
                    if c == c1:
                        # 'c1' is an annotation constant here,
                        # so this getattr() is ok
                        do_fmt = getattr(self, 'fmt_' + c1)
                        do_fmt(w_value)
                        break
                else:
                    self.unknown_fmtchar()

            self.checkconsumed()
            return result.build()