Exemplo n.º 1
0
def backslashreplace_errors(space, w_exc):
    """Codec error callback that replaces the failing range with escapes.

    Reads ``object``, ``start`` and ``end`` from ``w_exc`` and passes the
    ordinal of every item in ``object[start:end]`` to
    ``raw_unicode_escape_helper_unicode``, which writes into a
    ``UnicodeBuilder``.  Encode and translate errors are read as unicode,
    decode errors as bytes.

    Returns a wrapped 2-tuple ``(replacement text, end)``.  Any other
    exception type results in a wrapped ``TypeError``.
    """
    check_exception(space, w_exc)

    if (space.isinstance_w(w_exc, space.w_UnicodeEncodeError) or
            space.isinstance_w(w_exc, space.w_UnicodeTranslateError)):
        w_obj = space.getattr(w_exc, space.newtext('object'))
        text = space.realunicode_w(w_obj)
        first = space.int_w(space.getattr(w_exc, space.newtext('start')))
        w_end = space.getattr(w_exc, space.newtext('end'))
        stop = space.int_w(w_end)
        out = UnicodeBuilder()
        for index in range(first, stop):
            raw_unicode_escape_helper_unicode(out, ord(text[index]))
        return space.newtuple([space.newunicode(out.build()), w_end])

    if space.isinstance_w(w_exc, space.w_UnicodeDecodeError):
        # Same walk as above, but the offending object is a byte string.
        w_obj = space.getattr(w_exc, space.newtext('object'))
        data = space.bytes_w(w_obj)
        first = space.int_w(space.getattr(w_exc, space.newtext('start')))
        w_end = space.getattr(w_exc, space.newtext('end'))
        stop = space.int_w(w_end)
        out = UnicodeBuilder()
        for index in range(first, stop):
            raw_unicode_escape_helper_unicode(out, ord(data[index]))
        return space.newtuple([space.newunicode(out.build()), w_end])

    raise oefmt(space.w_TypeError,
                "don't know how to handle %T in error callback", w_exc)
Exemplo n.º 2
0
 def entry_point(argv):
     """Drive a ``UnicodeBuilder`` through the operations listed in ``to_do``.

     Each value selects one action by range: print the current contents
     (and, below 1000, start over with a fresh builder), append a single
     character, append a repeated character, or append the decimal text of
     the value.  ``argv`` is not used.  Always returns 0.
     """
     builder = UnicodeBuilder(32)
     for op in to_do:
         if op < 1500:
             print("``%s''" % str(builder.build()))
             if op < 1000:
                 # Discard what was built so far and restart.
                 builder = UnicodeBuilder(32)
         elif op < 20000:
             builder.append(unichr(32 + (op & 63)))
         elif op < 30000:
             fill = unichr(32 + (op & 63))
             builder.append_multiple_char(fill, op % 93)
         else:
             builder.append(unicode(str(op)))
     return 0
Exemplo n.º 3
0
    def read_w(self, space, w_size=None):
        """Read decoded text and return it as a wrapped unicode object.

        ``w_size`` is converted with ``convert_size``.  A negative size
        reads everything from ``self.w_buffer``, decodes it with the
        ``final`` flag set, and returns the already-decoded characters
        followed by the newly decoded ones.  Otherwise characters are
        collected until ``size`` of them are gathered or
        ``self._ensure_data`` reports that no more data is available.

        Raises a wrapped ``IOError`` ("not readable") when there is no
        decoder.
        """
        self._check_attached(space)
        self._check_closed(space)
        if not self.w_decoder:
            raise oefmt(space.w_IOError, "not readable")

        size = convert_size(space, w_size)
        self._writeflush(space)

        if size < 0:
            # Read everything
            w_raw = space.call_method(self.w_buffer, "read")
            w_tail = space.call_method(self.w_decoder, "decode", w_raw,
                                       space.w_True)
            check_decoded(space, w_tail)
            w_head = space.newunicode(self.decoded.get_chars(-1))
            w_everything = space.add(w_head, w_tail)
            self.snapshot = None
            return w_everything

        chars_left = size
        out = UnicodeBuilder(size)

        # Pull chunks until the requested number of characters is reached
        # or the underlying data runs out.
        while chars_left > 0 and self._ensure_data(space):
            chunk = self.decoded.get_chars(chars_left)
            out.append(chunk)
            chars_left -= len(chunk)

        return space.newunicode(out.build())
Exemplo n.º 4
0
 def writerow(self, w_fields):
     """Construct and write a CSV record from a sequence of fields.
     Non-string elements will be converted to string."""
     # NOTE(review): this excerpt stops inside the per-field loop; the part
     # that uses ``rec`` and ``quoted`` is not visible here.
     space = self.space
     fields_w = space.listview(w_fields)
     dialect = self.dialect
     rec = UnicodeBuilder(80)
     #
     for field_index in range(len(fields_w)):
         w_field = fields_w[field_index]
         # Pick the text form of the field: None becomes the empty string,
         # floats use their repr, everything else uses str().
         if space.is_w(w_field, space.w_None):
             field = u""
         elif space.isinstance_w(w_field, space.w_float):
             field = space.unicode_w(space.repr(w_field))
         else:
             field = space.unicode_w(space.str(w_field))
         #
         if dialect.quoting == QUOTE_NONNUMERIC:
             # A field is left unquoted only if float_w() accepts it.
             try:
                 space.float_w(w_field)  # is it an int/long/float?
                 quoted = False
             except OperationError, e:
                 # Errors flagged by e.async() are re-raised unchanged
                 # (presumably asynchronous errors -- confirm); any other
                 # failure just means "not numeric".
                 if e. async (space):
                     raise
                 quoted = True
Exemplo n.º 5
0
 def __init__(self, ub=None, newline=StrObject(u"\n"), context=None):
     """Store the builder, newline object and context mapping.

     A falsy ``ub`` is replaced by a fresh ``UnicodeBuilder``; a ``None``
     ``context`` is replaced by a new empty dict.
     """
     if not ub:
         ub = UnicodeBuilder()
     self.ub = ub
     self.newline = newline
     self.context = {} if context is None else context
Exemplo n.º 6
0
def escape_string(string):
    """Return ``string`` wrapped in double quotes with escapes applied.

    Characters in the range 0x20..0x7E and characters above 0xFF are
    emitted as they are, except that a backslash and a double quote are
    prefixed with a backslash.  Every other character becomes a backslash
    followed by its entry in ``character_escapes``, or by ``xHH`` (two
    lowercase hex digits) when it has no entry.
    """
    hexdigits = u"0123456789abcdef"
    out = UnicodeBuilder()
    out.append(u'"')
    for ch in string:
        n = ord(ch)
        # Dropping the "n > 0xFF" part would stop non-Latin-1 characters
        # from being passed through verbatim.
        passthrough = (0x20 <= n <= 0x7E) or n > 0xFF
        if not passthrough:
            fallback = u'x' + hexdigits[(n >> 4) & 15] + hexdigits[n & 15]
            out.append(u'\\' + character_escapes.get(n, fallback))
        elif ch == u'\\':
            out.append(u'\\\\')
        elif ch == u'"':
            out.append(u'\\"')
        else:
            out.append(ch)
    out.append(u'"')
    return out.build()
Exemplo n.º 7
0
 def fn():
     """Return ``compute_unique_id`` results for six kinds of objects.

     The objects are, in order: a byte string, a unicode string, a list,
     a dict, a ``StringBuilder`` and a ``UnicodeBuilder``.
     """
     id_str = compute_unique_id("foo")
     id_unicode = compute_unique_id(u"bar")
     id_list = compute_unique_id([1])
     id_dict = compute_unique_id({"foo": 3})
     id_sbuilder = compute_unique_id(StringBuilder())
     id_ubuilder = compute_unique_id(UnicodeBuilder())
     return (id_str, id_unicode, id_list, id_dict, id_sbuilder, id_ubuilder)
Exemplo n.º 8
0
def rawwcharp2unicoden(wcp, maxlen):
    """Build a unicode string from at most ``maxlen`` items of ``wcp``.

    Copying stops early at the first item whose value, cast to
    ``lltype.Signed``, is zero.  The result is passed through
    ``assert_str0`` before being returned.
    """
    builder = UnicodeBuilder(maxlen)
    for index in range(maxlen):
        if rffi.cast(lltype.Signed, wcp[index]) == 0:
            break
        builder.append(code_to_unichr(wcp[index]))
    return assert_str0(builder.build())
Exemplo n.º 9
0
def char_utf_8_length(char):
    """Return, as a ``W_Fixnum``, the length of ``char`` encoded as UTF-8.

    Same as (bytes-length (string->bytes/utf-8 (string char))).
    """
    single = UnicodeBuilder()
    single.append(char.value)
    utf8_chars = W_String.fromunicode(single.build()).as_charlist_utf8()
    encoded = values.W_Bytes.from_charlist(utf8_chars)
    return values.W_Fixnum(encoded.length())
Exemplo n.º 10
0
def xmlcharrefreplace_errors(space, w_exc):
    """Codec error callback producing ``&#NNN;`` character references.

    Only ``UnicodeEncodeError`` is handled; anything else raises a wrapped
    ``TypeError``.  Each character in ``object[start:end]`` is written as
    its decimal code point between ``&#`` and ``;``.  When ``MAXUNICODE``
    is 0xffff, a high surrogate directly followed by a low surrogate
    inside the range is combined into one code point first.

    Returns a wrapped 2-tuple ``(replacement text, end)``.
    """
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        raise oefmt(space.w_TypeError,
                    "don't know how to handle %T in error callback", w_exc)

    w_obj = space.getattr(w_exc, space.newtext('object'))
    text = space.realunicode_w(w_obj)
    index = space.int_w(space.getattr(w_exc, space.newtext('start')))
    w_end = space.getattr(w_exc, space.newtext('end'))
    stop = space.int_w(w_end)
    out = UnicodeBuilder()
    while index < stop:
        code = ord(text[index])
        if (MAXUNICODE == 0xffff and 0xD800 <= code <= 0xDBFF
                and index + 1 < stop
                and 0xDC00 <= ord(text[index + 1]) <= 0xDFFF):
            # Merge the surrogate pair and skip over its second half.
            low = ord(text[index + 1])
            code = (((code & 0x03FF) << 10) | (low & 0x03FF)) + 0x10000
            index += 1
        out.append(u"&#")
        out.append(unicode(str(code)))
        out.append(u";")
        index += 1
    return space.newtuple([space.newunicode(out.build()), w_end])
Exemplo n.º 11
0
def string_append(args):
    """Concatenate the ``W_String`` items of ``args`` into a new ``W_String``.

    The arguments are first joined through their ASCII form.  If
    ``as_str_ascii`` raises ``ValueError`` for some argument, what was
    collected so far is moved into a ``UnicodeBuilder`` and the remaining
    arguments, starting with the failing one, are appended as unicode.

    Raises ``SchemeException`` if an argument is not a ``W_String``.
    An empty ``args`` yields an empty ASCII string.
    """
    if not args:
        return W_String.fromascii("")

    total = len(args)
    builder = StringBuilder()
    unibuilder = None
    ascii_idx = 0
    try:
        for ascii_idx in range(total):
            item = args[ascii_idx]
            if not isinstance(item, W_String):
                raise SchemeException("string-append: expected a string")
            builder.append(item.as_str_ascii())
    except ValueError:
        # args[ascii_idx] was not ASCII: switch to unicode, keep the prefix.
        unibuilder = UnicodeBuilder()
        unibuilder.append(unicode(builder.build()))
        builder = None
        for rest_idx in range(ascii_idx, total):
            item = args[rest_idx]
            if not isinstance(item, W_String):
                raise SchemeException("string-append: expected a string")
            unibuilder.append(item.as_unicode())

    if unibuilder is not None:
        assert unibuilder is not None
        return W_String.fromunicode(unibuilder.build())
    assert builder is not None
    return W_String.fromascii(builder.build())
Exemplo n.º 12
0
def namereplace_errors(space, w_exc):
    """Codec error callback producing ``\\N{NAME}`` escapes.

    Only ``UnicodeEncodeError`` is handled; anything else raises a wrapped
    ``TypeError``.  For each character in ``object[start:end]`` the name
    from ``unicodedb.name`` is written inside ``\\N{...}``.  Characters
    for which the lookup raises ``KeyError`` are handed to
    ``raw_unicode_escape_helper_unicode`` instead.

    Returns a wrapped 2-tuple ``(replacement text, end)``.
    """
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        raise oefmt(space.w_TypeError,
                    "don't know how to handle %T in error callback", w_exc)

    w_obj = space.getattr(w_exc, space.newtext('object'))
    text = space.realunicode_w(w_obj)
    first = space.int_w(space.getattr(w_exc, space.newtext('start')))
    w_end = space.getattr(w_exc, space.newtext('end'))
    stop = space.int_w(w_end)
    out = UnicodeBuilder()
    for index in range(first, stop):
        code = ord(text[index])
        try:
            char_name = unicodedb.name(code)
        except KeyError:
            raw_unicode_escape_helper_unicode(out, code)
        else:
            out.append(u'\\N{')
            out.append(unicode(char_name))
            out.append(u'}')
    return space.newtuple([space.newunicode(out.build()), w_end])
Exemplo n.º 13
0
def backslashreplace_errors(space, w_exc):
    """Codec error callback producing ``\\xNN`` / ``\\uNNNN`` / ``\\UNNNNNNNN``.

    Only ``UnicodeEncodeError`` is handled; anything else raises a wrapped
    ``TypeError``.  Each character in ``object[start:end]`` is written as
    a backslash escape whose width depends on the code point: 8 hex digits
    from 0x10000 up, 4 from 0x100 up, otherwise 2.  Shorter values are
    left-padded with zeros.

    Returns a wrapped 2-tuple ``(replacement text, end)``.
    """
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        raise oefmt(space.w_TypeError,
                    "don't know how to handle %T in error callback", w_exc)

    text = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
    first = space.int_w(space.getattr(w_exc, space.wrap('start')))
    w_end = space.getattr(w_exc, space.wrap('end'))
    stop = space.int_w(w_end)
    out = UnicodeBuilder()
    for index in range(first, stop):
        code = ord(text[index])
        hexrepr = hex(code)  # textual form, starts with '0x'
        if code >= 0x10000:
            prefix, width = u"\\U", 8
        elif code >= 0x100:
            prefix, width = u"\\u", 4
        else:
            prefix, width = u"\\x", 2
        out.append(prefix)
        hexlen = len(hexrepr)
        padding = width + 2 - hexlen  # the +2 accounts for the '0x' prefix
        if padding > 0:
            out.append_multiple_char(u'0', padding)
        out.append_slice(unicode(hexrepr), 2, hexlen)
    return space.newtuple([space.wrap(out.build()), w_end])
Exemplo n.º 14
0
 def upper(self, w_str):
     """Return a new ``W_MutableString`` holding the uppercased characters.

     The characters come from the unerased storage of ``w_str``; each one
     is mapped through ``unicodedb.toupper`` and the result is stored
     again as an erased list.
     """
     chars = self.unerase(w_str.get_storage())
     out = UnicodeBuilder(len(chars))
     for ch in chars:
         upper_code = unicodedb.toupper(ord(ch))
         out.append(unichr(upper_code))
     return W_MutableString(self, self.erase(list(out.build())))
Exemplo n.º 15
0
def decode_json(action, ch, ctx):
    """Apply one JSON decoder action to the decoding context ``ctx``.

    ``action`` is a numeric code (0x1..0xE) selecting the operation, and
    ``ch`` is the current input character, used only by actions 0xB..0xD.
    ``ctx.ds`` is used as a stack of decoded values, ``ctx.ss`` is a
    ``UnicodeBuilder`` collecting the text of the current string or number,
    and ``ctx.es`` is a ``UnicodeBuilder`` collecting the hex digits of a
    unicode escape.  Any other action code trips an assertion.
    """
    if action == 0x1:  # push list
        ctx.ds.append(space.List([]))
    # Push object to ds
    elif action == 0x2:  # push object
        ctx.ds.append(space.Dict())
    elif action == 0x3:  # pop & append
        # The popped value becomes the last element of the list on top.
        val = ctx.ds.pop()
        top = ctx.ds[len(ctx.ds) - 1]
        assert isinstance(top, List)  # we can trust this.
        top.contents.append(val)
    elif action == 0x4:  # pop pop & setitem
        # The value was pushed after the key, so it is popped first.
        val = ctx.ds.pop()
        key = ctx.ds.pop()
        top = ctx.ds[len(ctx.ds) - 1]
        assert isinstance(top, Dict)  # again..
        top.data[key] = val
    elif action == 0x5:  # push null
        ctx.ds.append(space.null)
    elif action == 0x6:  # push true
        ctx.ds.append(space.true)
    elif action == 0x7:  # push false
        ctx.ds.append(space.false)
    elif action == 0x8:  # push string
        # Finish the collected string and reset both builders.
        val = ctx.ss.build()
        ctx.ds.append(space.String(val))
        ctx.ss = UnicodeBuilder()
        ctx.es = UnicodeBuilder()
    elif action == 0x9:
        val = int(ctx.ss.build().encode('utf-8'))  # push int
        ctx.ds.append(space.Integer(val))
        ctx.ss = UnicodeBuilder()
    elif action == 0xA:
        val = float(ctx.ss.build().encode('utf-8'))  # push float
        ctx.ds.append(space.Float(val))
        ctx.ss = UnicodeBuilder()
    elif action == 0xB:  # push ch to ss
        ctx.ss.append(ch)
    elif action == 0xC:  # push ch to es
        ctx.es.append(ch)
    elif action == 0xD:  # push escape
        # escape_characters maps the escape letter to a code point.
        ctx.ss.append(unichr(escape_characters[ch]))
    elif action == 0xE:  # push unicode point
        # The digits collected in es are parsed as a base-16 code point.
        ctx.ss.append(unichr(int(ctx.es.build().encode('utf-8'), 16)))
        ctx.es = UnicodeBuilder()
    else:  # This is very unlikely to happen.
        assert False, "JSON decoder bug"
Exemplo n.º 16
0
 def toLowerCase(self):
     """Return ``self._s`` with each character passed through ``unicodedb.tolower``."""
     # The current length is only a size hint for the builder: ideally the
     # mapping is one-to-one, otherwise the hint is a slight overestimate.
     lowered = UnicodeBuilder(len(self._s))
     for ch in self._s:
         lower_code = unicodedb.tolower(ord(ch))
         lowered.append(unichr(lower_code))
     return lowered.build()
Exemplo n.º 17
0
 def func():
     """Exercise append, append_slice and append_multiple_char, then build.

     The builder is created with a size hint of 32, which is less than the
     total number of characters appended.
     """
     builder = UnicodeBuilder(32)
     # Plain appends of increasing length.
     builder.append(u'a')
     builder.append(u'abc')
     builder.append(u'abcdef')
     # One character taken from the middle of a string.
     builder.append_slice(u'abc', 1, 2)
     # A run of forty identical characters.
     builder.append_multiple_char(u'u', 40)
     return builder.build()
Exemplo n.º 18
0
 def f(n):
     """Loop ``n`` times checking that a fresh builder builds an empty string.

     Raises ``ValueError`` if the built string is not empty; returns the
     final value of ``n``.
     """
     while n > 0:
         jitdriver.jit_merge_point(n=n)
         builder = UnicodeBuilder()
         built = builder.build()
         if built != u"":
             raise ValueError
         n -= 1
     return n
Exemplo n.º 19
0
def _parse_plain_flags(source):
    """Collect the items returned by ``source.get()`` up to the next ``:``.

    The colon is consumed but not included in the returned string.
    """
    flags = UnicodeBuilder(4)
    ch = source.get()
    while ch != u":":
        flags.append(ch)
        ch = source.get()
    return flags.build()
Exemplo n.º 20
0
 def f(n):
     """Loop ``n`` times checking that appending ``u""`` builds an empty string.

     Raises ``ValueError`` if the built string has a non-zero length;
     returns the final value of ``n``.
     """
     while n > 0:
         jitdriver.jit_merge_point(n=n)
         builder = UnicodeBuilder()
         builder.append(u"")
         built = builder.build()
         if len(built) != 0:
             raise ValueError
         n -= 1
     return n
Exemplo n.º 21
0
    def test_prebuilt_unicode_builder(self):
        """A builder filled before interpretation can still be built inside it."""
        prebuilt = UnicodeBuilder(100)
        prebuilt.append(u"abc")

        def f():
            # Only reads the builder created outside the function.
            return len(prebuilt.build())

        length = self.interpret(f, [])
        assert length == 3
Exemplo n.º 22
0
def test_unicode_builder():
    """Check length tracking and the final result of a ``UnicodeBuilder``."""
    builder = UnicodeBuilder()
    builder.append(u'a')
    builder.append(u'abc')
    builder.append_slice(u'abcdef', 1, 2)
    # So far: u'a' + u'abc' + u'b'.
    assert builder.getlength() == len('aabcb')
    builder.append_multiple_char(u'd', 4)
    built = builder.build()
    assert built == 'aabcbdddd'
    assert isinstance(built, unicode)
Exemplo n.º 23
0
def to_upper_case(this, args):
    """Return ``this.to_string()`` with each character uppercased.

    Characters are mapped one by one through ``unicodedb.toupper``.
    ``args`` is not used.
    """
    from rpython.rlib.unicodedata import unicodedb

    text = this.to_string()
    out = UnicodeBuilder(len(text))
    for ch in text:
        upper_code = unicodedb.toupper(ord(ch))
        out.append(unichr(upper_code))
    return out.build()
Exemplo n.º 24
0
def from_char_code(this, args):
    """Build a string from the character codes given in ``args``.

    Each argument is converted with ``ToInt16()`` and then ``unichr``.
    ``this`` is not used.
    """
    out = UnicodeBuilder(len(args))
    for arg in args:
        out.append(unichr(arg.ToInt16()))
    return out.build()
Exemplo n.º 25
0
 def f(n):
     """Loop ``n`` times appending two slices of a ten-character string.

     The slices are ``[1:n]`` and ``[0:n]``; the built string must have
     length ``2 * n - 1``, otherwise ``ValueError`` is raised.  Returns the
     final value of ``n``.
     """
     while n > 0:
         jitdriver.jit_merge_point(n=n)
         builder = UnicodeBuilder()
         builder.append_slice(u"abcdefghij", 1, n)
         builder.append_slice(u"abcdefghij", 0, n)
         built = builder.build()
         expected_len = 2 * n - 1
         if len(built) != expected_len:
             raise ValueError
         n -= 1
     return n
Exemplo n.º 26
0
    def readline_w(self, space, w_limit=None):
        """Read one line of decoded text and return it as wrapped unicode.

        ``w_limit`` is converted with ``convert_size``; a negative limit
        means the line length is not capped.  Reading stops when
        ``self._scan_line_ending`` reports a line ending, when the limit is
        reached, or when ``self._ensure_data`` reports that no data is left.
        """
        self._check_attached(space)
        self._check_closed(space)
        self._writeflush(space)

        limit = convert_size(space, w_limit)
        # Characters left over from the previous chunk that could not yet be
        # classified (see the u'\r\n' handling below).
        remnant = None
        builder = UnicodeBuilder()
        while True:
            # First, get some data if necessary
            has_data = self._ensure_data(space)
            if not has_data:
                # end of file
                if remnant:
                    builder.append(remnant)
                break

            if remnant:
                # A remnant is only expected when newlines are not translated
                # and the line ending is the two-character u'\r\n'.
                assert not self.readtranslate and self.readnl == u'\r\n'
                assert self.decoded.pos == 0
                if remnant == u'\r' and self.decoded.text[0] == u'\n':
                    # The line ending was split across two chunks.
                    builder.append(u'\r\n')
                    self.decoded.pos = 1
                    remnant = None
                    break
                else:
                    builder.append(remnant)
                    remnant = None
                    continue

            if limit >= 0:
                remaining = limit - builder.getlength()
                assert remaining >= 0
            else:
                remaining = -1
            start = self.decoded.pos
            assert start >= 0
            found = self._scan_line_ending(remaining)
            # Whatever the scan moved past belongs to the current line.
            end_scan = self.decoded.pos
            if end_scan > start:
                s = self.decoded.text[start:end_scan]
                builder.append(s)

            if found or (limit >= 0 and builder.getlength() >= limit):
                break

            # There may be some remaining chars we'll have to prepend to the
            # next chunk of data
            if not self.decoded.exhausted():
                remnant = self.decoded.get_chars(-1)
            # We have consumed the buffer
            self.decoded.reset()

        result = builder.build()
        return space.newunicode(result)
Exemplo n.º 27
0
 def f(n):
     """Loop ``n`` times building a string from ``s1`` plus two newlines.

     ``s1`` is the decimal text of the initial ``n`` repeated 16 times.
     The built string must have length 34, otherwise ``ValueError`` is
     raised.  Returns the final value of ``n``.
     """
     s1 = unicode(str(n) * 16)
     while n > 0:
         jitdriver.jit_merge_point(n=n, s1=s1)
         builder = UnicodeBuilder(32)
         builder.append(s1)
         builder.append(u"\n\n")
         built = builder.build()
         if len(built) != 34:
             raise ValueError
         n -= 1
     return n
Exemplo n.º 28
0
 def f(n):
     """Loop ``n`` times checking that ``append_slice(u"fOo!", 1, 3)`` gives ``u"Oo"``.

     Raises ``ValueError`` if the length or either character is wrong;
     returns the final value of ``n``.
     """
     while n > 0:
         jitdriver.jit_merge_point(n=n)
         builder = UnicodeBuilder()
         builder.append_slice(u"fOo!", 1, 3)
         built = builder.build()
         if len(built) != 2:
             raise ValueError
         if built[0] != u"O":
             raise ValueError
         if built[1] != u"o":
             raise ValueError
         n -= 1
     return n
Exemplo n.º 29
0
 def f(n):
     """Loop ``n`` times checking that appending ``u"ab"`` builds ``u"ab"``.

     Raises ``ValueError`` if the length or either character is wrong;
     returns the final value of ``n``.
     """
     while n > 0:
         jitdriver.jit_merge_point(n=n)
         builder = UnicodeBuilder()
         builder.append(u"ab")
         built = builder.build()
         if len(built) != 2:
             raise ValueError
         if built[0] != u"a":
             raise ValueError
         if built[1] != u"b":
             raise ValueError
         n -= 1
     return n
Exemplo n.º 30
0
def _parse_count(source):
    """Collect the leading run of digits from ``source`` and return it.

    Items are taken with ``source.get()`` while ``is_digit`` accepts the
    ordinal of their first character.  The first non-digit is pushed back
    by restoring ``source.pos`` to its value before the read.
    """
    digits = UnicodeBuilder(2)
    while True:
        mark = source.pos
        ch = source.get()
        if not is_digit(ord(ch[0])):
            # Rewind so the caller sees the non-digit again.
            source.pos = mark
            break
        digits.append(ch)
    return digits.build()