def charmap_decode(space, s, errors="strict", w_mapping=None):
    """Decode the string `s` through the character map `w_mapping`.

    When `w_mapping` is the wrapped None, decoding is delegated to
    latin_1_decode.  Otherwise every character of `s` is looked up with
    _extract_from_mapping and appended to the output; characters that
    cannot be mapped or appended are passed to the decode error handler
    selected by `errors`.

    Returns a wrapped tuple (decoded unicode, len(s)).
    """
    size = len(s)

    # Default to Latin-1
    if space.is_true(space.is_(w_mapping, space.w_None)):
        return latin_1_decode(space, s, errors, space.w_False)

    if size == 0:
        return space.newtuple([space.wrap(u''), space.wrap(0)])

    # fast path for all the stuff in the encodings module
    mapping_w = None
    if space.is_true(space.isinstance(w_mapping, space.w_tuple)):
        mapping_w = space.fixedview(w_mapping)

    builder = UnicodeBuilder(size)
    pos = 0
    while pos < size:
        # Get the mapping entry (char ordinal -> integer, Unicode char or None)
        w_x = _extract_from_mapping(space, mapping_w, w_mapping, s[pos])
        if w_x is not None and _append_unicode(space, builder, w_x):
            pos += 1
        else:
            # The error handler returns the replacement text together with
            # the input position at which decoding resumes.
            state = space.fromcache(CodecState)
            replacement, pos = state.decode_error_handler(
                errors, "charmap", "character maps to <undefined>",
                s, pos, pos + 1)
            builder.append(replacement)

    decoded = builder.build()
    return space.newtuple([space.wrap(decoded), space.wrap(size)])
class W_UnicodeBuilder(Wrappable):
    """Wrapped object holding a UnicodeBuilder.

    Text is accumulated with descr_append / descr_append_slice and
    retrieved with descr_build.  After descr_build has been called the
    builder is marked as done and every further operation raises
    ValueError.
    """

    def __init__(self, space, size):
        # A negative size means that no initial size is passed on to the
        # underlying builder.
        self.builder = UnicodeBuilder() if size < 0 else UnicodeBuilder(size)
        self.done = False

    def _check_done(self, space):
        """Raise a wrapped ValueError if the builder was already built."""
        if not self.done:
            return
        raise OperationError(space.w_ValueError,
                             space.wrap("Can't operate on a done builder"))

    # NOTE(review): the first argument is `space`, not `self` -- presumably
    # this is exposed as the type's __new__; confirm against the typedef.
    @unwrap_spec(size=int)
    def descr__new__(space, w_subtype, size=-1):
        """Create a new W_UnicodeBuilder; `size` defaults to -1."""
        return W_UnicodeBuilder(space, size)

    @unwrap_spec(s=unicode)
    def descr_append(self, space, s):
        """Append the whole unicode string `s`."""
        self._check_done(space)
        self.builder.append(s)

    @unwrap_spec(s=unicode, start=int, end=int)
    def descr_append_slice(self, space, s, start, end):
        """Append s[start:end]; the bounds must satisfy
        0 <= start <= end <= len(s), otherwise ValueError is raised."""
        self._check_done(space)
        if start < 0 or end < start or len(s) < end:
            raise OperationError(space.w_ValueError,
                                 space.wrap("bad start/stop"))
        self.builder.append_slice(s, start, end)

    def descr_build(self, space):
        """Return the accumulated text, wrapped, and mark the builder done."""
        self._check_done(space)
        w_result = space.wrap(self.builder.build())
        self.done = True
        return w_result
def charmap_decode(space, s, errors="strict", w_mapping=None):
    """Decode `s` character by character using the map `w_mapping`.

    A wrapped None as `w_mapping` selects plain Latin-1 decoding via
    latin_1_decode.  Characters for which the mapping lookup returns None,
    or which _append_unicode refuses, are handed to the decode error
    handler registered for `errors`.

    Returns a wrapped tuple (decoded unicode, len(s)).
    """
    size = len(s)

    # Default to Latin-1
    if space.is_true(space.is_(w_mapping, space.w_None)):
        return latin_1_decode(space, s, errors, space.w_False)

    if size == 0:
        return space.newtuple([space.wrap(u''), space.wrap(0)])

    # fast path for all the stuff in the encodings module
    if space.is_true(space.isinstance(w_mapping, space.w_tuple)):
        mapping_w = space.fixedview(w_mapping)
    else:
        mapping_w = None

    out = UnicodeBuilder(size)
    index = 0
    while index < size:
        # Get the mapping entry (char ordinal -> integer, Unicode char or None)
        char = s[index]
        w_x = _extract_from_mapping(space, mapping_w, w_mapping, char)
        if w_x is None or not _append_unicode(space, out, w_x):
            # Undefined mapping: the handler supplies the text to insert
            # and the position where decoding continues.
            state = space.fromcache(CodecState)
            substitute, index = state.decode_error_handler(
                errors, "charmap", "character maps to <undefined>",
                s, index, index + 1)
            out.append(substitute)
            continue
        index += 1

    return space.newtuple([space.wrap(out.build()), space.wrap(size)])
def test_unicode_builder():
    """Check that UnicodeBuilder concatenates appended pieces in order and
    that build() returns a unicode object."""
    s = UnicodeBuilder()
    s.append(u'a')
    s.append(u'abc')
    s.append_slice(u'abcdef', 1, 2)
    # Feed the unicode builder unicode characters only, and compare against
    # a unicode literal, instead of relying on implicit str coercion.
    s.append_multiple_char(u'd', 4)
    assert s.build() == u'aabcbdddd'
    assert isinstance(s.build(), unicode)
def unicode_capitalize__Unicode(space, w_self):
    """Return a new W_UnicodeObject whose first character is converted with
    unicodedb.toupper and all remaining characters with unicodedb.tolower.

    An empty input yields W_UnicodeObject.EMPTY.
    """
    value = w_self._value
    length = len(value)
    if length == 0:
        return W_UnicodeObject.EMPTY

    result = UnicodeBuilder(length)
    first = unicodedb.toupper(ord(value[0]))
    result.append(unichr(first))
    for index in range(1, length):
        lowered = unicodedb.tolower(ord(value[index]))
        result.append(unichr(lowered))
    return W_UnicodeObject(result.build())
def unicode_title__Unicode(space, w_self):
    """Return a titlecased copy of the unicode object `w_self`.

    A character that follows a cased character (per unicodedb.iscased) is
    converted with unicodedb.tolower; every other character is converted
    with unicodedb.totitle.  An empty input is returned unchanged.
    """
    value = w_self._value
    if len(value) == 0:
        return w_self

    result = UnicodeBuilder(len(value))
    after_cased = False
    for char in value:
        code = ord(char)
        if after_cased:
            mapped = unicodedb.tolower(code)
        else:
            mapped = unicodedb.totitle(code)
        result.append(unichr(mapped))
        # The casedness of the *original* character drives the next step.
        after_cased = unicodedb.iscased(code)
    return W_UnicodeObject(result.build())
def backslashreplace_errors(space, w_exc):
    """Error callback replacing unencodable characters by backslash escapes.

    For a UnicodeEncodeError, every character of exc.object in the range
    [exc.start, exc.end) is rendered as a two-digit (x), four-digit (u) or
    eight-digit (U) hexadecimal escape, depending on its ordinal.

    Returns a wrapped tuple (replacement text, exc.end).  Any other
    exception type raises TypeError.
    """
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        typename = space.type(w_exc).getname(space, '?')
        raise operationerrfmt(space.w_TypeError,
            "don't know how to handle %s in error callback", typename)

    obj = space.realunicode_w(space.getattr(w_exc, space.wrap('object')))
    start = space.int_w(space.getattr(w_exc, space.wrap('start')))
    w_end = space.getattr(w_exc, space.wrap('end'))
    end = space.int_w(w_end)

    builder = UnicodeBuilder()
    for index in range(start, end):
        code = ord(obj[index])
        hexrepr = hex(code)
        # Pick the escape prefix and the number of hex digits it requires.
        if code >= 0x10000:
            builder.append(u"\\U")
            width = 8
        elif code >= 0x100:
            builder.append(u"\\u")
            width = 4
        else:
            builder.append(u"\\x")
            width = 2
        hexlen = len(hexrepr)
        # hexrepr starts with '0x', hence the extra 2 when computing padding.
        padding = width + 2 - hexlen
        if padding > 0:
            builder.append_multiple_char(u'0', padding)
        builder.append_slice(unicode(hexrepr), 2, hexlen)
    return space.newtuple([space.wrap(builder.build()), w_end])
def backslashreplace_errors(space, w_exc):
    """Codec error handler that substitutes backslash escape sequences.

    Only UnicodeEncodeError is supported: each character of exc.object
    between exc.start and exc.end is written as an x, u or U escape with
    2, 4 or 8 zero-padded hex digits respectively.

    Returns a wrapped tuple (replacement text, exc.end); raises TypeError
    for any other exception type.
    """
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        typename = space.type(w_exc).getname(space)
        raise operationerrfmt(space.w_TypeError,
            "don't know how to handle %s in error callback", typename)

    w_object = space.getattr(w_exc, space.wrap('object'))
    obj = space.realunicode_w(w_object)
    w_start = space.getattr(w_exc, space.wrap('start'))
    start = space.int_w(w_start)
    w_end = space.getattr(w_exc, space.wrap('end'))
    end = space.int_w(w_end)

    out = UnicodeBuilder()
    cursor = start
    while cursor < end:
        ordinal = ord(obj[cursor])
        cursor += 1
        text = hex(ordinal)
        if ordinal >= 0x10000:
            out.append(u"\\U")
            digits = 8
        elif ordinal >= 0x100:
            out.append(u"\\u")
            digits = 4
        else:
            out.append(u"\\x")
            digits = 2
        textlen = len(text)
        # text carries a leading '0x' that is not part of the digit count.
        missing = digits + 2 - textlen
        if missing > 0:
            out.append_multiple_char(u'0', missing)
        out.append_slice(unicode(text), 2, textlen)
    return space.newtuple([space.wrap(out.build()), w_end])
def func():
    """Exercise append, append_slice and append_multiple_char on a
    UnicodeBuilder and return the built string."""
    builder = UnicodeBuilder()
    builder.append(u'a')
    builder.append(u'abc')
    builder.append(u'abcdef')
    builder.append_slice(u'abc', 1, 2)
    builder.append_multiple_char(u'u', 4)
    result = builder.build()
    return result
def unicode_swapcase__Unicode(space, w_self):
    """Return a new W_UnicodeObject in which lowercase characters are
    converted with unicodedb.toupper, uppercase characters with
    unicodedb.tolower, and all other characters are copied unchanged."""
    value = w_self._value
    result = UnicodeBuilder(len(value))
    for char in value:
        code = ord(char)
        if unicodedb.islower(code):
            swapped = unichr(unicodedb.toupper(code))
        elif unicodedb.isupper(code):
            swapped = unichr(unicodedb.tolower(code))
        else:
            swapped = char
        result.append(swapped)
    return W_UnicodeObject(result.build())
def _unicode_join_many_items(space, w_self, list_w, size): self = w_self._value sb = UnicodeBuilder() for i in range(size): if self and i != 0: sb.append(self) w_s = list_w[i] if isinstance(w_s, W_UnicodeObject): # shortcut for performance sb.append(w_s._value) else: try: sb.append(space.unicode_w(w_s)) except OperationError, e: if not e.match(space, space.w_TypeError): raise raise operationerrfmt(space.w_TypeError, "sequence item %d: expected string or Unicode", i)
def xmlcharrefreplace_errors(space, w_exc):
    """Error callback replacing unencodable characters by XML numeric
    character references.

    For a UnicodeEncodeError, each character of exc.object in the range
    [exc.start, exc.end) becomes "&#" + decimal ordinal + ";".

    Returns a wrapped tuple (replacement text, exc.end).  Any other
    exception type raises TypeError.
    """
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        typename = space.type(w_exc).getname(space)
        raise operationerrfmt(space.w_TypeError,
            "don't know how to handle %s in error callback", typename)

    obj = space.realunicode_w(space.getattr(w_exc, space.wrap("object")))
    start = space.int_w(space.getattr(w_exc, space.wrap("start")))
    w_end = space.getattr(w_exc, space.wrap("end"))
    end = space.int_w(w_end)

    builder = UnicodeBuilder()
    for index in range(start, end):
        ordinal = ord(obj[index])
        builder.append(u"&#")
        builder.append(unicode(str(ordinal)))
        builder.append(u";")
    return space.newtuple([space.wrap(builder.build()), w_end])
def xmlcharrefreplace_errors(space, w_exc):
    """Codec error handler that emits XML numeric character references.

    Only UnicodeEncodeError is supported: every character of exc.object
    between exc.start and exc.end is written as "&#<decimal ordinal>;".

    Returns a wrapped tuple (replacement text, exc.end); raises TypeError
    for any other exception type.
    """
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        typename = space.type(w_exc).getname(space)
        raise operationerrfmt(space.w_TypeError,
            "don't know how to handle %s in error callback", typename)

    w_object = space.getattr(w_exc, space.wrap('object'))
    obj = space.realunicode_w(w_object)
    w_start = space.getattr(w_exc, space.wrap('start'))
    start = space.int_w(w_start)
    w_end = space.getattr(w_exc, space.wrap('end'))
    end = space.int_w(w_end)

    out = UnicodeBuilder()
    cursor = start
    while cursor < end:
        decimal = str(ord(obj[cursor]))
        cursor += 1
        out.append(u"&#")
        out.append(unicode(decimal))
        out.append(u";")
    return space.newtuple([space.wrap(out.build()), w_end])
def func():
    """Append two strings to a UnicodeBuilder and return the length it
    reports through getlength()."""
    builder = UnicodeBuilder()
    builder.append(u"a")
    builder.append(u"abc")
    length = builder.getlength()
    return length
def unicode_upper__Unicode(space, w_self):
    """Return a new W_UnicodeObject in which every character of `w_self`
    has been converted with unicodedb.toupper."""
    value = w_self._value
    result = UnicodeBuilder(len(value))
    for char in value:
        upper_code = unicodedb.toupper(ord(char))
        result.append(unichr(upper_code))
    return W_UnicodeObject(result.build())
jit.loop_unrolling_heuristic(list_w, size)) def _unicode_join_many_items(space, w_self, list_w, size): self = w_self._value prealloc_size = len(self) * (size - 1) for i in range(size): try: prealloc_size += len(space.unicode_w(list_w[i])) except OperationError, e: if not e.match(space, space.w_TypeError): raise raise operationerrfmt(space.w_TypeError, "sequence item %d: expected string or Unicode", i) sb = UnicodeBuilder(prealloc_size) for i in range(size): if self and i != 0: sb.append(self) w_s = list_w[i] sb.append(space.unicode_w(w_s)) return space.wrap(sb.build()) def hash__Unicode(space, w_uni): s = w_uni._value if space.config.objspace.std.withrope: # be compatible with the special ropes hash # XXX no caching if len(s) == 0: return space.wrap(0) x = 0 for c in s: x = intmask((1000003 * x) + ord(c)) x <<= 1