def str_translate__String_ANY_ANY(space, w_string, w_table, w_deletechars=''):
    """charfilter - unicode handling is not implemented

    Return a copy of the string where all characters occurring in the
    optional argument deletechars are removed, and the remaining
    characters have been mapped through the given translation table,
    which must be a string of length 256"""
    if space.is_w(w_table, space.w_None):
        table = DEFAULT_NOOP_TABLE
    else:
        table = space.bufferstr_w(w_table)
        if len(table) != 256:
            raise OperationError(
                space.w_ValueError,
                space.wrap("translation table must be 256 characters long"))

    string = w_string._value
    deletechars = space.str_w(w_deletechars)
    if not deletechars:
        # No characters to delete: translate every byte in one pass.
        out = StringBuilder(len(string))
        for ch in string:
            out.append(table[ord(ch)])
    else:
        # Build a 256-entry membership table of bytes to drop, then
        # translate whatever survives the filter.
        out = StringBuilder()
        drop = [False] * 256
        for ch in deletechars:
            drop[ord(ch)] = True
        for ch in string:
            code = ord(ch)
            if not drop[code]:
                out.append(table[code])
    return W_StringObject(out.build())
def unicode_encode_utf_16_helper(s, size, errors,
                                 errorhandler=None,
                                 allow_surrogates=True,
                                 byteorder='little',
                                 public_encoding_name='utf16'):
    # Encode the first `size` characters of unicode string `s` as UTF-16
    # bytes.  byteorder 'native' writes a BOM first and then encodes with
    # the host BYTEORDER.  Lone surrogates go through `errorhandler` when
    # allow_surrogates is false.
    if errorhandler is None:
        errorhandler = default_unicode_error_encode
    if size == 0:
        if byteorder == 'native':
            result = StringBuilder(2)
            _STORECHAR(result, 0xFEFF, BYTEORDER)  # BOM only
            return result.build()
        return ""
    result = StringBuilder(size * 2 + 2)
    if byteorder == 'native':
        _STORECHAR(result, 0xFEFF, BYTEORDER)
        byteorder = BYTEORDER
    pos = 0
    while pos < size:
        ch = ord(s[pos])
        pos += 1
        if ch < 0xD800:
            _STORECHAR(result, ch, byteorder)
        elif ch >= 0x10000:
            # Astral code point: emit a high/low surrogate pair.
            _STORECHAR(result, 0xD800 | ((ch - 0x10000) >> 10), byteorder)
            _STORECHAR(result, 0xDC00 | ((ch - 0x10000) & 0x3FF), byteorder)
        elif ch >= 0xE000 or allow_surrogates:
            _STORECHAR(result, ch, byteorder)
        else:
            # Lone surrogate while disallowed: ask the error handler for a
            # replacement (unicode `ru`, or raw bytes `rs` on py3k).
            ru, rs, pos = errorhandler(errors, public_encoding_name,
                                       'surrogates not allowed',
                                       s, pos - 1, pos)
            if rs is not None:
                # py3k only
                if len(rs) % 2 != 0:
                    # Raw replacement must be whole 16-bit code units.
                    errorhandler('strict', public_encoding_name,
                                 'surrogates not allowed',
                                 s, pos - 1, pos)
                result.append(rs)
                continue
            for ch in ru:
                if ord(ch) < 0xD800:
                    _STORECHAR(result, ord(ch), byteorder)
                else:
                    # Replacement text itself contains a surrogate: error.
                    errorhandler('strict', public_encoding_name,
                                 'surrogates not allowed',
                                 s, pos - 1, pos)
            continue
    return result.build()
def unicode_encode_utf_32_helper(s, size, errors,
                                 errorhandler=None,
                                 allow_surrogates=True,
                                 byteorder='little',
                                 public_encoding_name='utf32'):
    # Encode the first `size` characters of `s` as UTF-32 bytes.
    # byteorder 'native' writes a BOM first and then uses the host
    # BYTEORDER.  On narrow builds, surrogate pairs in `s` are combined
    # into a single code point before storing.
    if errorhandler is None:
        errorhandler = default_unicode_error_encode
    if size == 0:
        if byteorder == 'native':
            result = StringBuilder(4)
            _STORECHAR32(result, 0xFEFF, BYTEORDER)  # BOM only
            return result.build()
        return ""
    result = StringBuilder(size * 4 + 4)
    if byteorder == 'native':
        _STORECHAR32(result, 0xFEFF, BYTEORDER)
        byteorder = BYTEORDER
    pos = 0
    while pos < size:
        ch = ord(s[pos])
        pos += 1
        ch2 = 0
        if not allow_surrogates and 0xD800 <= ch < 0xE000:
            # Surrogate while disallowed: delegate to the error handler,
            # which may supply replacement text (`ru`) or raw bytes (`rs`).
            ru, rs, pos = errorhandler(errors, public_encoding_name,
                                       'surrogates not allowed',
                                       s, pos - 1, pos)
            if rs is not None:
                # py3k only
                if len(rs) % 4 != 0:
                    # Raw replacement must be whole 32-bit code units.
                    errorhandler('strict', public_encoding_name,
                                 'surrogates not allowed',
                                 s, pos - 1, pos)
                result.append(rs)
                continue
            for ch in ru:
                if ord(ch) < 0xD800:
                    _STORECHAR32(result, ord(ch), byteorder)
                else:
                    errorhandler('strict', public_encoding_name,
                                 'surrogates not allowed',
                                 s, pos - 1, pos)
            continue
        if 0xD800 <= ch < 0xDC00 and MAXUNICODE < 65536 and pos < size:
            # Narrow build: try to combine a high surrogate with the
            # following low surrogate into one astral code point.
            ch2 = ord(s[pos])
            if 0xDC00 <= ch2 < 0xE000:
                ch = (((ch & 0x3FF) << 10) | (ch2 & 0x3FF)) + 0x10000
                pos += 1
        _STORECHAR32(result, ch, byteorder)
    return result.build()
def a2b_hqx(space, ascii): """Decode .hqx coding. Returns (bin, done).""" # overestimate the resulting length res = StringBuilder(len(ascii)) done = 0 pending_value = 0 pending_bits = 0 for c in ascii: n = ord(table_a2b_hqx[ord(c)]) if n <= 0x3F: pending_value = (pending_value << 6) | n pending_bits += 6 if pending_bits == 24: # flush res.append(chr(pending_value >> 16)) res.append(chr((pending_value >> 8) & 0xff)) res.append(chr(pending_value & 0xff)) pending_value = 0 pending_bits = 0 elif n == FAIL: raise_Error(space, 'Illegal character') elif n == DONE: if pending_bits >= 8: res.append(chr(pending_value >> (pending_bits - 8))) if pending_bits >= 16: res.append(chr((pending_value >> (pending_bits - 16)) & 0xff)) done = 1 break #elif n == SKIP: pass else: if pending_bits > 0: raise_Incomplete(space, 'String has incomplete number of bytes') return space.newtuple([space.newbytes(res.build()), space.newint(done)])
def draw(self):
    """Render all segments, then the reset sequence and a trailing space."""
    parts = StringBuilder()
    segment_count = len(self.segments)
    for index in range(segment_count):
        parts.append(self.draw_segment(index))
    parts.append(self.reset)
    parts.append(' ')
    return parts.build()
def namereplace_errors(space, w_exc):
    # Codec error handler: replace each unencodable character with a
    # \N{NAME} escape, falling back to a raw \uXXXX-style escape for
    # code points without a Unicode name.
    check_exception(space, w_exc)
    if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        w_obj = space.getattr(w_exc, space.newtext('object'))
        space.realutf8_w(w_obj)  # for errors
        w_obj = space.convert_arg_to_w_unicode(w_obj)
        start = space.int_w(space.getattr(w_exc, space.newtext('start')))
        w_end = space.getattr(w_exc, space.newtext('end'))
        end = space.int_w(w_end)
        # Convert codepoint indices into byte positions in the utf8 buffer.
        start = w_obj._index_to_byte(start)
        end = w_obj._index_to_byte(end)
        builder = StringBuilder()
        obj = w_obj._utf8
        pos = start
        while pos < end:
            oc = rutf8.codepoint_at_pos(obj, pos)
            try:
                name = unicodedb.name(oc)
            except KeyError:
                # No Unicode name for this code point: raw escape.
                unicodehelper.raw_unicode_escape_helper(builder, oc)
            else:
                builder.append('\\N{')
                builder.append(name)
                builder.append('}')
            pos = rutf8.next_codepoint_pos(obj, pos)
        r = builder.build()
        lgt = rutf8.check_utf8(r, True)
        return space.newtuple([space.newutf8(r, lgt), w_end])
    else:
        raise oefmt(space.w_TypeError,
                    "don't know how to handle %T in error callback", w_exc)
def b2a_base64(space, bin):
    "Base64-code line of data."
    # Every 3 input bytes become 4 output characters; +1 for the newline.
    newlength = (len(bin) + 2) // 3
    try:
        newlength = ovfcheck(newlength * 4)
    except OverflowError:
        raise OperationError(space.w_MemoryError, space.w_None)
    newlength += 1
    out = StringBuilder(newlength)

    acc = 0     # bit accumulator
    nbits = 0   # number of valid bits currently in `acc`
    for c in bin:
        # Shift each byte in, then drain complete 6-bit groups.
        acc = (acc << 8) | ord(c)
        nbits += 8
        while nbits >= 6:
            out.append(table_b2a_base64[(acc >> (nbits - 6)) & 0x3f])
            nbits -= 6

    # Emit the final partial group (if any) plus the required '=' padding.
    if nbits == 2:
        out.append(table_b2a_base64[(acc & 3) << 4])
        out.append(PAD)
        out.append(PAD)
    elif nbits == 4:
        out.append(table_b2a_base64[(acc & 0xf) << 2])
        out.append(PAD)
    out.append('\n')
    return space.wrapbytes(out.build())
def xmlcharrefreplace_errors(space, w_exc):
    # Codec error handler: replace each unencodable character in the
    # failing slice with an XML numeric character reference ("&#NNN;").
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        raise oefmt(space.w_TypeError,
                    "don't know how to handle %T in error callback", w_exc)

    w_obj = space.getattr(w_exc, space.newtext('object'))
    space.realutf8_w(w_obj)  # for errors
    w_obj = space.convert_arg_to_w_unicode(w_obj)
    start = space.int_w(space.getattr(w_exc, space.newtext('start')))
    w_end = space.getattr(w_exc, space.newtext('end'))
    end = space.int_w(w_end)

    # Map codepoint indices to byte offsets into the utf8 buffer.
    start = w_obj._index_to_byte(start)
    end = w_obj._index_to_byte(end)
    utf8 = w_obj._utf8

    out = StringBuilder()
    bytepos = start
    while bytepos < end:
        codepoint = rutf8.codepoint_at_pos(utf8, bytepos)
        out.append("&#")
        out.append(str(codepoint))
        out.append(";")
        bytepos = rutf8.next_codepoint_pos(utf8, bytepos)

    replacement = out.build()
    lgt = rutf8.check_utf8(replacement, True)
    return space.newtuple([space.newutf8(replacement, lgt), w_end])
def func():
    """Exercise the main StringBuilder append variants and return the
    concatenated result."""
    builder = StringBuilder()
    builder.append("a")
    builder.append("abc")
    builder.append_slice("abc", 1, 2)
    builder.append_multiple_char('d', 4)
    return builder.build()
def read(self, n=-1):
    # Read up to `n` bytes from the buffered stream; n < 0 reads all.
    assert isinstance(n, int)
    if n < 0:
        return self.readall()
    currentsize = len(self.buf) - self.pos
    start = self.pos
    assert start >= 0
    if n <= currentsize:
        # Request fully satisfied from the current buffer.
        stop = start + n
        assert stop >= 0
        result = self.buf[start:stop]
        self.pos += n
        return result
    else:
        builder = StringBuilder(n)
        # Take the remaining buffered bytes, then refill from the
        # underlying stream until `n` bytes are collected or EOF.
        builder.append_slice(self.buf, start, len(self.buf))
        while 1:
            self.buf = self.do_read(self.bufsize)
            if not self.buf:
                # EOF: return the (possibly short) result.
                self.pos = 0
                break
            currentsize += len(self.buf)
            if currentsize >= n:
                # Final chunk: keep the unread tail buffered for later.
                self.pos = len(self.buf) - (currentsize - n)
                stop = self.pos
                assert stop >= 0
                builder.append_slice(self.buf, 0, stop)
                break
            buf = self.buf
            assert buf is not None
            builder.append(buf)
        return builder.build()
def string_append(args):
    # Concatenate a sequence of W_String arguments.  The fast path builds
    # an ASCII result; on the first non-ASCII argument (presumably
    # as_str_ascii raises ValueError then -- confirm against W_String) we
    # switch to a UnicodeBuilder and continue from that argument.
    if not args:
        return W_String.fromascii("")
    builder = StringBuilder()
    unibuilder = None
    ascii_idx = 0
    try:
        for ascii_idx in range(len(args)):
            arg = args[ascii_idx]
            if not isinstance(arg, W_String):
                raise SchemeException("string-append: expected a string")
            builder.append(arg.as_str_ascii())
    except ValueError:
        # Promote what we have so far to unicode and finish the rest
        # through the unicode path.
        unibuilder = UnicodeBuilder()
        unibuilder.append(unicode(builder.build()))
        builder = None
        for i in range(ascii_idx, len(args)):
            arg = args[i]
            if not isinstance(arg, W_String):
                raise SchemeException("string-append: expected a string")
            unibuilder.append(arg.as_unicode())
    if unibuilder is None:
        assert builder is not None
        return W_String.fromascii(builder.build())
    else:
        assert unibuilder is not None
        return W_String.fromunicode(unibuilder.build())
def _charp2str_to_null(cp, index):
    """Copy characters from raw char buffer `cp`, starting at `index`,
    up to (but not including) the first NUL byte, as a string."""
    index = rffi.cast(lltype.Signed, index)
    out = StringBuilder()
    ch = cp[index]
    while ch != '\x00':
        out.append(ch)
        index += 1
        ch = cp[index]
    return out.build()
def repr(self, numbers=True):
    """Disassemble self.bytecode into a human-readable listing, one
    instruction per line; when `numbers` is true each line is prefixed
    with the instruction's byte offset."""
    out = StringBuilder()
    bc = self.bytecode
    pos = 0
    while pos < len(bc):
        opcode = opcodes.opcodes[ord(bc[pos])]
        instr_start = pos
        if opcode.numargs == 0:
            text = " " + opcode.name
            pos += 1
        elif opcode.numargs == 1:
            # One 16-bit big-endian operand.
            operand = (ord(bc[pos + 1]) << 8) + ord(bc[pos + 2])
            text = " %s %d" % (opcode.name, operand)
            pos += 3
        else:
            assert opcode.numargs == 2
            # Two 16-bit big-endian operands.
            first = (ord(bc[pos + 1]) << 8) + ord(bc[pos + 2])
            second = (ord(bc[pos + 3]) << 8) + ord(bc[pos + 4])
            text = " %s %d %d" % (opcode.name, first, second)
            pos += 5
        if numbers:
            out.append("%3d" % instr_start + text)
        else:
            out.append(text)
        out.append("\n")
    return out.build()
def bitwise_not(self, space):
    """Return a new constant string with every byte XOR-ed with 0xff."""
    n = self.strlen()
    inverted = StringBuilder(n)
    for idx in range(n):
        inverted.append(chr(ord(self.character(idx)) ^ 0xff))
    return W_ConstStringObject(inverted.build())
def unwrap(self):
    # note: always overridden so far
    # Materialize the abstract character sequence into a plain string.
    n = self.strlen()
    out = StringBuilder(n)
    for idx in range(n):
        out.append(self.character(idx))
    return out.build()
def read(self, size=-1):
    # Read `size` bytes from the underlying C-level FILE*; size < 0
    # reads the entire remaining contents.
    # XXX CPython uses a more delicate logic here
    self._check_closed()
    ll_file = self._ll_file
    if size == 0:
        return ""
    elif size < 0:
        # read the entire contents
        buf = lltype.malloc(rffi.CCHARP.TO, BASE_BUF_SIZE, flavor='raw')
        try:
            s = StringBuilder()
            while True:
                returned_size = self._fread(buf, BASE_BUF_SIZE, ll_file)
                returned_size = intmask(returned_size)  # is between 0 and BASE_BUF_SIZE
                if returned_size == 0:
                    if c_feof(ll_file):
                        # ok, finished
                        return s.build()
                    # Short read without EOF: real I/O error.
                    raise _error(ll_file)
                s.append_charpsize(buf, returned_size)
        finally:
            # Raw allocation must be freed explicitly on every path.
            lltype.free(buf, flavor='raw')
    else:  # size > 0
        with rffi.scoped_alloc_buffer(size) as buf:
            returned_size = self._fread(buf.raw, size, ll_file)
            returned_size = intmask(returned_size)  # is between 0 and size
            if returned_size == 0:
                if not c_feof(ll_file):
                    raise _error(ll_file)
            s = buf.str(returned_size)
            assert s is not None
            return s
def rledecode_hqx(space, hexbin): "Decode hexbin RLE-coded string." # that's a guesstimation of the resulting length res = StringBuilder(len(hexbin)) end = len(hexbin) i = 0 lastpushed = -1 while i < end: c = hexbin[i] i += 1 if c != '\x90': res.append(c) lastpushed = ord(c) else: if i == end: raise_Incomplete(space, 'String ends with the RLE code \\x90') count = ord(hexbin[i]) - 1 i += 1 if count < 0: res.append('\x90') lastpushed = 0x90 else: if lastpushed < 0: raise_Error(space, 'String starts with the RLE code \\x90') res.append_multiple_char(chr(lastpushed), count) return space.newbytes(res.build())
def direct_read(self, n=-1):
    # Read up to `n` bytes from the stream; n < 0 reads to EOF.
    stream = self.getstream()
    self.check_readable()
    if n < 0:
        return stream.readall()
    else:
        result = StringBuilder(n)
        while n > 0:
            try:
                data = stream.read(n)
            except OSError as e:
                # a special-case only for read() (similar to CPython, which
                # also loses partial data with other methods): if we get
                # EAGAIN after already some data was received, return it.
                # Note that we can get EAGAIN while there is buffered data
                # waiting; read that too.
                if is_wouldblock_error(e.errno):
                    m = stream.count_buffered_bytes()
                    if m > 0:
                        result.append(stream.read(min(n, m)))
                    got = result.build()
                    if len(got) > 0:
                        return got
                raise
            if not data:
                # EOF before `n` bytes were read.
                break
            n -= len(data)
            result.append(data)
        return result.build()
def rlecode_hqx(space, data): "Binhex RLE-code binary data." # that's a guesstimation of the resulting length res = StringBuilder(len(data)) i = 0 end = len(data) while i < end: c = data[i] res.append(c) if c == '\x90': # Escape it, and ignore repetitions (*). res.append('\x00') else: # Check how many following are the same inend = i + 1 while inend < end and data[inend] == c and inend < i + 255: inend += 1 if inend - i > 3: # More than 3 in a row. Output RLE. For the case of more # than 255, see (*) below. res.append('\x90') res.append(chr(inend - i)) i = inend continue i += 1 # (*) Note that we put simplicity before compatness here, like CPython. # I am sure that if we tried harder to produce the smallest possible # string that rledecode_hqx() would expand back to 'data', there are # some programs somewhere that would start failing obscurely in rare # cases. return space.newbytes(res.build())
def writerow(self, w_fields):
    """Construct and write a CSV record from a sequence of fields.
    Non-string elements will be converted to string."""
    # NOTE(review): this block appears truncated in the source seen here --
    # `rec` is built but never written and there is no return statement;
    # confirm against the full file before relying on this listing.
    space = self.space
    fields_w = space.listview(w_fields)
    dialect = self.dialect
    rec = StringBuilder(80)
    #
    for field_index in range(len(fields_w)):
        w_field = fields_w[field_index]
        # Stringify the field: None becomes "", floats use repr() to
        # avoid losing precision, everything else uses str().
        if space.is_w(w_field, space.w_None):
            field = ""
        elif space.isinstance_w(w_field, space.w_float):
            field = space.str_w(space.repr(w_field))
        else:
            field = space.str_w(space.str(w_field))
        #
        if dialect.quoting == QUOTE_NONNUMERIC:
            try:
                space.float_w(w_field)   # is it an int/long/float?
                quoted = False
            except OperationError, e:
                if e.async(space):
                    # Don't swallow KeyboardInterrupt & co.
                    raise
                quoted = True
def a2b_uu(space, ascii):
    "Decode a line of uuencoded data."
    if len(ascii) == 0:
        # obscure case, for compatibility with CPython
        length = (-0x20) & 0x3f
    else:
        # The first byte encodes the payload length of the line.
        length = (ord(ascii[0]) - 0x20) & 0x3f
    res = StringBuilder(length)
    # Each group of 4 input characters decodes to up to 3 output bytes.
    for i in range(1, len(ascii), 4):
        A = _a2b_read(space, ascii, i)
        B = _a2b_read(space, ascii, i + 1)
        C = _a2b_read(space, ascii, i + 2)
        D = _a2b_read(space, ascii, i + 3)
        #
        if res.getlength() < length:
            res.append(chr(A << 2 | B >> 4))
        elif A != 0 or B != 0:
            # Past the declared length: only zero padding is allowed.
            raise_Error(space, "Trailing garbage")
        #
        if res.getlength() < length:
            res.append(chr((B & 0xf) << 4 | C >> 2))
        elif C != 0:
            raise_Error(space, "Trailing garbage")
        #
        if res.getlength() < length:
            res.append(chr((C & 0x3) << 6 | D))
        elif D != 0:
            raise_Error(space, "Trailing garbage")
    # Pad with NULs if the line was shorter than its declared length.
    remaining = length - res.getlength()
    if remaining > 0:
        res.append_multiple_char('\x00', remaining)
    return space.wrap(res.build())
def readall_w(self, space):
    # Read the whole remaining file from the raw fd, retrying on EINTR
    # and growing the read size heuristically via new_buffersize().
    self._check_closed(space)
    self._check_readable(space)
    total = 0
    builder = StringBuilder()
    while True:
        newsize = int(new_buffersize(self.fd, total))
        try:
            chunk = os.read(self.fd, newsize - total)
        except OSError as e:
            if e.errno == errno.EINTR:
                # Interrupted by a signal: handle it and retry.
                space.getexecutioncontext().checksignals()
                continue
            if total > 0:
                # return what we've got so far
                break
            if e.errno == errno.EAGAIN:
                # Non-blocking fd with nothing available at all.
                return space.w_None
            raise wrap_oserror(space, e, exception_name='w_IOError')
        if not chunk:
            # EOF reached.
            break
        builder.append(chunk)
        total += len(chunk)
    return space.newbytes(builder.build())
def direct_readline(self, size=-1):
    # Read one line, but at most `size` bytes when size >= 0.
    stream = self.getstream()
    self.check_readable()
    if size < 0:
        return stream.readline()
    else:
        # very inefficient unless there is a peek()
        result = StringBuilder()
        while size > 0:
            # "peeks" on the underlying stream to see how many chars
            # we can safely read without reading past an end-of-line
            startindex, peeked = stream.peek()
            assert 0 <= startindex <= len(peeked)
            endindex = startindex + size
            pn = peeked.find("\n", startindex, endindex)
            if pn < 0:
                # No newline in the peeked window: read up to the window
                # end (or the size limit), leaving room to stop at '\n'.
                pn = min(endindex - 1, len(peeked))
            c = stream.read(pn - startindex + 1)
            if not c:
                break
            result.append(c)
            if c.endswith('\n'):
                break
            size -= len(c)
        return result.build()
def test_deflate_set_dictionary():
    # Compress with a preset dictionary, then check that inflating first
    # reports Z_NEED_DICT and succeeds once the dictionary is supplied.
    text = 'abcabc'
    zdict = 'abc'
    stream = rzlib.deflateInit()
    rzlib.deflateSetDictionary(stream, zdict)
    bytes = rzlib.compress(stream, text, rzlib.Z_FINISH)
    rzlib.deflateEnd(stream)

    stream2 = rzlib.inflateInit()

    from rpython.rtyper.lltypesystem import lltype, rffi, rstr
    from rpython.rtyper.annlowlevel import llstr
    from rpython.rlib.rstring import StringBuilder
    # Feed the compressed bytes through a raw input buffer.
    with lltype.scoped_alloc(rffi.CCHARP.TO, len(bytes)) as inbuf:
        rstr.copy_string_to_raw(llstr(bytes), inbuf, 0, len(bytes))
        stream2.c_next_in = rffi.cast(rzlib.Bytefp, inbuf)
        rffi.setintfield(stream2, 'c_avail_in', len(bytes))
        with lltype.scoped_alloc(rffi.CCHARP.TO, 100) as outbuf:
            stream2.c_next_out = rffi.cast(rzlib.Bytefp, outbuf)
            bufsize = 100
            rffi.setintfield(stream2, 'c_avail_out', bufsize)
            # First attempt must ask for the dictionary.
            err = rzlib._inflate(stream2, rzlib.Z_SYNC_FLUSH)
            assert err == rzlib.Z_NEED_DICT
            rzlib.inflateSetDictionary(stream2, zdict)
            rzlib._inflate(stream2, rzlib.Z_SYNC_FLUSH)
            # Collect however many bytes were actually produced.
            avail_out = rffi.cast(lltype.Signed, stream2.c_avail_out)
            result = StringBuilder()
            result.append_charpsize(outbuf, bufsize - avail_out)
    rzlib.inflateEnd(stream2)
    assert result.build() == text
def fn():
    # Exercise compute_unique_id on a variety of object kinds
    # (str, unicode, list, dict, StringBuilder, UnicodeBuilder).
    return (compute_unique_id("foo"),
            compute_unique_id(u"bar"),
            compute_unique_id([1]),
            compute_unique_id({"foo": 3}),
            compute_unique_id(StringBuilder()),
            compute_unique_id(UnicodeBuilder()))
def _read_all(self, space): "Read all the file, don't update the cache" # Must run with the lock held! builder = StringBuilder() # First copy what we have in the current buffer current_size = self._readahead() data = None if current_size: data = self.buffer[self.pos:self.pos + current_size] builder.append(data) self.pos += current_size # We're going past the buffer's bounds, flush it if self.writable: self._flush_and_rewind_unlocked(space) self._reader_reset_buf() while True: # Read until EOF or until read() would block w_data = space.call_method(self.w_raw, "read") if space.is_w(w_data, space.w_None): if current_size == 0: return w_data break data = space.bytes_w(w_data) size = len(data) if size == 0: break builder.append(data) current_size += size if self.abs_pos != -1: self.abs_pos += size return space.newbytes(builder.build())
def reencode_utf8_with_surrogates(utf8):
    """Receiving valid UTF8 which contains surrogates, combine surrogate
    pairs into correct UTF8 with pairs collapsed. This is a rare case
    and you should not be using surrogate pairs in the first place,
    so the performance here is a bit secondary.
    """
    s = StringBuilder(len(utf8))
    stop = len(utf8)
    i = 0
    while i < stop:
        uchr = codepoint_at_pos(utf8, i)
        if 0xD800 <= uchr <= 0xDBFF:
            # High surrogate: look ahead for a matching low surrogate.
            high = uchr
            i = next_codepoint_pos(utf8, i)
            if i >= stop:
                # Trailing lone high surrogate: emit it as-is.
                unichr_as_utf8_append(s, uchr, True)
                break
            low = codepoint_at_pos(utf8, i)
            if 0xDC00 <= low <= 0xDFFF:
                # Valid pair: combine into a single astral code point.
                uchr = 0x10000 + (high - 0xD800) * 0x400 + (low - 0xDC00)
                i = next_codepoint_pos(utf8, i)
            # else not really a surrogate pair, just append high
        else:
            i = next_codepoint_pos(utf8, i)
        unichr_as_utf8_append(s, uchr, True)
    return s.build()
def decode_string_escaped(self, start):
    # Slow path of JSON string decoding, entered once the first backslash
    # is seen: copy the plain prefix [start:pos], then process the rest
    # character by character, expanding escape sequences.
    i = self.pos
    builder = StringBuilder((i - start) * 2)  # just an estimate
    assert start >= 0
    assert i >= 0
    builder.append_slice(self.s, start, i)
    while True:
        ch = self.ll_chars[i]
        i += 1
        if ch == '"':
            # Closing quote: decode the accumulated UTF-8 and finish.
            content_utf8 = builder.build()
            content_unicode = unicodehelper.decode_utf8(
                self.space, content_utf8)
            self.last_type = TYPE_STRING
            self.pos = i
            return self.space.newunicode(content_unicode)
        elif ch == '\\':
            i = self.decode_escape_sequence(i, builder)
        elif ch < '\x20':
            # Raw control characters are not allowed inside JSON strings;
            # '\0' additionally marks an unterminated string.
            if ch == '\0':
                self._raise("Unterminated string starting at char %d",
                            start - 1)
            else:
                self._raise("Invalid control character at char %d", i - 1)
        else:
            builder.append(ch)
def var_dump(self, space, indent, recursion):
    # PHP-style var_dump of an object.  `recursion` maps objects already
    # being dumped, to detect and cut reference cycles.
    if self in recursion:
        return '%s*RECURSION*\n' % indent
    s = StringBuilder()
    recursion[self] = None
    header = 'object(%s)#%d ' % (self.getclass().name,
                                 self.get_instance_number())
    orig_indent = indent
    if indent.endswith('&'):
        # '&' marks a by-reference value; it belongs only on the header
        # line, not on the nested indentation.
        indent = indent[:-1]
    subindent = indent + ' '
    counter = 0
    all_names = []
    all_values_w = []
    self.enum_properties(space.ec.interpreter, all_names, all_values_w)
    properties = OrderedDict()
    for i in range(len(all_names)):
        name, access = demangle_property(all_names[i])
        key = dump_property(name, access)
        # Recursively dump each property at the deeper indentation.
        properties[key] = '%s[%s]=>\n%s' % (
            subindent, key, all_values_w[i].var_dump(
                space, subindent, recursion))
    for part in properties.itervalues():
        counter += 1
        s.append(part)
    s.append('%s}\n' % indent)
    del recursion[self]
    return '%s%s(%d) {\n' % (orig_indent, header, counter) + s.build()
def http_build_query(interp, w_data, num_prefix="", arg_sep=None, enctype=1):
    """Build a URL-encoded query string from an array or object.

    num_prefix is prepended to numeric keys; arg_sep defaults to the
    'arg_separator.output' INI setting; enctype is passed through to
    _build_query.  Non-array, non-object input triggers a warning (and
    the function then falls through producing an empty result, matching
    the original behavior).
    """
    space = interp.space
    if arg_sep is None:
        arg_sep = interp.config.get_ini_str("arg_separator.output")
    w_data = w_data.deref()
    out = StringBuilder()
    # Idiom fix: `x not in (tuple)` instead of `not x in [list]`.
    if w_data.tp not in (space.tp_array, space.tp_object):
        interp.space.ec.warn("http_build_query(): Parameter 1 "
                             "expected to be Array or Object.  "
                             "Incorrect value given")
    if w_data.tp == space.tp_array:
        with space.iter(w_data) as itr:
            while not itr.done():
                w_key, w_value = itr.next_item(space)
                key = _get_key(space, num_prefix, w_key)
                res = _build_query(space, [], key, w_value, num_prefix,
                                   arg_sep, enctype)
                out.append(''.join(res))
    if w_data.tp == space.tp_object:
        for key, w_value in w_data.get_instance_attrs(interp).iteritems():
            _, prop = demangle_property(key)
            if prop:
                # Skip mangled (non-public) properties.
                continue
            res = _build_query(space, [], key, w_value, num_prefix,
                               arg_sep, enctype)
            out.append(''.join(res))
    outstr = out.build()
    if outstr.endswith(arg_sep):
        # Drop trailing separator characters (rstrip strips by char set,
        # matching the original behavior).
        outstr = outstr.rstrip(arg_sep)
    return interp.space.newstr(outstr)