def rledecode_hqx(space, hexbin):
    """Decode hexbin RLE-coded string.

    Bytes other than the RLE marker (0x90) are copied through unchanged.
    A marker followed by a zero byte stands for a literal 0x90; a marker
    followed by a byte of value n appends the previously emitted byte
    n - 1 more times.  A marker at the very end of the input is reported
    through raise_Incomplete, and a repeat request before any byte has
    been emitted is reported through raise_Error.

    Returns the decoded data wrapped with space.newbytes().
    """
    RLE_MARKER = '\x90'
    total = len(hexbin)
    # the input length is only a guess at the size of the result
    out = StringBuilder(total)
    pos = 0
    previous = -1       # ordinal of the last emitted byte, -1 while none
    while pos < total:
        byte = hexbin[pos]
        pos += 1
        if byte != RLE_MARKER:
            out.append(byte)
            previous = ord(byte)
            continue
        # 'byte' is the RLE marker: the next byte holds the repeat count
        if pos == total:
            raise_Incomplete(space, 'String ends with the RLE code \\x90')
        repeat = ord(hexbin[pos]) - 1
        pos += 1
        if repeat < 0:
            # a count byte of zero escapes a literal marker byte
            out.append(RLE_MARKER)
            previous = 0x90
            continue
        if previous < 0:
            raise_Error(space, 'String starts with the RLE code \\x90')
        out.append_multiple_char(chr(previous), repeat)
    return space.newbytes(out.build())
def a2b_uu(space, ascii):
    """Decode a line of uuencoded data.

    The first character of ``ascii`` encodes the expected number of output
    bytes.  The remaining characters are consumed in groups of four, each
    group producing up to three bytes.  Bytes beyond the expected length
    must be zero, otherwise raise_Error is called with "Trailing garbage".
    If the input yields fewer bytes than expected, the result is padded
    with zero bytes.

    Returns the decoded data wrapped with space.wrap().
    """
    if len(ascii) > 0:
        length_code = ord(ascii[0])
    else:
        # obscure case, for compatibility with CPython: an empty line is
        # handled as if its length character had ordinal 0
        length_code = 0
    expected = (length_code - 0x20) & 0x3f
    out = StringBuilder(expected)
    for offset in range(1, len(ascii), 4):
        A = _a2b_read(space, ascii, offset)
        B = _a2b_read(space, ascii, offset + 1)
        C = _a2b_read(space, ascii, offset + 2)
        D = _a2b_read(space, ascii, offset + 3)
        # the three output bytes carried by this group of four values
        first = (A << 2) | (B >> 4)
        second = ((B & 0xf) << 4) | (C >> 2)
        third = ((C & 0x3) << 6) | D
        #
        if out.getlength() < expected:
            out.append(chr(first))
        elif not (A == 0 and B == 0):
            raise_Error(space, "Trailing garbage")
        #
        if out.getlength() < expected:
            out.append(chr(second))
        elif C != 0:
            raise_Error(space, "Trailing garbage")
        #
        if out.getlength() < expected:
            out.append(chr(third))
        elif D != 0:
            raise_Error(space, "Trailing garbage")

    missing = expected - out.getlength()
    if missing > 0:
        out.append_multiple_char('\x00', missing)
    return space.wrap(out.build())
def func():
    """Exercise each StringBuilder append flavour and return the result.

    Appends a one-character string, a longer string, the slice [1:2] of
    "abc" and four copies of 'd', then returns whatever build() produces.
    """
    builder = StringBuilder()
    builder.append("a")
    builder.append("abc")
    builder.append_slice("abc", 1, 2)
    builder.append_multiple_char('d', 4)
    built = builder.build()
    return built
def a2b_uu(space, ascii):
    """Decode a line of uuencoded data.

    ``ascii[0]`` carries the expected output length; every following group
    of four characters is turned into at most three bytes.  Once the
    expected length has been reached, any further non-zero data triggers
    raise_Error("Trailing garbage").  A result that is too short is padded
    with zero bytes before being wrapped with space.wrap().
    """
    total = len(ascii)
    if total != 0:
        length = (ord(ascii[0]) - 0x20) & 0x3F
    else:
        # obscure case, for compatibility with CPython
        length = (-0x20) & 0x3F
    res = StringBuilder(length)

    pos = 1
    while pos < total:
        A = _a2b_read(space, ascii, pos)
        B = _a2b_read(space, ascii, pos + 1)
        C = _a2b_read(space, ascii, pos + 2)
        D = _a2b_read(space, ascii, pos + 3)
        pos += 4
        # first byte: all of A and the top bits of B
        if res.getlength() >= length:
            if A != 0 or B != 0:
                raise_Error(space, "Trailing garbage")
        else:
            res.append(chr((A << 2) | (B >> 4)))
        # second byte: the low bits of B and the top bits of C
        if res.getlength() >= length:
            if C != 0:
                raise_Error(space, "Trailing garbage")
        else:
            res.append(chr(((B & 0xF) << 4) | (C >> 2)))
        # third byte: the low bits of C and all of D
        if res.getlength() >= length:
            if D != 0:
                raise_Error(space, "Trailing garbage")
        else:
            res.append(chr(((C & 0x3) << 6) | D))

    shortfall = length - res.getlength()
    if shortfall > 0:
        res.append_multiple_char("\x00", shortfall)
    return space.wrap(res.build())
def test_string_builder():
    """Check getlength() and build() after a mix of append operations."""
    builder = StringBuilder()
    builder.append("a")
    builder.append("abc")
    # the length must already reflect the two appends above
    expected_length = len('aabc')
    assert builder.getlength() == expected_length
    builder.append("a")
    builder.append_slice("abc", 1, 2)
    builder.append_multiple_char('d', 4)
    built = builder.build()
    assert built == "aabcabdddd"
def fn():
    """Grow a small StringBuilder around two collections; return a char code.

    The builder starts with an initial size of 4, receives three short
    strings and two runs of 1000 'y' characters with an rgc.collect()
    between the runs, and the ordinal of the character at index 1000 of
    the built string is returned after a second collection.
    """
    builder = StringBuilder(4)
    builder.append("abcd")
    builder.append("defg")
    builder.append("rty")
    builder.append_multiple_char('y', 1000)
    rgc.collect()
    builder.append_multiple_char('y', 1000)
    # index the built string directly so that only the single character
    # is held in a local across the second collection
    picked = builder.build()[1000]
    rgc.collect()
    return ord(picked)
def test_string_builder():
    """Check that a StringBuilder can be built twice and extended afterwards."""
    builder = StringBuilder()
    builder.append("a")
    builder.append("abc")
    expected_length = len('aabc')
    assert builder.getlength() == expected_length
    builder.append("a")
    builder.append_slice("abc", 1, 2)
    builder.append_multiple_char('d', 4)
    first_build = builder.build()
    assert first_build == "aabcabdddd"
    # building a second time must give the same content
    assert first_build == builder.build()
    # and the builder must still accept data after build()
    builder.append("x")
    assert builder.build() == first_build + "x"
def test_string_builder():
    """Verify length tracking, repeated build() and appending after build()."""
    sb = StringBuilder()
    sb.append("a")
    sb.append("abc")
    length_so_far = sb.getlength()
    assert length_so_far == len("aabc")
    sb.append("a")
    sb.append_slice("abc", 1, 2)
    sb.append_multiple_char("d", 4)
    snapshot = sb.build()
    assert snapshot == "aabcabdddd"
    # a second build() returns equal content
    rebuilt = sb.build()
    assert snapshot == rebuilt
    # the builder stays usable after it has been built
    sb.append("x")
    assert sb.build() == snapshot + "x"
def decode_string_escaped(self, start, content_so_far):
    """Finish decoding a string literal that contains escape sequences.

    ``content_so_far`` is the text already collected by the caller and
    ``start`` is only used in the error message.  Characters are read from
    self.ll_chars beginning at self.pos until a closing double quote is
    found; escape sequences are handed to self.decode_escape_sequence().
    On success self.pos and self.last_type are updated and the decoded
    text is returned wrapped with self.space.wrap().  A NUL character
    before the closing quote is reported through self._raise().
    """
    # twice the already collected length is just an estimate
    out = StringBuilder(len(content_so_far) * 2)
    out.append(content_so_far)
    cursor = self.pos
    while True:
        ch = self.ll_chars[cursor]
        cursor += 1
        if ch == '\\':
            cursor = self.decode_escape_sequence(cursor, out)
            continue
        if ch == '\0':
            self._raise("Unterminated string starting at char %d", start)
            continue
        if ch != '"':
            # we should implement append_char
            out.append_multiple_char(ch, 1)
            continue
        # closing quote: decode what was collected and publish the state
        content_utf8 = out.build()
        content_unicode = unicodehelper.decode_utf8(self.space, content_utf8)
        self.last_type = TYPE_STRING
        self.pos = cursor
        return self.space.wrap(content_unicode)
def str_zfill__String_ANY(space, w_self, w_width):
    """Return the string value left-padded with '0' up to the given width.

    A leading '+' or '-' stays in front of the inserted zeros.  When the
    value is already at least ``width`` characters long it is re-wrapped
    unchanged.
    """
    text = w_self._value
    width = space.int_w(w_width)
    padding = width - len(text)
    if padding <= 0:
        # cannot return w_self, in case it is a subclass of str
        return space.wrap(text)

    out = StringBuilder(width)
    digits_from = 0
    if len(text) > 0:
        lead = text[0]
        if lead == '+' or lead == '-':
            # keep the sign in front of the padding
            out.append(lead)
            digits_from = 1
    out.append_multiple_char('0', padding)
    out.append_slice(text, digits_from, len(text))
    return space.wrap(out.build())
def decode_string_escaped(self, start, content_so_far):
    """Decode the remainder of a string literal containing escapes.

    Starts from the already collected ``content_so_far`` and reads
    self.ll_chars from self.pos up to the closing double quote, delegating
    backslash sequences to self.decode_escape_sequence().  A NUL character
    is reported via self._raise() using ``start`` in the message.  On
    success, self.last_type and self.pos are updated and the decoded text
    is returned wrapped with self.space.wrap().
    """
    estimate = len(content_so_far) * 2      # just an estimate
    builder = StringBuilder(estimate)
    builder.append(content_so_far)
    index = self.pos
    while True:
        char = self.ll_chars[index]
        index += 1
        if char == '"':
            break
        elif char == '\0':
            self._raise("Unterminated string starting at char %d", start)
        elif char == '\\':
            index = self.decode_escape_sequence(index, builder)
        else:
            # we should implement append_char
            builder.append_multiple_char(char, 1)

    # the closing quote was consumed: convert and record the new state
    content_utf8 = builder.build()
    content_unicode = unicodehelper.decode_utf8(self.space, content_utf8)
    self.last_type = TYPE_STRING
    self.pos = index
    return self.space.wrap(content_unicode)
def backslashreplace_errors(space, w_exc):
    """Error callback replacing unencodable characters by backslash escapes.

    Only UnicodeEncodeError instances are handled; anything else raises a
    TypeError.  Every code point between the exception's 'start' and 'end'
    becomes a backslash followed by 'x', 'u' or 'U' and its value as 2, 4
    or 8 zero-padded hexadecimal digits, depending on its magnitude.

    Returns a wrapped tuple (replacement text, w_end).
    """
    check_exception(space, w_exc)
    if not space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
        raise oefmt(space.w_TypeError,
                    "don't know how to handle %T in error callback", w_exc)

    w_text = space.getattr(w_exc, space.newtext('object'))
    space.realutf8_w(w_text)    # for errors
    w_text = space.convert_arg_to_w_unicode(w_text)
    char_start = space.int_w(space.getattr(w_exc, space.newtext('start')))
    w_end = space.getattr(w_exc, space.newtext('end'))
    char_end = space.int_w(w_end)
    # the loop below walks the utf-8 bytes, so convert the indices first
    byte_pos = w_text._index_to_byte(char_start)
    byte_end = w_text._index_to_byte(char_end)

    out = StringBuilder()
    utf8 = w_text._utf8
    while byte_pos < byte_end:
        code = rutf8.codepoint_at_pos(utf8, byte_pos)
        hexed = hex(code)
        if code >= 0x10000:
            prefix = "\\U"
            width = 8
        elif code >= 0x100:
            prefix = "\\u"
            width = 4
        else:
            prefix = "\\x"
            width = 2
        out.append(prefix)
        hexed_len = len(hexed)
        # 'hexed' starts with '0x', which is not part of the digits
        missing = width + 2 - hexed_len
        if missing > 0:
            out.append_multiple_char('0', missing)
        out.append_slice(hexed, 2, hexed_len)
        byte_pos = rutf8.next_codepoint_pos(utf8, byte_pos)

    replacement = out.build()
    length = rutf8.check_utf8(replacement, True)
    return space.newtuple([space.newutf8(replacement, length), w_end])
def a2b_uu(space, ascii):
    """Decode a line of uuencoded data.

    The expected output length is taken from the first character.  Each
    following group of four characters is read with _a2b_read() and the
    three resulting byte values are passed to _a2b_write() together with
    the builder and the expected length.  A result shorter than expected
    is padded with zero bytes.  Returns the data wrapped with space.wrap().
    """
    if len(ascii) > 0:
        length_code = ord(ascii[0])
    else:
        # obscure case, for compatibility with CPython: an empty line is
        # handled as if its length character had ordinal 0
        length_code = 0
    expected = (length_code - 0x20) & 0x3F
    out = StringBuilder(expected)
    for offset in range(1, len(ascii), 4):
        A = _a2b_read(space, ascii, offset)
        B = _a2b_read(space, ascii, offset + 1)
        C = _a2b_read(space, ascii, offset + 2)
        D = _a2b_read(space, ascii, offset + 3)
        # regroup the four values read above into three byte values
        first = (A << 2) | (B >> 4)
        second = ((B & 0xF) << 4) | (C >> 2)
        third = ((C & 0x3) << 6) | D
        _a2b_write(space, out, expected, first)
        _a2b_write(space, out, expected, second)
        _a2b_write(space, out, expected, third)

    missing = expected - out.getlength()
    if missing > 0:
        out.append_multiple_char("\x00", missing)
    return space.wrap(out.build())
def a2b_uu(space, ascii):
    """Decode a line of uuencoded data.

    ``ascii[0]`` encodes how many bytes the line should produce.  The rest
    is processed four characters at a time: the values returned by
    _a2b_read() are recombined into three bytes, each handed to
    _a2b_write().  Missing bytes are filled with zeros at the end, and the
    result is returned wrapped with space.wrap().
    """
    total = len(ascii)
    if total != 0:
        length = (ord(ascii[0]) - 0x20) & 0x3f
    else:
        # obscure case, for compatibility with CPython
        length = (-0x20) & 0x3f
    res = StringBuilder(length)

    pos = 1
    while pos < total:
        A = _a2b_read(space, ascii, pos)
        B = _a2b_read(space, ascii, pos + 1)
        C = _a2b_read(space, ascii, pos + 2)
        D = _a2b_read(space, ascii, pos + 3)
        pos += 4
        #
        _a2b_write(space, res, length, (A << 2) | (B >> 4))
        _a2b_write(space, res, length, ((B & 0xf) << 4) | (C >> 2))
        _a2b_write(space, res, length, ((C & 0x3) << 6) | D)

    shortfall = length - res.getlength()
    if shortfall > 0:
        res.append_multiple_char('\x00', shortfall)
    return space.wrap(res.build())
class PackFormatIterator(FormatIterator):
    """Format iterator that consumes wrapped arguments and packs them.

    The packed bytes accumulate in ``self.result`` (a StringBuilder) and
    ``self.args_index`` tracks the next entry of ``self.args_w`` to use.
    """

    def __init__(self, space, args_w, size):
        self.space = space
        self.args_w = args_w
        self.args_index = 0
        self.result = StringBuilder(size)

    # This *should* be always unroll safe, the only way to get here is by
    # unroll the interpret function, which means the fmt is const, and thus
    # this should be const (in theory ;)
    @jit.unroll_safe
    @specialize.arg(1)
    def operate(self, fmtdesc, repetitions):
        """Run fmtdesc.pack once with the count, or ``repetitions`` times."""
        if not fmtdesc.needcount:
            for _ in range(repetitions):
                fmtdesc.pack(self)
            return
        fmtdesc.pack(self, repetitions)
    _operate_is_specialized_ = True

    @jit.unroll_safe
    def align(self, mask):
        """Append zero bytes, as many as ``(-length) & mask`` gives."""
        length = self.result.getlength()
        self.result.append_multiple_char('\x00', (-length) & mask)

    def finished(self):
        """Raise StructError unless every argument has been consumed."""
        expected = len(self.args_w)
        if self.args_index != expected:
            raise StructError("too many arguments for struct format")

    def accept_obj_arg(self):
        """Return the next argument and advance; StructError if exhausted."""
        index = self.args_index
        try:
            w_obj = self.args_w[index]
        except IndexError:
            raise StructError("struct format requires more arguments")
        self.args_index = index + 1
        return w_obj

    def accept_int_arg(self):
        """Fetch the next argument through space.int_w."""
        return self._accept_integral("int_w")

    def accept_uint_arg(self):
        """Fetch the next argument through space.uint_w."""
        return self._accept_integral("uint_w")

    def accept_longlong_arg(self):
        """Fetch the next argument through space.r_longlong_w."""
        return self._accept_integral("r_longlong_w")

    def accept_ulonglong_arg(self):
        """Fetch the next argument through space.r_ulonglong_w."""
        return self._accept_integral("r_ulonglong_w")

    @specialize.arg(1)
    def _accept_integral(self, meth):
        """Consume one argument and unwrap it with the space method ``meth``.

        Wrapped ints and longs are used directly.  Other objects are
        converted with space.index() when they define __index__; if that
        is not possible the conversion is left to self._maybe_float(),
        which is not defined in this class body.
        """
        space = self.space
        w_obj = self.accept_obj_arg()
        unwrap = getattr(space, meth)
        is_integer = (space.isinstance_w(w_obj, space.w_int) or
                      space.isinstance_w(w_obj, space.w_long))
        if is_integer:
            return unwrap(w_obj)

        w_index = None
        if space.lookup(w_obj, "__index__") is not None:
            try:
                w_index = space.index(w_obj)
            except OperationError as e:
                # a TypeError from __index__ only means "try the fallback"
                if not e.match(space, space.w_TypeError):
                    raise
        if w_index is None:
            w_index = self._maybe_float(w_obj)
        return unwrap(w_index)
def format_number(digits, buflen, sign, decpt, code, precision, flags, upper):
    """Lay out a buffer of decimal digits as a formatted number string.

    Parameters:
      digits    -- character buffer holding the digits; it is read with
                   rffi.charpsize2str and offset with rffi.ptradd
      buflen    -- number of digit characters taken from ``digits``
      sign      -- a value of 1 makes the result start with '-'
      decpt     -- position of the decimal point relative to the start
                   of the digits (may be <= 0 or > buflen)
      code      -- one of 'e', 'f', 'g' or 'r'
      precision -- precision used by the 'e', 'f' and 'g' codes
      flags     -- bit mask tested against rfloat.DTSF_SIGN, DTSF_ALT,
                   DTSF_ADD_DOT_0 and DTSF_CUT_EXP_0
      upper     -- when true the exponent marker is 'E' instead of 'e'

    Returns the formatted string.  Raises ValueError for any other
    ``code``.
    """
    # We got digits back, format them.  We may need to pad 'digits'
    # either on the left or right (or both) with extra zeros, so in
    # general the resulting string has the form
    #
    #   [<sign>]<zeros><digits><zeros>[<exponent>]
    #
    # where either of the <zeros> pieces could be empty, and there's a
    # decimal point that could appear either in <digits> or in the
    # leading or trailing <zeros>.
    #
    # Imagine an infinite 'virtual' string vdigits, consisting of the
    # string 'digits' (starting at index 0) padded on both the left
    # and right with infinite strings of zeros.  We want to output a
    # slice
    #
    #   vdigits[vdigits_start : vdigits_end]
    #
    # of this virtual string.  Thus if vdigits_start < 0 then we'll
    # end up producing some leading zeros; if vdigits_end > digits_len
    # there will be trailing zeros in the output.  The next section of
    # code determines whether to use an exponent or not, figures out
    # the position 'decpt' of the decimal point, and computes
    # 'vdigits_start' and 'vdigits_end'.
    builder = StringBuilder(20)

    use_exp = False
    vdigits_end = buflen
    if code == 'e':
        use_exp = True
        vdigits_end = precision
    elif code == 'f':
        vdigits_end = decpt + precision
    elif code == 'g':
        if decpt <= -4:
            use_exp = True
        elif decpt > precision:
            use_exp = True
        elif flags & rfloat.DTSF_ADD_DOT_0 and decpt == precision:
            use_exp = True
        if flags & rfloat.DTSF_ALT:
            vdigits_end = precision
    elif code == 'r':
        # convert to exponential format at 1e16.  We used to convert
        # at 1e17, but that gives odd-looking results for some values
        # when a 16-digit 'shortest' repr is padded with bogus zeros.
        # For example, repr(2e16+8) would give 20000000000000010.0;
        # the true value is 20000000000000008.0.
        if decpt <= -4 or decpt > 16:
            use_exp = True
    else:
        raise ValueError

    # if using an exponent, reset decimal point position to 1 and
    # adjust exponent accordingly.
    if use_exp:
        exp = decpt - 1
        decpt = 1
    else:
        exp = 0

    # ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
    # decpt < vdigits_end if add_dot_0_if_integer and no exponent
    if decpt <= 0:
        vdigits_start = decpt-1
    else:
        vdigits_start = 0
    if vdigits_end <= decpt:
        if not use_exp and flags & rfloat.DTSF_ADD_DOT_0:
            vdigits_end = decpt + 1
        else:
            vdigits_end = decpt

    # double check inequalities
    assert vdigits_start <= 0
    assert 0 <= buflen <= vdigits_end
    # decimal point should be in (vdigits_start, vdigits_end]
    assert vdigits_start < decpt <= vdigits_end

    # an explicit '+' is only produced when DTSF_SIGN is requested
    if sign == 1:
        builder.append('-')
    elif flags & rfloat.DTSF_SIGN:
        builder.append('+')

    # note that exactly one of the three 'if' conditions is true, so
    # we include exactly one decimal point
    # 1. Zero padding on left of digit string
    if decpt <= 0:
        builder.append_multiple_char('0', decpt - vdigits_start)
        builder.append('.')
        builder.append_multiple_char('0', 0 - decpt)
    else:
        builder.append_multiple_char('0', 0 - vdigits_start)

    # 2. Digits, with included decimal point
    if 0 < decpt <= buflen:
        builder.append(rffi.charpsize2str(digits, decpt - 0))
        builder.append('.')
        # the remaining digits start 'decpt' characters into the buffer
        ptr = rffi.ptradd(digits, decpt)
        builder.append(rffi.charpsize2str(ptr, buflen - decpt))
    else:
        builder.append(rffi.charpsize2str(digits, buflen))

    # 3. And zeros on the right
    if buflen < decpt:
        builder.append_multiple_char('0', decpt - buflen)
        builder.append('.')
        builder.append_multiple_char('0', vdigits_end - decpt)
    else:
        builder.append_multiple_char('0', vdigits_end - buflen)

    s = builder.build()

    # Delete a trailing decimal pt unless using alternative formatting.
    if not flags & rfloat.DTSF_ALT:
        last = len(s) - 1
        if last >= 0 and s[last] == '.':
            s = s[:last]

    # Now that we've done zero padding, add an exponent if needed.
    if use_exp:
        if upper:
            e = 'E'
        else:
            e = 'e'

        # a one-digit exponent gets a leading zero unless DTSF_CUT_EXP_0
        # is set
        if exp >= 0:
            exp_str = str(exp)
            if len(exp_str) < 2 and not (flags & rfloat.DTSF_CUT_EXP_0):
                s += e + '+0' + exp_str
            else:
                s += e + '+' + exp_str
        else:
            exp_str = str(-exp)
            if len(exp_str) < 2 and not (flags & rfloat.DTSF_CUT_EXP_0):
                s += e + '-0' + exp_str
            else:
                s += e + '-' + exp_str

    return s
def format_number(digits, buflen, sign, decpt, code, precision, flags, upper):
    """Lay out a buffer of decimal digits as a formatted number string.

    Parameters:
      digits    -- character buffer holding the digits; it is read with
                   rffi.charpsize2str and offset with rffi.ptradd
      buflen    -- number of digit characters taken from ``digits``
      sign      -- a value of 1 makes the result start with '-'
      decpt     -- position of the decimal point relative to the start
                   of the digits (may be <= 0 or > buflen)
      code      -- one of 'e', 'f', 'g' or 'r'
      precision -- precision used by the 'e', 'f' and 'g' codes
      flags     -- bit mask tested against rfloat.DTSF_SIGN, DTSF_ALT,
                   DTSF_ADD_DOT_0 and DTSF_CUT_EXP_0
      upper     -- when true the exponent marker is 'E' instead of 'e'

    Returns the formatted string.  Raises ValueError for any other
    ``code``.
    """
    # We got digits back, format them.  We may need to pad 'digits'
    # either on the left or right (or both) with extra zeros, so in
    # general the resulting string has the form
    #
    #   [<sign>]<zeros><digits><zeros>[<exponent>]
    #
    # where either of the <zeros> pieces could be empty, and there's a
    # decimal point that could appear either in <digits> or in the
    # leading or trailing <zeros>.
    #
    # Imagine an infinite 'virtual' string vdigits, consisting of the
    # string 'digits' (starting at index 0) padded on both the left
    # and right with infinite strings of zeros.  We want to output a
    # slice
    #
    #   vdigits[vdigits_start : vdigits_end]
    #
    # of this virtual string.  Thus if vdigits_start < 0 then we'll
    # end up producing some leading zeros; if vdigits_end > digits_len
    # there will be trailing zeros in the output.  The next section of
    # code determines whether to use an exponent or not, figures out
    # the position 'decpt' of the decimal point, and computes
    # 'vdigits_start' and 'vdigits_end'.
    builder = StringBuilder(20)

    use_exp = False
    vdigits_end = buflen
    if code == 'e':
        use_exp = True
        vdigits_end = precision
    elif code == 'f':
        vdigits_end = decpt + precision
    elif code == 'g':
        if decpt <= -4:
            use_exp = True
        elif decpt > precision:
            use_exp = True
        elif flags & rfloat.DTSF_ADD_DOT_0 and decpt == precision:
            use_exp = True
        if flags & rfloat.DTSF_ALT:
            vdigits_end = precision
    elif code == 'r':
        # convert to exponential format at 1e16.  We used to convert
        # at 1e17, but that gives odd-looking results for some values
        # when a 16-digit 'shortest' repr is padded with bogus zeros.
        # For example, repr(2e16+8) would give 20000000000000010.0;
        # the true value is 20000000000000008.0.
        if decpt <= -4 or decpt > 16:
            use_exp = True
    else:
        raise ValueError

    # if using an exponent, reset decimal point position to 1 and
    # adjust exponent accordingly.
    if use_exp:
        exp = decpt - 1
        decpt = 1
    else:
        exp = 0

    # ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
    # decpt < vdigits_end if add_dot_0_if_integer and no exponent
    if decpt <= 0:
        vdigits_start = decpt - 1
    else:
        vdigits_start = 0
    if vdigits_end <= decpt:
        if not use_exp and flags & rfloat.DTSF_ADD_DOT_0:
            vdigits_end = decpt + 1
        else:
            vdigits_end = decpt

    # double check inequalities
    assert vdigits_start <= 0
    assert 0 <= buflen <= vdigits_end
    # decimal point should be in (vdigits_start, vdigits_end]
    assert vdigits_start < decpt <= vdigits_end

    # an explicit '+' is only produced when DTSF_SIGN is requested
    if sign == 1:
        builder.append('-')
    elif flags & rfloat.DTSF_SIGN:
        builder.append('+')

    # note that exactly one of the three 'if' conditions is true, so
    # we include exactly one decimal point
    # 1. Zero padding on left of digit string
    if decpt <= 0:
        builder.append_multiple_char('0', decpt - vdigits_start)
        builder.append('.')
        builder.append_multiple_char('0', 0 - decpt)
    else:
        builder.append_multiple_char('0', 0 - vdigits_start)

    # 2. Digits, with included decimal point
    if 0 < decpt <= buflen:
        builder.append(rffi.charpsize2str(digits, decpt - 0))
        builder.append('.')
        # the remaining digits start 'decpt' characters into the buffer
        ptr = rffi.ptradd(digits, decpt)
        builder.append(rffi.charpsize2str(ptr, buflen - decpt))
    else:
        builder.append(rffi.charpsize2str(digits, buflen))

    # 3. And zeros on the right
    if buflen < decpt:
        builder.append_multiple_char('0', decpt - buflen)
        builder.append('.')
        builder.append_multiple_char('0', vdigits_end - decpt)
    else:
        builder.append_multiple_char('0', vdigits_end - buflen)

    s = builder.build()

    # Delete a trailing decimal pt unless using alternative formatting.
    if not flags & rfloat.DTSF_ALT:
        last = len(s) - 1
        if last >= 0 and s[last] == '.':
            s = s[:last]

    # Now that we've done zero padding, add an exponent if needed.
    if use_exp:
        if upper:
            e = 'E'
        else:
            e = 'e'

        # a one-digit exponent gets a leading zero unless DTSF_CUT_EXP_0
        # is set
        if exp >= 0:
            exp_str = str(exp)
            if len(exp_str) < 2 and not (flags & rfloat.DTSF_CUT_EXP_0):
                s += e + '+0' + exp_str
            else:
                s += e + '+' + exp_str
        else:
            exp_str = str(-exp)
            if len(exp_str) < 2 and not (flags & rfloat.DTSF_CUT_EXP_0):
                s += e + '-0' + exp_str
            else:
                s += e + '-' + exp_str

    return s
class PackFormatIterator(FormatIterator):
    """Format iterator that consumes wrapped arguments and packs them.

    The packed bytes accumulate in ``self.result`` (a StringBuilder) and
    ``self.args_index`` tracks the next entry of ``self.args_w`` to use.
    """

    def __init__(self, space, args_w, size):
        self.space = space
        self.args_w = args_w
        self.args_index = 0
        self.result = StringBuilder(size)

    # This *should* be always unroll safe, the only way to get here is by
    # unroll the interpret function, which means the fmt is const, and thus
    # this should be const (in theory ;)
    @jit.unroll_safe
    @specialize.arg(1)
    def operate(self, fmtdesc, repetitions):
        """Run fmtdesc.pack once with the count, or ``repetitions`` times."""
        if not fmtdesc.needcount:
            for _ in range(repetitions):
                fmtdesc.pack(self)
            return
        fmtdesc.pack(self, repetitions)
    _operate_is_specialized_ = True

    @jit.unroll_safe
    def align(self, mask):
        """Append zero bytes, as many as ``(-length) & mask`` gives."""
        length = self.result.getlength()
        self.result.append_multiple_char('\x00', (-length) & mask)

    def finished(self):
        """Raise StructError unless every argument has been consumed."""
        expected = len(self.args_w)
        if self.args_index != expected:
            raise StructError("too many arguments for struct format")

    def accept_obj_arg(self):
        """Return the next argument and advance; StructError if exhausted."""
        index = self.args_index
        try:
            w_obj = self.args_w[index]
        except IndexError:
            raise StructError("struct format requires more arguments")
        self.args_index = index + 1
        return w_obj

    def accept_int_arg(self):
        """Fetch the next argument through space.int_w."""
        return self._accept_integral("int_w")

    def accept_uint_arg(self):
        """Fetch the next argument through space.uint_w."""
        return self._accept_integral("uint_w")

    def accept_longlong_arg(self):
        """Fetch the next argument through space.r_longlong_w."""
        return self._accept_integral("r_longlong_w")

    def accept_ulonglong_arg(self):
        """Fetch the next argument through space.r_ulonglong_w."""
        return self._accept_integral("r_ulonglong_w")

    @specialize.arg(1)
    def _accept_integral(self, meth):
        """Consume one argument and unwrap it with the space method ``meth``.

        Wrapped ints and longs are used directly.  Other objects are
        converted with space.index() when they define __index__, or, with
        a DeprecationWarning, with space.int() when they define __int__.
        Raises StructError when no conversion applies or when unwrapping
        fails with an OverflowError.
        """
        space = self.space
        w_obj = self.accept_obj_arg()
        already_integer = (space.isinstance_w(w_obj, space.w_int) or
                           space.isinstance_w(w_obj, space.w_long))
        if already_integer:
            w_index = w_obj
        else:
            w_index = None
            if space.lookup(w_obj, '__index__'):
                try:
                    w_index = space.index(w_obj)
                except OperationError as e:
                    # a TypeError only means "try the next conversion"
                    if not e.match(space, space.w_TypeError):
                        raise
            if w_index is None and space.lookup(w_obj, '__int__'):
                if space.isinstance_w(w_obj, space.w_float):
                    msg = "integer argument expected, got float"
                else:
                    msg = ("integer argument expected, got non-integer"
                           " (implicit conversion using __int__ is deprecated)")
                space.warn(space.wrap(msg), space.w_DeprecationWarning)
                # wrapped float -> wrapped int or long
                w_index = space.int(w_obj)
            if w_index is None:
                raise StructError("cannot convert argument to integer")

        unwrap = getattr(space, meth)
        try:
            return unwrap(w_index)
        except OperationError as e:
            if e.match(space, space.w_OverflowError):
                raise StructError("argument out of range")
            raise
class PackFormatIterator(FormatIterator):
    """Format iterator that consumes wrapped arguments and packs them.

    The packed bytes accumulate in ``self.result`` (a StringBuilder) and
    ``self.args_index`` tracks the next entry of ``self.args_w`` to use.
    """

    def __init__(self, space, args_w, size):
        self.space = space
        self.args_w = args_w
        self.args_index = 0
        self.result = StringBuilder(size)

    # This *should* be always unroll safe, the only way to get here is by
    # unroll the interpret function, which means the fmt is const, and thus
    # this should be const (in theory ;)
    @jit.unroll_safe
    @specialize.arg(1)
    def operate(self, fmtdesc, repetitions):
        """Run fmtdesc.pack once with the count, or ``repetitions`` times."""
        if not fmtdesc.needcount:
            for _ in range(repetitions):
                fmtdesc.pack(self)
            return
        fmtdesc.pack(self, repetitions)
    _operate_is_specialized_ = True

    @jit.unroll_safe
    def align(self, mask):
        """Append zero bytes, as many as ``(-length) & mask`` gives."""
        length = self.result.getlength()
        self.result.append_multiple_char('\x00', (-length) & mask)

    def finished(self):
        """Raise StructError unless every argument has been consumed."""
        expected = len(self.args_w)
        if self.args_index != expected:
            raise StructError("too many arguments for struct format")

    def accept_obj_arg(self):
        """Return the next argument and advance; StructError if exhausted."""
        index = self.args_index
        try:
            w_obj = self.args_w[index]
        except IndexError:
            raise StructError("struct format requires more arguments")
        self.args_index = index + 1
        return w_obj

    def accept_int_arg(self):
        """Fetch the next argument through space.int_w."""
        return self._accept_integral("int_w")

    def accept_uint_arg(self):
        """Fetch the next argument through space.uint_w."""
        return self._accept_integral("uint_w")

    def accept_longlong_arg(self):
        """Fetch the next argument through space.r_longlong_w."""
        return self._accept_integral("r_longlong_w")

    def accept_ulonglong_arg(self):
        """Fetch the next argument through space.r_ulonglong_w."""
        return self._accept_integral("r_ulonglong_w")

    @specialize.arg(1)
    def _accept_integral(self, meth):
        """Consume one argument and unwrap it with the space method ``meth``.

        Wrapped ints are used directly; other objects are converted with
        space.index() when they define __index__.  Raises StructError when
        the argument cannot be used as an integer or when unwrapping fails
        with an OverflowError.
        """
        space = self.space
        w_obj = self.accept_obj_arg()
        if space.isinstance_w(w_obj, space.w_int):
            w_index = w_obj
        else:
            w_index = None
            if space.lookup(w_obj, '__index__'):
                try:
                    w_index = space.index(w_obj)
                except OperationError as e:
                    # a TypeError is turned into the StructError below
                    if not e.match(space, space.w_TypeError):
                        raise
            if w_index is None:
                raise StructError("required argument is not an integer")

        unwrap = getattr(space, meth)
        try:
            return unwrap(w_index)
        except OperationError as e:
            if e.match(space, space.w_OverflowError):
                raise StructError("argument out of range")
            raise
class PackFormatIterator(FormatIterator):
    """Format iterator that consumes wrapped arguments and packs them.

    The packed bytes accumulate in ``self.result`` (a StringBuilder) and
    ``self.args_index`` tracks the next entry of ``self.args_w`` to use.
    """

    def __init__(self, space, args_w, size):
        self.space = space
        self.args_w = args_w
        self.args_index = 0
        self.result = StringBuilder(size)

    # This *should* be always unroll safe, the only way to get here is by
    # unroll the interpret function, which means the fmt is const, and thus
    # this should be const (in theory ;)
    @jit.unroll_safe
    @specialize.arg(1)
    def operate(self, fmtdesc, repetitions):
        """Run fmtdesc.pack once with the count, or ``repetitions`` times."""
        if not fmtdesc.needcount:
            for _ in range(repetitions):
                fmtdesc.pack(self)
            return
        fmtdesc.pack(self, repetitions)
    _operate_is_specialized_ = True

    @jit.unroll_safe
    def align(self, mask):
        """Append zero bytes, as many as ``(-length) & mask`` gives."""
        length = self.result.getlength()
        self.result.append_multiple_char('\x00', (-length) & mask)

    def finished(self):
        """Raise StructError unless every argument has been consumed."""
        expected = len(self.args_w)
        if self.args_index != expected:
            raise StructError("too many arguments for struct format")

    def accept_obj_arg(self):
        """Return the next argument and advance; StructError if exhausted."""
        index = self.args_index
        try:
            w_obj = self.args_w[index]
        except IndexError:
            raise StructError("struct format requires more arguments")
        self.args_index = index + 1
        return w_obj

    def accept_int_arg(self):
        """Fetch the next argument through space.int_w."""
        return self._accept_integral("int_w")

    def accept_uint_arg(self):
        """Fetch the next argument through space.uint_w."""
        return self._accept_integral("uint_w")

    def accept_longlong_arg(self):
        """Fetch the next argument through space.r_longlong_w."""
        return self._accept_integral("r_longlong_w")

    def accept_ulonglong_arg(self):
        """Fetch the next argument through space.r_ulonglong_w."""
        return self._accept_integral("r_ulonglong_w")

    @specialize.arg(1)
    def _accept_integral(self, meth):
        """Consume one argument and unwrap it with the space method ``meth``.

        Wrapped ints and longs are used directly.  Other objects are
        converted with space.index() when they define __index__, or, with
        a DeprecationWarning, with space.int() when they define __int__.
        Raises StructError when no conversion applies.
        """
        space = self.space
        w_obj = self.accept_obj_arg()
        unwrap = getattr(space, meth)
        already_integer = (space.isinstance_w(w_obj, space.w_int) or
                           space.isinstance_w(w_obj, space.w_long))
        if already_integer:
            return unwrap(w_obj)

        w_index = None
        if space.lookup(w_obj, '__index__'):
            try:
                w_index = space.index(w_obj)
            except OperationError as e:
                # a TypeError only means "try the next conversion"
                if not e.match(space, space.w_TypeError):
                    raise
        if w_index is None and space.lookup(w_obj, '__int__'):
            if space.isinstance_w(w_obj, space.w_float):
                msg = "integer argument expected, got float"
            else:
                msg = ("integer argument expected, got non-integer"
                       " (implicit conversion using __int__ is deprecated)")
            space.warn(space.wrap(msg), space.w_DeprecationWarning)
            # wrapped float -> wrapped int or long
            w_index = space.int(w_obj)
        if w_index is None:
            raise StructError("cannot convert argument to integer")
        return unwrap(w_index)