def encodeex(encodebuf, unicodedata, errors="strict", errorcb=None,
             namecb=None, ignore_error=0):
    """Encode `unicodedata` through `encodebuf` and return the output string.

    The input is registered with the encoder via pypy_cjk_enc_init() and
    converted with pypy_cjk_enc_chunk().  Every chunk result that is neither
    0 nor `ignore_error` is handed to multibytecodec_encerror() (together
    with `errors`, `errorcb`, `namecb` and the input), after which the chunk
    call is retried.

    When `ignore_error` is 0, the chunk calls are made with
    MBENC_FLUSH | MBENC_RESET and the encoder is afterwards reset with
    pypy_cjk_enc_reset(), non-zero results being handled the same way.
    Otherwise no flags are passed and the reset step is skipped.

    Raises MemoryError when pypy_cjk_enc_init() returns a negative value.
    """
    input_length = len(unicodedata)
    with rffi.scoped_nonmoving_unicodebuffer(unicodedata) as input_buf:
        if pypy_cjk_enc_init(encodebuf, input_buf, input_length) < 0:
            raise MemoryError

        if ignore_error == 0:
            flags = MBENC_FLUSH | MBENC_RESET
        else:
            flags = 0

        # Convert the input; retry after each error that was handled.
        while True:
            status = pypy_cjk_enc_chunk(encodebuf, flags)
            if status == 0 or status == ignore_error:
                break
            multibytecodec_encerror(encodebuf, status, errors,
                                    errorcb, namecb, unicodedata)

        # 'flags' is never modified above, so the reset phase either runs
        # until it succeeds or is skipped entirely.
        if flags & MBENC_RESET:
            while True:
                status = pypy_cjk_enc_reset(encodebuf)
                if status == 0:
                    break
                multibytecodec_encerror(encodebuf, status, errors,
                                        errorcb, namecb, unicodedata)

        out_ptr = pypy_cjk_enc_outbuf(encodebuf)
        out_len = pypy_cjk_enc_outlen(encodebuf)
        return rffi.charpsize2str(out_ptr, out_len)
def multibytecodec_decerror(decodebuf, e, errors, errorcb, namecb, stringdata):
    """Handle the error code `e` reported while decoding with `decodebuf`.

    Error classification:
      * e > 0               -> "illegal multibyte sequence" spanning e units
      * e == MBERR_TOOFEW   -> "incomplete multibyte sequence" spanning the
                               rest of the decoder's input buffer
      * e == MBERR_NOMEMORY -> MemoryError
      * anything else       -> RuntimeError

    The `errors` policy then selects the replacement text:
      * "strict"  -> raise EncodeDecodeError(start, end, reason)
      * "ignore"  -> empty replacement
      * "replace" -> UNICODE_REPLACEMENT_CHARACTER
      * otherwise -> `errorcb` (must be set) is called and returns the
                     replacement together with a new end position

    The replacement and end position are finally passed to
    pypy_cjk_dec_replace_on_error(); MemoryError is raised if that call
    returns MBERR_NOMEMORY.
    """
    if e > 0:
        message = "illegal multibyte sequence"
        bad_size = e
    elif e == MBERR_TOOFEW:
        message = "incomplete multibyte sequence"
        bad_size = pypy_cjk_dec_inbuf_remaining(decodebuf)
    elif e == MBERR_NOMEMORY:
        raise MemoryError
    else:
        raise RuntimeError

    # Work out the unicode text to substitute ('substitute') and the
    # position in the input 'stringdata' at which the error ends ('end').
    start = pypy_cjk_dec_inbuf_consumed(decodebuf)
    end = start + bad_size
    if errors == "strict":
        raise EncodeDecodeError(start, end, message)
    if errors == "ignore":
        substitute = u""
    elif errors == "replace":
        substitute = UNICODE_REPLACEMENT_CHARACTER
    else:
        assert errorcb
        substitute, end = errorcb(errors, namecb, message,
                                  stringdata, start, end)

    with rffi.scoped_nonmoving_unicodebuffer(substitute) as sub_buf:
        status = pypy_cjk_dec_replace_on_error(decodebuf, sub_buf,
                                               len(substitute), end)
    if status == MBERR_NOMEMORY:
        raise MemoryError
def _decode_helper(cp, s, flags, encoding, errors, errorhandler,
                   final, start, end, res):
    """Decode s[start:end] with code page `cp` and append the result to `res`.

    `end` is clamped to len(s).  The slice is converted with two calls to
    MultiByteToWideChar(): the first one only asks for the output size, the
    second one performs the conversion.  The wide-character result is turned
    into utf-8 with _unibuf_to_utf8() and appended to `res`.

    If either MultiByteToWideChar() call returns 0, _decode_cp_error() is
    invoked instead and the string it returns is appended to `res`.

    Returns a pair (position, length):
      * on success: (end, codepoints_in_utf8(converted text))
      * on error:   (position returned by _decode_cp_error,
                     check_utf8(replacement, True))
    """
    if end > len(s):
        end = len(s)
    chunk = s[start:end]
    with rffi.scoped_nonmovingbuffer(chunk) as src_ptr:
        # First call: query the number of wide characters needed.
        wide_len = MultiByteToWideChar(cp, flags, src_ptr, len(chunk),
                                       lltype.nullptr(rffi.CWCHARP.TO), 0)
        if wide_len != 0:
            with rffi.scoped_alloc_unicodebuffer(wide_len) as wide_buf:
                # Second call: do the actual conversion.
                written = MultiByteToWideChar(cp, flags, src_ptr, len(chunk),
                                              wide_buf.raw, wide_len)
                if written != 0:
                    wide_str = wide_buf.str(wide_len)
                    assert wide_str is not None
                    with rffi.scoped_nonmoving_unicodebuffer(wide_str) as wide_ptr:
                        utf8 = _unibuf_to_utf8(wide_ptr, wide_len)
                    res.append(utf8)
                    return end, codepoints_in_utf8(utf8)

        # Reached only when one of the two conversion calls returned 0.
        replacement, new_pos = _decode_cp_error(s, errorhandler, encoding,
                                                errors, final, start, end)
        res.append(replacement)
        return new_pos, check_utf8(replacement, True)
def compare_digest(space, w_a, w_b):
    """compare_digest(a, b) -> bool

    Return 'a == b'. This function uses an approach designed to prevent
    timing analysis, making it appropriate for cryptography.
    a and b must both be of the same type: either str (ASCII only),
    or any type that supports the buffer protocol (e.g. bytes).

    Note: If a and b are of different lengths, or if an error occurs,
    a timing attack could theoretically reveal information about the
    types and lengths of a and b--but not their values.
    """
    both_unicode = (space.isinstance_w(w_a, space.w_unicode) and
                    space.isinstance_w(w_b, space.w_unicode))
    if not both_unicode:
        # Anything that is not a pair of unicode objects is delegated to
        # the buffer-based comparison.
        return compare_digest_buffer(space, w_a, w_b)

    left = space.unicode_w(w_a)
    right = space.unicode_w(w_b)
    with rffi.scoped_nonmoving_unicodebuffer(left) as left_buf:
        with rffi.scoped_nonmoving_unicodebuffer(right) as right_buf:
            outcome = pypy_tscmp_wide(left_buf, right_buf,
                                      len(left), len(right))
    return space.wrap(rffi.cast(lltype.Bool, outcome))
def utf8_encode_mbcs(s, errors, errorhandler, force_replace=True):
    """Encode the utf-8 string `s` with WideCharToMultiByte(CP_ACP, ...).

    `s` is first decoded to unicode.  Two modes exist:
      * `force_replace` is true or `errors` is 'replace': the conversion is
        called with flags 0 and no "used default char" pointer.
      * otherwise ("strict"): the conversion is called with
        rwin32.WC_NO_BEST_FIT_CHARS and a raw-allocated BOOL; if that BOOL
        is set after a conversion call, `errorhandler` is invoked with
        "invalid character".

    When `force_replace` is false and `errors` is neither 'strict' nor
    'replace', `errorhandler` is invoked with an "unsupported errors"
    message before anything else happens.

    Returns '' for empty input, otherwise the converted byte string.
    Raises rwin32.lastSavedWindowsError() when a conversion call returns 0.
    """
    # TODO: do the encoding without decoding utf8 -> unicode
    text = s.decode('utf8')
    text_len = len(text)
    if not force_replace and errors not in ('strict', 'replace'):
        unsupported = "mbcs encoding does not support errors='%s'" % errors
        errorhandler('strict', 'mbcs', unsupported, s, 0, 0)

    if text_len == 0:
        return ''

    strict_mode = not (force_replace or errors == 'replace')
    if strict_mode:
        flags = rwin32.WC_NO_BEST_FIT_CHARS
        used_default_p = lltype.malloc(BOOLP.TO, 1, flavor='raw')
        used_default_p[0] = rffi.cast(rwin32.BOOL, False)
    else:
        flags = 0
        used_default_p = lltype.nullptr(BOOLP.TO)

    try:
        with rffi.scoped_nonmoving_unicodebuffer(text) as text_ptr:
            # First call: ask only for the size of the result.
            needed = WideCharToMultiByte(CP_ACP, flags,
                                         text_ptr, text_len, None, 0,
                                         None, used_default_p)
            if needed == 0:
                raise rwin32.lastSavedWindowsError()
            # A default character having been used counts as a failure.
            if (used_default_p and
                    rffi.cast(lltype.Bool, used_default_p[0])):
                errorhandler('strict', 'mbcs', "invalid character",
                             s, 0, 0)

            with rffi.scoped_alloc_buffer(needed) as out_buf:
                # Second call: perform the conversion into the buffer.
                converted = WideCharToMultiByte(CP_ACP, flags,
                                                text_ptr, text_len,
                                                out_buf.raw, needed,
                                                None, used_default_p)
                if converted == 0:
                    raise rwin32.lastSavedWindowsError()
                if (used_default_p and
                        rffi.cast(lltype.Bool, used_default_p[0])):
                    errorhandler('strict', 'mbcs', "invalid character",
                                 s, 0, 0)
                encoded = out_buf.str(needed)
                assert encoded is not None
                return encoded
    finally:
        # The flag is only allocated in strict mode; release it if present.
        if used_default_p:
            lltype.free(used_default_p, flavor='raw')