Exemplo n.º 1
0
def encodeex(encodebuf,
             unicodedata,
             errors="strict",
             errorcb=None,
             namecb=None,
             ignore_error=0):
    """Feed 'unicodedata' through the encoder state 'encodebuf' and return
    the accumulated output as a byte string.

    'errors', 'errorcb' and 'namecb' are passed unchanged to
    multibytecodec_encerror() each time the encoder reports a status that
    is not a normal stop.

    With the default ignore_error == 0 the encoder runs with
    MBENC_FLUSH | MBENC_RESET and the reset step is executed afterwards.
    With a non-zero 'ignore_error' no flags are used, the reset step is
    skipped, and a chunk status equal to 'ignore_error' ends the loop just
    like 0 does.

    Raises MemoryError if pypy_cjk_enc_init() returns a negative value.
    """
    if ignore_error == 0:
        flags = MBENC_FLUSH | MBENC_RESET
    else:
        flags = 0

    with rffi.scoped_nonmoving_unicodebuffer(unicodedata) as inbuf:
        if pypy_cjk_enc_init(encodebuf, inbuf, len(unicodedata)) < 0:
            raise MemoryError

        # Main encoding pass: retry after every reported error.
        # NOTE(review): this relies on multibytecodec_encerror() either
        # raising or updating 'encodebuf' so that the next call makes
        # progress -- confirm against its definition.
        status = pypy_cjk_enc_chunk(encodebuf, flags)
        while status != 0 and status != ignore_error:
            multibytecodec_encerror(encodebuf, status, errors, errorcb,
                                    namecb, unicodedata)
            status = pypy_cjk_enc_chunk(encodebuf, flags)

        # Reset pass, only when MBENC_RESET was requested above.
        if flags & MBENC_RESET:
            status = pypy_cjk_enc_reset(encodebuf)
            while status != 0:
                multibytecodec_encerror(encodebuf, status, errors, errorcb,
                                        namecb, unicodedata)
                status = pypy_cjk_enc_reset(encodebuf)

        return rffi.charpsize2str(pypy_cjk_enc_outbuf(encodebuf),
                                  pypy_cjk_enc_outlen(encodebuf))
Exemplo n.º 2
0
def multibytecodec_decerror(decodebuf, e, errors, errorcb, namecb, stringdata):
    """React to the non-zero decoder status 'e'.

    Status handling:
      * e > 0               -- "illegal multibyte sequence" of 'e' units
      * e == MBERR_TOOFEW   -- "incomplete multibyte sequence" spanning the
                               remaining input of 'decodebuf'
      * e == MBERR_NOMEMORY -- raises MemoryError
      * anything else       -- raises RuntimeError

    Error policy ('errors'):
      * "strict"  -- raises EncodeDecodeError(start, end, reason)
      * "ignore"  -- substitutes the empty unicode string
      * "replace" -- substitutes UNICODE_REPLACEMENT_CHARACTER
      * otherwise -- 'errorcb' (must be set) is called and has to return a
                     (replacement, new_end) pair

    The replacement and the resulting end position are handed to
    pypy_cjk_dec_replace_on_error(); MemoryError is raised if that call
    returns MBERR_NOMEMORY.
    """
    if e > 0:
        reason = "illegal multibyte sequence"
        esize = e
    else:
        if e != MBERR_TOOFEW:
            if e == MBERR_NOMEMORY:
                raise MemoryError
            raise RuntimeError
        reason = "incomplete multibyte sequence"
        esize = pypy_cjk_dec_inbuf_remaining(decodebuf)

    # [start, end) is the faulty span inside the input 'stringdata'
    start = pypy_cjk_dec_inbuf_consumed(decodebuf)
    end = start + esize

    if errors == "strict":
        raise EncodeDecodeError(start, end, reason)
    if errors == "ignore":
        replace = u""
    elif errors == "replace":
        replace = UNICODE_REPLACEMENT_CHARACTER
    else:
        # custom handler: it may also move 'end'
        assert errorcb
        replace, end = errorcb(errors, namecb, reason, stringdata, start, end)

    with rffi.scoped_nonmoving_unicodebuffer(replace) as replace_buf:
        status = pypy_cjk_dec_replace_on_error(decodebuf, replace_buf,
                                               len(replace), end)
    if status == MBERR_NOMEMORY:
        raise MemoryError
Exemplo n.º 3
0
def multibytecodec_decerror(decodebuf, e, errors, errorcb, namecb, stringdata):
    """Apply the 'errors' policy to a decoding failure reported as 'e'.

    A positive 'e' is the size of an illegal multibyte sequence;
    MBERR_TOOFEW means the input ends in an incomplete sequence, whose size
    is read from pypy_cjk_dec_inbuf_remaining().  MBERR_NOMEMORY raises
    MemoryError and any other value raises RuntimeError.

    With errors == "strict" an EncodeDecodeError(start, end, reason) is
    raised.  Otherwise a replacement unicode string is chosen (empty for
    "ignore", UNICODE_REPLACEMENT_CHARACTER for "replace", or whatever the
    required 'errorcb' returns together with a new end position) and passed
    to pypy_cjk_dec_replace_on_error().  MemoryError is raised if that call
    returns MBERR_NOMEMORY.
    """
    # -- classify the status code ------------------------------------
    if e > 0:
        reason = "illegal multibyte sequence"
        bad_size = e
    elif e == MBERR_TOOFEW:
        reason = "incomplete multibyte sequence"
        bad_size = pypy_cjk_dec_inbuf_remaining(decodebuf)
    else:
        if e == MBERR_NOMEMORY:
            raise MemoryError
        raise RuntimeError

    # -- locate the offending span in the input 'stringdata' ---------
    start = pypy_cjk_dec_inbuf_consumed(decodebuf)
    end = start + bad_size

    # -- pick the replacement text, or give up under "strict" --------
    if errors == "strict":
        raise EncodeDecodeError(start, end, reason)
    elif errors == "ignore":
        substitute = u""
    elif errors == "replace":
        substitute = UNICODE_REPLACEMENT_CHARACTER
    else:
        assert errorcb
        substitute, end = errorcb(errors, namecb, reason, stringdata,
                                  start, end)

    # -- hand the replacement and the end position to the decoder ----
    n_substitute = len(substitute)
    with rffi.scoped_nonmoving_unicodebuffer(substitute) as sub_buf:
        rc = pypy_cjk_dec_replace_on_error(decodebuf, sub_buf,
                                           n_substitute, end)
    if rc == MBERR_NOMEMORY:
        raise MemoryError
Exemplo n.º 4
0
def _decode_helper(cp, s, flags, encoding, errors, errorhandler, final, start,
                   end, res):
    """Decode s[start:end] with MultiByteToWideChar() for code page 'cp'
    and append the result to the list 'res'.

    'end' is clamped to len(s).  The conversion is done in two calls: the
    first one only asks for the required output size, the second one fills
    a buffer of that size.  The wide-character result is then converted
    with _unibuf_to_utf8() before being appended.

    Returns a pair (position, count):
      * on success: ('end', codepoints_in_utf8(converted piece))
      * if either MultiByteToWideChar() call returns 0: the piece and the
        position produced by _decode_cp_error(), i.e.
        (pos, check_utf8(piece, True))
    """
    end = min(end, len(s))
    chunk = s[start:end]
    chunk_len = len(chunk)
    with rffi.scoped_nonmovingbuffer(chunk) as src:
        # sizing call: no output buffer, returns the needed length
        wide_len = MultiByteToWideChar(cp, flags, src, chunk_len,
                                       lltype.nullptr(rffi.CWCHARP.TO), 0)
        if wide_len != 0:
            with rffi.scoped_alloc_unicodebuffer(wide_len) as wide_buf:
                # real conversion into the freshly allocated buffer
                written = MultiByteToWideChar(cp, flags, src, chunk_len,
                                              wide_buf.raw, wide_len)
                if written != 0:
                    wide_text = wide_buf.str(wide_len)
                    assert wide_text is not None
                    with rffi.scoped_nonmoving_unicodebuffer(
                            wide_text) as wide_ptr:
                        utf8_piece = _unibuf_to_utf8(wide_ptr, wide_len)
                    res.append(utf8_piece)
                    return end, codepoints_in_utf8(utf8_piece)

        # Reached when either call above returned 0: delegate to the
        # code-page error path.
        fallback, pos = _decode_cp_error(s, errorhandler, encoding, errors,
                                         final, start, end)
        res.append(fallback)
        return pos, check_utf8(fallback, True)
Exemplo n.º 5
0
def compare_digest(space, w_a, w_b):
    """compare_digest(a, b) -> bool

    Return 'a == b'.  This function uses an approach designed to prevent
    timing analysis, making it appropriate for cryptography.  a and b
    must both be of the same type: either str (ASCII only), or any type
    that supports the buffer protocol (e.g. bytes).

    Note: If a and b are of different lengths, or if an error occurs, a
    timing attack could theoretically reveal information about the types
    and lengths of a and b--but not their values.
    """
    both_unicode = (space.isinstance_w(w_a, space.w_unicode) and
                    space.isinstance_w(w_b, space.w_unicode))
    if not both_unicode:
        # anything that is not a unicode/unicode pair goes through the
        # buffer-based comparison
        return compare_digest_buffer(space, w_a, w_b)

    # unicode/unicode: compare the wide-character data via pypy_tscmp_wide,
    # keeping both buffers pinned for the duration of the call
    left = space.unicode_w(w_a)
    right = space.unicode_w(w_b)
    with rffi.scoped_nonmoving_unicodebuffer(left) as left_buf:
        with rffi.scoped_nonmoving_unicodebuffer(right) as right_buf:
            raw_result = pypy_tscmp_wide(left_buf, right_buf,
                                         len(left), len(right))
    equal = rffi.cast(lltype.Bool, raw_result)
    return space.wrap(equal)
Exemplo n.º 6
0
def compare_digest(space, w_a, w_b):
    """compare_digest(a, b) -> bool

    Return 'a == b'.  This function uses an approach designed to prevent
    timing analysis, making it appropriate for cryptography.  a and b
    must both be of the same type: either str (ASCII only), or any type
    that supports the buffer protocol (e.g. bytes).

    Note: If a and b are of different lengths, or if an error occurs, a
    timing attack could theoretically reveal information about the types
    and lengths of a and b--but not their values.
    """
    # Fast exit for everything except a unicode/unicode pair: those are
    # handled by the buffer-protocol implementation.
    if not space.isinstance_w(w_a, space.w_unicode):
        return compare_digest_buffer(space, w_a, w_b)
    if not space.isinstance_w(w_b, space.w_unicode):
        return compare_digest_buffer(space, w_a, w_b)

    u_a = space.unicode_w(w_a)
    u_b = space.unicode_w(w_b)
    # Both strings are exposed as non-moving raw buffers while the C-level
    # comparison runs.
    with rffi.scoped_nonmoving_unicodebuffer(u_a) as buf_a, \
            rffi.scoped_nonmoving_unicodebuffer(u_b) as buf_b:
        outcome = pypy_tscmp_wide(buf_a, buf_b, len(u_a), len(u_b))
    return space.wrap(rffi.cast(lltype.Bool, outcome))
Exemplo n.º 7
0
    def utf8_encode_mbcs(s, errors, errorhandler, force_replace=True):
        """Encode the utf-8 byte string 's' with WideCharToMultiByte(CP_ACP).

        When 'force_replace' is false, only errors='strict' and
        errors='replace' are accepted; any other value is reported through
        'errorhandler'.  An empty input returns '' without calling the
        Windows API.

        In replace mode (force_replace true, or errors == 'replace') the
        conversion runs with flags 0 and no "used default char" tracking.
        In strict mode it runs with WC_NO_BEST_FIT_CHARS, and
        'errorhandler' is called with "invalid character" if the API
        reports that a default character was used.

        Raises the saved Windows error if WideCharToMultiByte() returns 0.
        """
        # TODO: do the encoding without decoding utf8 -> unicode
        text = s.decode('utf8')
        nchars = len(text)
        if not force_replace:
            if errors != 'strict' and errors != 'replace':
                msg = "mbcs encoding does not support errors='%s'" % errors
                errorhandler('strict', 'mbcs', msg, s, 0, 0)

        if nchars == 0:
            return ''

        if not force_replace and errors != 'replace':
            # strict: forbid best-fit mapping and track default-char usage
            flags = rwin32.WC_NO_BEST_FIT_CHARS
            used_default_p = lltype.malloc(BOOLP.TO, 1, flavor='raw')
            used_default_p[0] = rffi.cast(rwin32.BOOL, False)
        else:
            flags = 0
            used_default_p = lltype.nullptr(BOOLP.TO)

        try:
            with rffi.scoped_nonmoving_unicodebuffer(text) as wide_ptr:
                # sizing call: no output buffer, returns the needed length
                needed = WideCharToMultiByte(CP_ACP, flags, wide_ptr, nchars,
                                             None, 0, None, used_default_p)
                if needed == 0:
                    raise rwin32.lastSavedWindowsError()
                # a default char being used means the input was not
                # representable
                if used_default_p:
                    if rffi.cast(lltype.Bool, used_default_p[0]):
                        errorhandler('strict', 'mbcs', "invalid character",
                                     s, 0, 0)

                with rffi.scoped_alloc_buffer(needed) as outbuf:
                    # real conversion into the freshly allocated buffer
                    written = WideCharToMultiByte(CP_ACP, flags, wide_ptr,
                                                  nchars, outbuf.raw, needed,
                                                  None, used_default_p)
                    if written == 0:
                        raise rwin32.lastSavedWindowsError()
                    if used_default_p:
                        if rffi.cast(lltype.Bool, used_default_p[0]):
                            errorhandler('strict', 'mbcs',
                                         "invalid character", s, 0, 0)
                    encoded = outbuf.str(needed)
                    assert encoded is not None
                    return encoded
        finally:
            # the flag cell only exists in strict mode
            if used_default_p:
                lltype.free(used_default_p, flavor='raw')
Exemplo n.º 8
0
def encodeex(encodebuf, unicodedata, errors="strict", errorcb=None, namecb=None, ignore_error=0):
    """Encode 'unicodedata' using the encoder state 'encodebuf'.

    Returns the encoder's output buffer as a byte string.  Every status
    that is not a normal stop is forwarded, with 'errors', 'errorcb' and
    'namecb', to multibytecodec_encerror() and the step is then retried.

    ignore_error == 0 (default): run with MBENC_FLUSH | MBENC_RESET and
    perform the reset step.  Non-zero 'ignore_error': run with no flags,
    skip the reset step, and stop the chunk loop on that status as well
    as on 0.

    Raises MemoryError if pypy_cjk_enc_init() returns a negative value.
    """
    with rffi.scoped_nonmoving_unicodebuffer(unicodedata) as inbuf:
        if pypy_cjk_enc_init(encodebuf, inbuf, len(unicodedata)) < 0:
            raise MemoryError
        flags = (MBENC_FLUSH | MBENC_RESET) if ignore_error == 0 else 0

        # chunk phase
        finished = False
        while not finished:
            rc = pypy_cjk_enc_chunk(encodebuf, flags)
            finished = rc == 0 or rc == ignore_error
            if not finished:
                multibytecodec_encerror(encodebuf, rc, errors, errorcb, namecb, unicodedata)

        # reset phase, entered only if MBENC_RESET is part of 'flags'
        resetting = (flags & MBENC_RESET) != 0
        while resetting:
            rc = pypy_cjk_enc_reset(encodebuf)
            if rc != 0:
                multibytecodec_encerror(encodebuf, rc, errors, errorcb, namecb, unicodedata)
            else:
                resetting = False

        out_ptr = pypy_cjk_enc_outbuf(encodebuf)
        out_len = pypy_cjk_enc_outlen(encodebuf)
        return rffi.charpsize2str(out_ptr, out_len)