Exemplo n.º 1
0
def select_charset(request):
    """Given a request, chooses a charset for encoding the response.

    Candidate charsets are taken from the first of these that applies:
      1. the comma-separated 'charsets' request parameter, if non-empty;
      2. Shift_JIS alone, when user_agents.prefer_sjis_charset(request)
         is true;
      3. request.accept_charset.best_matches().

    Surrounding whitespace is removed from each candidate and empty
    candidates are dropped, so 'Shift_JIS, utf-8' is treated the same as
    'Shift_JIS,utf-8'.

    If the selected charset is UTF-8, it always returns
    'utf-8' (const.CHARSET_UTF8), not 'utf8', 'UTF-8', etc.  Otherwise the
    first candidate that Python can encode to is returned as the client
    spelled it (minus surrounding whitespace).  If Python knows none of the
    candidates, const.CHARSET_UTF8 is returned.
    """
    # We assume that any client that doesn't support UTF-8 will specify a
    # preferred encoding in the Accept-Charset header, and will use this
    # encoding for content, query parameters, and form data.  We make this
    # assumption across all repositories.

    # Get a list of the charsets that the client supports.
    requested = request.get('charsets')
    if requested:
        candidates = requested.split(',')
    elif user_agents.prefer_sjis_charset(request):
        # Some Japanese feature phones don't (fully) support UTF-8.
        # They only support Shift_JIS. But they may not send Accept-Charset
        # header. Also, we haven't confirmed, but there may be phones whose
        # Accept-Charset header includes UTF-8 but its UTF-8 support is buggy.
        # So we always use Shift_JIS regardless of Accept-Charset header.
        candidates = ['Shift_JIS']
    else:
        candidates = request.accept_charset.best_matches()

    # Normalize the candidates: a list such as "Shift_JIS, utf-8" would
    # otherwise yield " utf-8", which fails the UTF-8 check below and could
    # be returned with its padding intact.
    charsets = [
        name for name in (candidate.strip() for candidate in candidates)
        if name
    ]

    # Always prefer UTF-8 if the client supports it.
    for charset in charsets:
        if charset.lower().replace('_', '-') in ('utf8', 'utf-8'):
            return const.CHARSET_UTF8

    # Otherwise, look for a requested charset that Python supports.
    for charset in charsets:
        try:
            'xyz'.encode(charset, 'replace')  # test if charset is known
        except (LookupError, TypeError, ValueError):
            # Unknown or unusable codec name; try the next candidate.  Only
            # codec-related errors are caught so that KeyboardInterrupt and
            # SystemExit are not swallowed.
            continue
        return charset

    # If Python doesn't know any of the requested charsets, use UTF-8.
    return const.CHARSET_UTF8
Exemplo n.º 2
0
def select_charset(request):
    """Given a request, chooses a charset for encoding the response.

    Candidate charsets are taken from the first of these that applies:
      1. the comma-separated 'charsets' request parameter, if non-empty;
      2. Shift_JIS alone, when user_agents.prefer_sjis_charset(request)
         is true;
      3. request.accept_charset.best_matches().

    Surrounding whitespace is removed from each candidate and empty
    candidates are dropped, so 'Shift_JIS, utf-8' is treated the same as
    'Shift_JIS,utf-8'.

    If the selected charset is UTF-8, it always returns
    'utf-8' (const.CHARSET_UTF8), not 'utf8', 'UTF-8', etc.  Otherwise the
    first candidate that Python can encode to is returned as the client
    spelled it (minus surrounding whitespace).  If Python knows none of the
    candidates, const.CHARSET_UTF8 is returned.
    """
    # We assume that any client that doesn't support UTF-8 will specify a
    # preferred encoding in the Accept-Charset header, and will use this
    # encoding for content, query parameters, and form data.  We make this
    # assumption across all repositories.

    # Get a list of the charsets that the client supports.
    requested = request.get('charsets')
    if requested:
        candidates = requested.split(',')
    elif user_agents.prefer_sjis_charset(request):
        # Some Japanese feature phones don't (fully) support UTF-8.
        # They only support Shift_JIS. But they may not send Accept-Charset
        # header. Also, we haven't confirmed, but there may be phones whose
        # Accept-Charset header includes UTF-8 but its UTF-8 support is buggy.
        # So we always use Shift_JIS regardless of Accept-Charset header.
        candidates = ['Shift_JIS']
    else:
        candidates = request.accept_charset.best_matches()

    # Normalize the candidates: a list such as "Shift_JIS, utf-8" would
    # otherwise yield " utf-8", which fails the UTF-8 check below and could
    # be returned with its padding intact.
    charsets = [
        name for name in (candidate.strip() for candidate in candidates)
        if name
    ]

    # Always prefer UTF-8 if the client supports it.
    for charset in charsets:
        if charset.lower().replace('_', '-') in ('utf8', 'utf-8'):
            return const.CHARSET_UTF8

    # Otherwise, look for a requested charset that Python supports.
    for charset in charsets:
        try:
            'xyz'.encode(charset, 'replace')  # test if charset is known
        except (LookupError, TypeError, ValueError):
            # Unknown or unusable codec name; try the next candidate.  Only
            # codec-related errors are caught so that KeyboardInterrupt and
            # SystemExit are not swallowed.
            continue
        return charset

    # If Python doesn't know any of the requested charsets, use UTF-8.
    return const.CHARSET_UTF8