Python lookup 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: webencodings

메소드/함수: lookup

hotexamples.com에서의 예제들: 10

Python lookup 예제 10개를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python webencodings.lookup의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

예제 #1

파일 보기

파일: test.py 프로젝트: Connexions/tinycss2

def test_stylesheet_bytes(kwargs):
    """Feed one test-case dict to ``parse_stylesheet_bytes``.

    ``kwargs`` is modified in place before being splatted into the call:

    * ``css_bytes`` (a text string in the fixture) is encoded as latin1;
    * the ``comment`` entry, if any, is dropped;
    * a truthy ``environment_encoding`` label is replaced by the result
      of ``lookup`` on that label;
    * the entries of ``SKIP`` are merged in.

    :returns: Whatever ``parse_stylesheet_bytes`` returns.

    """
    raw_css = kwargs['css_bytes']
    kwargs['css_bytes'] = raw_css.encode('latin1')
    kwargs.pop('comment', None)

    env_label = kwargs.get('environment_encoding')
    if env_label:
        kwargs['environment_encoding'] = lookup(env_label)

    kwargs.update(SKIP)
    return parse_stylesheet_bytes(**kwargs)

예제 #2

파일 보기

파일: htmlencoding.py 프로젝트: jhildreth/cosr-back

    def detect_xml_encoding(self):
        """ Looks for an encoding label via _RE_XML_ENCODING in the raw document.

        Returns the ``codec_info`` of the encoding that webencodings resolves
        from the first captured group, or None when the pattern does not
        match or the label is not recognised.
        """

        prolog = _RE_XML_ENCODING.search(self.doc.source_data)
        if not prolog:
            return None

        encoding = webencodings.lookup(prolog.group(1))
        return encoding.codec_info if encoding else None

예제 #3

파일 보기

파일: htmlencoding.py 프로젝트: jhildreth/cosr-back

    def guess_encoding(self):
        """ Makes an expensive guess of the charset with the cchardet library.

        Returns the ``codec_info`` of the guessed encoding, or None when
        cchardet reports no encoding or webencodings does not recognise
        the reported label.
        """

        # TODO: would it be faster to look only in the first N thousand bytes?
        guess = cchardet.detect(self.doc.source_data)
        label = guess.get("encoding")
        if not label:
            return None

        encoding = webencodings.lookup(label)
        if encoding:
            return encoding.codec_info
        return None

예제 #4

파일 보기

파일: bytes.py 프로젝트: spladug/tinycss2

def decode_stylesheet_bytes(css_bytes, protocol_encoding=None,
                            environment_encoding=None):
    """Pick an encoding for a CSS stylesheet and decode its bytes.

    The encoding handed to ``decode`` is the first of these that applies:

    1. ``protocol_encoding``, when it is a label that ``lookup`` recognises;
    2. the label of a leading ``@charset "...";`` rule, when recognised
       (a UTF-16 label is replaced by UTF-8);
    3. ``environment_encoding``, when given;
    4. UTF-8.

    NOTE(review): the upstream docstring also mentions a byte order mark;
    this function never inspects one itself, so that is presumably handled
    inside ``decode`` -- confirm against the webencodings documentation.

    :param css_bytes: A byte string.
    :param protocol_encoding:
        The encoding label, if any, defined by HTTP or equivalent protocol.
        (e.g. via the ``charset`` parameter of the ``Content-Type`` header.)
    :param environment_encoding:
        A :class:`webencodings.Encoding` object
        for the `environment encoding
        <http://www.w3.org/TR/css-syntax/#environment-encoding>`_,
        if any.
    :returns:
        The result of ``decode``: a 2-tuple of a decoded Unicode string
        and the :class:`webencodings.Encoding` object that was used.

    """
    # http://dev.w3.org/csswg/css-syntax/#the-input-byte-stream
    from_protocol = lookup(protocol_encoding) if protocol_encoding else None
    if from_protocol:
        return decode(css_bytes, from_protocol)

    charset_prefix = b'@charset "'
    if css_bytes.startswith(charset_prefix):
        label_start = len(charset_prefix)
        # Only the first 100 bytes are searched for the closing quote; the
        # limit is arbitrary, it just bounds the length of the label.
        closing = css_bytes.find(b'"', label_start, 100)
        if closing != -1 and css_bytes.startswith(b'";', closing):
            label = css_bytes[label_start:closing].decode('latin1')
            declared = lookup(label)
            if declared:
                # A UTF-16 label is not used as-is: the stylesheet is
                # decoded with UTF-8 as the fallback instead.
                if declared.name in ('utf-16be', 'utf-16le'):
                    declared = UTF8
                return decode(css_bytes, declared)

    return decode(css_bytes, environment_encoding or UTF8)

예제 #5

파일 보기

파일: htmlencoding.py 프로젝트: jhildreth/cosr-back

    def detect_meta_charset(self):
        """ Returns the encoding found in meta tags in the doc.

        Walks the nodes listed by GUMBOCY_PARSER_HEAD and returns the first
        encoding obtained either from a ``charset`` attribute or from an
        ``http-equiv="content-type"`` meta tag. Returns None if no node
        yields one.
        """

        for node in GUMBOCY_PARSER_HEAD.listnodes():
            # Only meta nodes that carry a third element (used as the
            # attribute mapping below) are of interest.
            if node[1] != "meta" or len(node) <= 2:
                continue

            attrs = node[2]
            if attrs.get("charset"):
                encoding = webencodings.lookup(attrs["charset"])
                if encoding:
                    return encoding.codec_info
                # An unrecognised charset label: move on to the next node
                # without looking at http-equiv on this one.
                continue

            if attrs.get("http-equiv", "").lower().strip() == "content-type":
                from_content = get_encoding_from_content_type(attrs.get("content", ""))
                if from_content:
                    return from_content

        return None

예제 #6

파일 보기

파일: htmlencoding.py 프로젝트: x0rzkov/cosr-back

    def detect_meta_charset(self):
        """ Returns the encoding found in meta tags in the doc.

        The nodes listed by GUMBOCY_PARSER_HEAD are scanned in order; the
        first meta node that yields an encoding wins, through either its
        ``charset`` attribute or its ``http-equiv="content-type"`` content.
        Returns None when nothing is found.
        """

        for node in GUMBOCY_PARSER_HEAD.listnodes():
            is_meta_with_attrs = node[1] == "meta" and len(node) > 2
            if not is_meta_with_attrs:
                continue

            attributes = node[2]
            if attributes.get("charset"):
                resolved = webencodings.lookup(attributes["charset"])
                if resolved:
                    return resolved.codec_info
                # The charset attribute takes precedence even when its label
                # is unknown: http-equiv is not consulted for this node.
                continue

            http_equiv = attributes.get("http-equiv", "").lower().strip()
            if http_equiv != "content-type":
                continue

            content = attributes.get("content", "")
            meta_encoding = get_encoding_from_content_type(content)
            if meta_encoding:
                return meta_encoding

        return None

예제 #7

파일 보기

파일: _inputstream.py 프로젝트: karansthr/html5lib-python

def lookupEncoding(encoding):
    """Resolve an encoding label through ``webencodings.lookup``.

    ``encoding`` may be None, a byte string or a text string. Byte strings
    are decoded as ASCII first. Returns whatever ``webencodings.lookup``
    returns for the label, or None when the input is None, is a byte
    string that is not pure ASCII, or makes the lookup raise
    AttributeError."""
    if isinstance(encoding, bytes):
        try:
            encoding = encoding.decode("ascii")
        except UnicodeDecodeError:
            # Non-ASCII bytes cannot be a valid label.
            return None

    if encoding is None:
        return None

    try:
        return webencodings.lookup(encoding)
    except AttributeError:
        return None

예제 #8

파일 보기

파일: inputstream.py 프로젝트: PeterVanLoon/html5lib-python

def lookupEncoding(encoding):
    """Resolve an encoding label through ``webencodings.lookup``.

    ``encoding`` may be None, a ``binary_type`` string or a text string.
    Binary input is decoded as ASCII first. Returns whatever
    ``webencodings.lookup`` returns for the label, or None when the input
    is None, is binary but not pure ASCII, or makes the lookup raise
    AttributeError."""
    if isinstance(encoding, binary_type):
        try:
            encoding = encoding.decode("ascii")
        except UnicodeDecodeError:
            # Non-ASCII bytes cannot be a valid label.
            return None

    if encoding is None:
        return None

    try:
        result = webencodings.lookup(encoding)
    except AttributeError:
        return None
    return result

예제 #9

파일 보기

파일: htmlencoding.py 프로젝트: x0rzkov/cosr-back

def get_encoding_from_content_type(content_type):
    """ Returns the codec info for the charset of a Content-Type value.

    ``content_type`` is decoded as ASCII (undecodable bytes are ignored)
    and parsed with ``cgi.parse_header``. Returns None when there is no
    ``charset`` parameter or webencodings does not recognise its label.
    """
    header_text = content_type.decode("ascii", "ignore")
    params = cgi.parse_header(header_text)[1]

    charset = params.get("charset")
    if not charset:
        return None

    encoding = webencodings.lookup(charset)
    if encoding:
        return encoding.codec_info
    return None

예제 #10

파일 보기

파일: htmlencoding.py 프로젝트: jhildreth/cosr-back

def get_encoding_from_content_type(content_type):
    """ Extracts the charset from a Content-Type value and resolves it.

    The value is decoded as ASCII, dropping bytes that cannot be decoded,
    then split by ``cgi.parse_header``. The ``charset`` parameter, if any,
    is looked up with webencodings and the matching ``codec_info`` is
    returned; otherwise the result is None.
    """
    _, params = cgi.parse_header(content_type.decode("ascii", "ignore"))

    label = params.get("charset")
    resolved = webencodings.lookup(label) if label else None
    return resolved.codec_info if resolved else None