Python http_content_type_encoding примеры использования

Язык программирования: Python

Пространство имен/Пакет: w3lib.encoding

Метод/Функция: http_content_type_encoding

Примеров на hotexamples.com: 16

Python http_content_type_encoding - 16 примеров найдено. Это лучшие примеры Python кода для w3lib.encoding.http_content_type_encoding, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

Файл: transform_html.py Проект: zanachka/web-page-annotator

def transformed_response_body(
        response: Response,
        html_transform: Callable[[BeautifulSoup, str, ProxyUrl], None],
        proxy_url: ProxyUrl) -> Tuple[bool, bytes]:
    """Rewrite a response body according to its content type.

    HTML bodies are parsed, handed to ``html_transform`` and serialised
    back as UTF-8. CSS bodies are passed through ``process_css``. Any other
    content type is returned unchanged.

    Returns a ``(is_html, body)`` pair; the flag is ``True`` only for the
    HTML branch.
    """
    raw_body = response.body or b''
    header_map = response.headers or {}
    content_type = header_map.get('content-type', '')

    if content_type.startswith('text/html'):
        charset = http_content_type_encoding(content_type)
        try:
            page_base = get_base_url(raw_body, response.url, charset)
        except UnicodeDecodeError:
            # Body could not be decoded: fall back to the response URL.
            page_base = response.url
        document = BeautifulSoup(raw_body, 'lxml', from_encoding=charset)
        html_transform(document, base_url=page_base, proxy_url=proxy_url)
        head_tag = document.find('head')
        if head_tag:
            # The document is re-encoded as UTF-8 below, so declare it.
            charset_meta = document.new_tag('meta', charset='utf8')
            head_tag.append(charset_meta)
        return True, document.encode('utf8')

    if content_type.startswith('text/css'):
        stylesheet = raw_body.decode('utf8', 'ignore')
        rewritten = process_css(
            stylesheet, base_uri=response.url, proxy_url=proxy_url)
        return False, rewritten.encode('utf8')

    return False, raw_body

Пример #2

Показать файл

 def _headers_encoding(self):
     """
     从headers获取头部charset编码
     """
     content_type = self.headers.get("Content-Type") or self.headers.get(
         "content-type")
     return (http_content_type_encoding(content_type) or "utf-8"
             if "application/json" in content_type else None)

Пример #3

Показать файл

    def process_request_unsafe(self, request, spider):
        """Load ``request.url`` in the spider's session and wrap the result.

        Returns an ``HtmlResponse`` built from the session's URL, body and
        headers, or ``None`` when the session produced no body.
        """
        spider.session.visit(request.url)
        spider.session.wait()

        body = spider.session.body()
        if body is None:
            # Nothing to wrap; bail out before any encoding detection.
            return

        raw_headers = spider.session.headers()
        # Convert header names to str once and reuse the dict for the response.
        headers = {str(k): raw_headers[k] for k in raw_headers}

        encoding = http_content_type_encoding(headers.get("Content-Type"))
        if encoding is None:
            # NOTE(review): this runs the Content-Type parser over the page
            # body; presumably meant to catch a charset declared in the
            # markup -- confirm it matches the intended declaration forms.
            encoding = http_content_type_encoding(body)
        if encoding is None:
            encoding = 'utf-8'

        return HtmlResponse(spider.session.url(), body=body, encoding=encoding, headers=headers)

Пример #4

Показать файл

Файл: scraping.py Проект: I-TREND/SASF

    def __call__(self, session, url, *args, **kwargs):
        """Visit ``url`` with ``session`` and return the result as an ``HtmlPage``.

        The session is also stored on ``self.session``.
        """
        self.session = session
        session.visit(url)
        session.wait()

        page_body = session.body()
        raw_headers = session.headers()
        # Copy the header mapping into a plain dict.
        header_map = {name: raw_headers[name] for name in raw_headers}
        charset = http_content_type_encoding(header_map.get("Content-Type"))

        return HtmlPage(
            session.url(),
            headers=header_map,
            body=page_body,
            encoding=charset,
        )

Пример #5

Показать файл

Файл: util.py Проект: yz599/ArchiveBox

def download_url(url: str, timeout: int=None) -> str:
    """Fetch ``url`` and return the response text.

    The encoding named in the Content-Type header is preferred; otherwise
    the encoding declared inside the body is used. If either is found it is
    set on the response before the text is read.
    """
    from .config import TIMEOUT, CHECK_SSL_VALIDITY, WGET_USER_AGENT

    effective_timeout = timeout or TIMEOUT
    request_headers = {'User-Agent': WGET_USER_AGENT}
    response = requests.get(
        url,
        headers=request_headers,
        verify=CHECK_SSL_VALIDITY,
        timeout=effective_timeout,
    )

    header_value = response.headers.get('Content-Type', '')
    declared = http_content_type_encoding(header_value)
    if not declared:
        # Header gave nothing usable: look for a declaration in the body.
        declared = html_body_declared_encoding(response.text)

    if declared is not None:
        response.encoding = declared

    return response.text

Пример #6

Показать файл

Файл: text.py Проект: wusy1209/scrapy

 def _headers_encoding(self):
     """Pass the Content-Type header through http_content_type_encoding."""
     # The header is looked up with a bytes key, defaulting to empty bytes,
     # and converted with to_native_str before parsing.
     raw_value = self.headers.get(b'Content-Type', b'')
     header_text = to_native_str(raw_value)
     return http_content_type_encoding(header_text)

Пример #7

Показать файл

Файл: httpclient.py Проект: qiulimao/tornadospider

 def _headers_encoding(cls, response):
     """
         Look up the encoding type based on the response's Content-Type header.
     """
     header_value = response.headers.get('Content-Type')
     detected = http_content_type_encoding(header_value)
     return detected

Пример #8

Показать файл

 def _headers_encoding(self):
     """Pass the Content-Type header through http_content_type_encoding."""
     # Bytes key with an empty-bytes default; converted with to_native_str
     # before it is parsed.
     header_bytes = self.headers.get(b'Content-Type', b'')
     return http_content_type_encoding(
         to_native_str(header_bytes)
     )

Пример #9

Показать файл

 def _infer_encoding_from_content_type(self):
     content_type = self.headers.get("Content-Type")
     if content_type:
         return http_content_type_encoding(content_type)
     return None

Пример #10

Показать файл

 def _headers_encoding(self):
     """Pass the 'Content-Type' header value to http_content_type_encoding."""
     header_value = self.headers.get('Content-Type')
     detected = http_content_type_encoding(header_value)
     return detected

Пример #11

Показать файл

Файл: text.py Проект: atharwa-24/scrapy

 def _headers_encoding(self):
     """Pass the Content-Type header through http_content_type_encoding."""
     # Bytes key with an empty-bytes default; converted with to_unicode
     # before it is parsed.
     raw_value = self.headers.get(b"Content-Type", b"")
     header_text = to_unicode(raw_value)
     return http_content_type_encoding(header_text)

Пример #12

Показать файл

 def _headers_encoding(cls, response):
     """
         Look up the encoding type based on the response's Content-Type header.
     """
     return http_content_type_encoding(
         response.headers.get('Content-Type')
     )

Пример #13

Показать файл

Файл: text.py Проект: 0326/scrapy

 def _headers_encoding(self):
     """Pass the 'Content-Type' header value to http_content_type_encoding."""
     return http_content_type_encoding(
         self.headers.get('Content-Type')
     )

Пример #14

Показать файл

Файл: test_encoding.py Проект: azizur77/w3lib

 def test_http_encoding_header(self):
     """Check charset extraction from a header line and the no-charset case."""
     charset_header = "Content-Type: text/html; charset=ISO-8859-4"
     # The expected value is the normalised name, not the spelling in the header.
     self.assertEqual(http_content_type_encoding(charset_header), "iso8859-4")
     unrelated = http_content_type_encoding("something else")
     self.assertEqual(None, unrelated)

Пример #15

Показать файл

Файл: test_encoding.py Проект: Dior222/w3lib

 def test_http_encoding_header(self):
     """Check charset extraction from a header line and the no-charset case."""
     with_charset = http_content_type_encoding(
         "Content-Type: text/html; charset=ISO-8859-4")
     without_charset = http_content_type_encoding("something else")
     # The expected value is the normalised name, not the spelling in the header.
     self.assertEqual(with_charset, "iso8859-4")
     self.assertEqual(None, without_charset)

Пример #16

Показать файл

Файл: __utils.py Проект: CzaOrz/MiniTools

def guess_coding(body):
    """Guess the encoding of ``body`` via chardet.

    The name reported by ``chardet.detect`` is wrapped in a ``charset=``
    string and passed to ``http_content_type_encoding``.
    """
    detected_name = chardet.detect(body)["encoding"]
    pseudo_header = f'charset={detected_name}'
    return http_content_type_encoding(pseudo_header)