def test_url_titles_without_ssl_verification(self):
    """Check expected titles for URLs with broken SSL, with certificate verification disabled."""
    title_reader = URLTitleReader(verify_ssl=False)
    for url, expected_title in TEST_CASES_WITH_BAD_SSL.items():
        # Optionally restrict the run to URLs matching URL_FILTER.
        if not URL_FILTER or (URL_FILTER in url):
            with self.subTest(url=url):
                self.assertEqual(expected_title, title_reader.title(url))
def test_url_titles(self):
    """Check that each test-case URL yields its expected title."""
    title_reader = URLTitleReader()
    for url, expected_title in TEST_CASES.items():
        # Optionally restrict the run to URLs matching URL_FILTER.
        if not URL_FILTER or (URL_FILTER in url):
            with self.subTest(url=url):
                self.assertEqual(expected_title, title_reader.title(url))
# Demo: fetch the title of one URL twice to exercise the reader's cache.
from urltitle import config, URLTitleReader

config.configure_logging()

TEST_URL = 'https://www.google.com/'

title_reader = URLTitleReader()
title_reader.title(TEST_URL)  # First call fetches and populates the cache.
title_reader.title(TEST_URL)  # Second call should be served from the cache.
# Demo: fetch titles for a batch of Amazon product URLs to exercise
# netloc-specific URL handling. (The unused TEST_URL constant was removed.)
from urltitle import config, URLTitleReader

config.configure_logging()

TEST_URLS = [
    'https://www.amazon.com/Natures-Plus-Chewable-Iron-Supplement/dp/B00014DAFM',
    'https://www.amazon.com/Bluebonnet-Earth-Vitamin-Chewable-Tablets/dp/B00ENYUIO2/',
    'https://www.amazon.com/dp/B0749WVS7J/ref=ods_gw_ha_h1_d_rr_021519?pf_rd_p=8bf51e9c-a499-47ad-829e-a0b4afcae72e&pf_rd_r=9SHQNHFS1W35WG02P75M',
    'https://www.amazon.com/dp/B0794W1SKP/ref=ods_mccc_lr',
    'https://www.amazon.com/ProsourceFit-Tri-Fold-Folding-Exercise-Carrying/dp/B07NCJDHBM?',
]

reader = URLTitleReader()
for url in TEST_URLS:
    reader.title(url)
"""Read and log the title of a URL.""" import logging from urltitle import URLTitleReader, config config.configure_logging() log = logging.getLogger(f"{config.PACKAGE_NAME}.{__name__}") URL = "https://www.google.com" reader = URLTitleReader() # pylint: disable=invalid-name log.info(f"{URL} has title: {reader.title(URL)}") log.info("Testing cache.") log.info(f"{URL} has title: {reader.title(URL)}") # Should use cache.
# Probe browser-like request headers one at a time against TEST_URL, recording
# which additions still allow the title to be read for this netloc.
config.configure_logging()

log = logging.getLogger(__name__)

# Candidate headers to trial individually. All values are strings for type
# consistency; "DNT" and "Upgrade-Insecure-Requests" were previously the int 1,
# which http.client would encode to the same "1" bytes on the wire anyway.
EXTRA_HEADERS = {
    "Accept": "*/*",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip",
    "Referer": "https://google.com/",
    "DNT": "1",
    "Connection": "keep-alive",
    "Cookie": "",
    "Upgrade-Insecure-Requests": "1",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache",
}

NETLOC = URLTitleReader().netloc(TEST_URL)
log.info("Netloc for %s is %s.", TEST_URL, NETLOC)
# NOTE(review): `titles` and `title` are not consumed in the visible portion of
# this script — presumably results are collected after the try/except in a
# continuation not shown here; verify against the full file.
titles: Dict[str, str] = {}

config.NETLOC_OVERRIDES[NETLOC] = {"extra_headers": {}}
EXTRA_CONFIG_HEADERS = config.NETLOC_OVERRIDES[NETLOC]["extra_headers"]
for h_key, h_val in EXTRA_HEADERS.items():
    log.debug("Adding header: %s=%s", h_key, h_val)
    # Headers accumulate across iterations in the shared netloc override.
    EXTRA_CONFIG_HEADERS[h_key] = h_val
    reader = URLTitleReader()  # Fresh instance avoids cache.
    try:
        title = reader.title(TEST_URL)
    except URLTitleError as exc:
        log.error("Ignoring exception after adding header %s=%s: %s", h_key, h_val, exc)
        continue