def test_url_titles_without_ssl_verification(self):
    """Check expected titles for URLs with broken SSL, with certificate verification disabled."""
    title_reader = URLTitleReader(verify_ssl=False)
    for url, expected_title in TEST_CASES_WITH_BAD_SSL.items():
        # Optionally restrict the run to URLs matching URL_FILTER.
        if not URL_FILTER or (URL_FILTER in url):
            with self.subTest(url=url):
                self.assertEqual(expected_title, title_reader.title(url))
def test_url_titles(self):
    """Check that each test-case URL yields its expected title."""
    title_reader = URLTitleReader()
    for url, expected_title in TEST_CASES.items():
        # Optionally restrict the run to URLs matching URL_FILTER.
        if not URL_FILTER or (URL_FILTER in url):
            with self.subTest(url=url):
                self.assertEqual(expected_title, title_reader.title(url))
# Demo: fetch the title of one URL twice to exercise the reader's cache.
from urltitle import config, URLTitleReader

config.configure_logging()

TEST_URL = 'https://www.google.com/'

title_reader = URLTitleReader()
title_reader.title(TEST_URL)  # First call fetches and populates the cache.
title_reader.title(TEST_URL)  # Second call should be served from the cache.
# Demo: fetch titles for a batch of Amazon product URLs to exercise
# netloc-specific URL handling. (The unused TEST_URL constant was removed.)
from urltitle import config, URLTitleReader

config.configure_logging()

TEST_URLS = [
    'https://www.amazon.com/Natures-Plus-Chewable-Iron-Supplement/dp/B00014DAFM',
    'https://www.amazon.com/Bluebonnet-Earth-Vitamin-Chewable-Tablets/dp/B00ENYUIO2/',
    'https://www.amazon.com/dp/B0749WVS7J/ref=ods_gw_ha_h1_d_rr_021519?pf_rd_p=8bf51e9c-a499-47ad-829e-a0b4afcae72e&pf_rd_r=9SHQNHFS1W35WG02P75M',
    'https://www.amazon.com/dp/B0794W1SKP/ref=ods_mccc_lr',
    'https://www.amazon.com/ProsourceFit-Tri-Fold-Folding-Exercise-Carrying/dp/B07NCJDHBM?',
]

reader = URLTitleReader()
for url in TEST_URLS:
    reader.title(url)
"""Read and log the title of a URL.""" import logging from urltitle import URLTitleReader, config config.configure_logging() log = logging.getLogger(f"{config.PACKAGE_NAME}.{__name__}") URL = "https://www.google.com" reader = URLTitleReader() # pylint: disable=invalid-name log.info(f"{URL} has title: {reader.title(URL)}") log.info("Testing cache.") log.info(f"{URL} has title: {reader.title(URL)}") # Should use cache.
# Probe browser-like request headers one at a time against TEST_URL, recording
# which additions still allow the title to be read for this netloc.
config.configure_logging()

log = logging.getLogger(__name__)

# Candidate headers to trial individually. All values are strings for type
# consistency; "DNT" and "Upgrade-Insecure-Requests" were previously the int 1,
# which http.client would encode to the same "1" bytes on the wire anyway.
EXTRA_HEADERS = {
    "Accept": "*/*",
    "Accept-Language": "en-US,en;q=0.5",
    "Accept-Encoding": "gzip",
    "Referer": "https://google.com/",
    "DNT": "1",
    "Connection": "keep-alive",
    "Cookie": "",
    "Upgrade-Insecure-Requests": "1",
    "Pragma": "no-cache",
    "Cache-Control": "no-cache",
}

NETLOC = URLTitleReader().netloc(TEST_URL)
log.info("Netloc for %s is %s.", TEST_URL, NETLOC)
# NOTE(review): `titles` and `title` are not consumed in the visible portion of
# this script — presumably results are collected after the try/except in a
# continuation not shown here; verify against the full file.
titles: Dict[str, str] = {}

config.NETLOC_OVERRIDES[NETLOC] = {"extra_headers": {}}
EXTRA_CONFIG_HEADERS = config.NETLOC_OVERRIDES[NETLOC]["extra_headers"]
for h_key, h_val in EXTRA_HEADERS.items():
    log.debug("Adding header: %s=%s", h_key, h_val)
    # Headers accumulate across iterations in the shared netloc override.
    EXTRA_CONFIG_HEADERS[h_key] = h_val
    reader = URLTitleReader()  # Fresh instance avoids cache.
    try:
        title = reader.title(TEST_URL)
    except URLTitleError as exc:
        log.error("Ignoring exception after adding header %s=%s: %s", h_key, h_val, exc)
        continue