def investigate_etags() -> None:
    """Log the distinct netlocs of all configured feed URLs and their count.

    Walks every channel under ``config.INSTANCE['feeds']`` and every feed
    within each channel, reduces each feed's ``'url'`` to its netloc via
    ``url_to_netloc``, and logs the sorted unique netlocs followed by how
    many there are.
    """
    unique_netlocs = set()
    for channel_feeds in config.INSTANCE['feeds'].values():
        for feed in channel_feeds.values():
            unique_netlocs.add(url_to_netloc(feed['url']))

    # Sort so the logged list is stable from run to run.
    netlocs = sorted(unique_netlocs)
    log.info('The unique netlocs are: %s', ', '.join(netlocs))
    log.info('Number of unique netlocs is %s.', len(netlocs))
def investigate_etags() -> None:
    """Report which network locations the configured feeds are served from.

    Every feed URL found under ``config.INSTANCE["feeds"]`` (channel -> feed
    -> ``"url"``) is mapped through ``url_to_netloc``; the de-duplicated
    results are logged in sorted order, followed by their count.

    NOTE(review): the name mentions ETags, but the visible body only reports
    netlocs.
    """
    feeds_by_channel = config.INSTANCE["feeds"]

    # A set comprehension de-duplicates netlocs shared by several feeds.
    netlocs = list({
        url_to_netloc(feed["url"])
        for channel_feeds in feeds_by_channel.values()
        for feed in channel_feeds.values()
    })
    netlocs.sort()

    joined = ", ".join(netlocs)
    log.info("The unique netlocs are: %s", joined)
    log.info("Number of unique netlocs is %s.", len(netlocs))
import html
import json

import jmespath
import requests

from ircrssfeedbot import config
from ircrssfeedbot.feed import ensure_list
from ircrssfeedbot.util.urllib import url_to_netloc

# pylint: disable=invalid-name

# Customize:
# URL is the JSON document to fetch; JMES is the JMESPath expression applied
# to it. The expression below keeps children whose flair is non-null and whose
# score exceeds 5, projects each to a title ("[flair] title"), a link
# ("https://redd.it/<id>") and a category (the flair), and finally keeps only
# the "Scientific Study" and "News Article" categories.
URL = "https://www.reddit.com/r/Nootropics/hot/.json?limit=98"
JMES = "data.children[*].data | [?(not_null(link_flair_text) && score > `5`)].{title: join(``, [`[`, link_flair_text, `] `, title]), link: join(``, [`https://redd.it/`, id]), category: link_flair_text} | [?category == `Scientific Study` || category ==`News Article`]"  # pylint: disable=line-too-long

# Use the per-netloc user agent override when one is configured for this URL,
# otherwise the default one.
user_agent = config.USER_AGENT_OVERRIDES.get(url_to_netloc(URL), config.USER_AGENT_DEFAULT)

# The context manager closes the session (and its pooled connections) once
# the response body has been read.
with requests.Session() as session:
    content = session.get(
        URL,
        timeout=config.REQUEST_TIMEOUT,
        headers={"User-Agent": user_agent},
    ).content

data = json.loads(content)
entries = jmespath.search(JMES, data)

for index, entry in enumerate(entries):
    title, link = entry["title"].strip(), entry["link"].strip()
    # Entries are numbered from 1 in the rendered post.
    post = f"#{index+1}: {title}\n{link}\n"
    # "category" may be absent, a single value, or a list; ensure_list is
    # applied before each category is stripped and HTML-unescaped.
    categories = ", ".join(
        html.unescape(c.strip()) for c in ensure_list(entry.get("category", []))
    )