def _download_feed(chan_id):
    """
    Download the RSS feed of the channel `chan_id`.

    The feed is fetched from ``<URL>/r/<chan_id>/.rss``.

    Args:
        chan_id (str): Identifier of the channel, interpolated into the URL.

    Returns:
        Whatever `httpkie.Downloader.download` returns for the feed URL
        (presumably the feed body as a string - confirm against httpkie).

    Raises:
        ValueError: When the downloaded data contains an atom "self" link
            pointing at the subreddit search for `chan_id`.
    """
    feed_url = "%s/r/%s/.rss" % (URL, chan_id)
    feed = httpkie.Downloader().download(feed_url)

    # NOTE(review): presumably the server answers with a search feed when the
    # channel does not exist; its self-link is used to detect that - confirm.
    href_attr = 'href="%s/subreddits/search.rss?q=%s" ' % (URL, chan_id)
    search_marker = (
        '<atom:link rel="self" ' + href_attr + 'type="application/rss+xml" />'
    )

    if search_marker in feed:
        raise ValueError("Incorrect `chan_id`: '%s'!" % chan_id)

    return feed
def _get_source(link):
    """
    Fetch the content behind `link`, which may be a URL or a local path.

    Args:
        link (str): Filename or URL.

    Returns:
        str: Downloaded data for an ``http://`` / ``https://`` link, otherwise
            the content read from the file at `link`.

    Raises:
        UserWarning: When `link` is neither a http(s) URL nor an existing
            path.
    """
    # The URL check comes first, so a http(s) link never touches the
    # filesystem.
    if link.startswith(("http://", "https://")):
        return httpkie.Downloader().download(link)

    if not os.path.exists(link):
        raise UserWarning("html: '%s' is neither URL or data!" % link)

    with open(link) as handle:
        return handle.read()
#
# Interpreter version: python 2.7
#
# Imports =====================================================================
from urlparse import urljoin

import arrow
import httpkie
import dhtmlparser
from feedgen.feed import FeedGenerator


# Variables ===================================================================
URL = "http://liberland.org/cz/news/"
DOWNER = httpkie.Downloader()


# Functions & classes =========================================================
def _parse_link(div):
    """
    Pick the target of the first ``<a>`` found in `div` and make it absolute.

    Args:
        div (obj): Element offering ``.find("a")``; presumably a dhtmlparser
            element - confirm against the caller.

    Returns:
        str: The ``href`` of the first anchor joined with `URL`, or None when
            there is no anchor or its ``href`` is missing / empty.
    """
    anchors = div.find("a")
    if not anchors:
        return None

    href = anchors[0].params.get("href", None)
    if not href:
        return None

    # Relative links are resolved against the news page address.
    return urljoin(URL, href)
def _download_feed():
    """
    Download the RSS feed published at ``https://lobste.rs/rss``.

    Returns:
        Whatever `httpkie.Downloader.download` returns for that address
        (presumably the feed body as a string - confirm against httpkie).
    """
    feed_url = "https://lobste.rs/rss"
    return httpkie.Downloader().download(feed_url)