def generate_feed(items):
    fg = feed.FeedGenerator()
    fg.title('Superdesk')
    fg.id(flask.url_for('rss.index', _external=True))
    fg.link(href=flask.url_for('rss.index', _external=True), rel='self')
    fg.description('foo')
    for item in items:
        if not item.get('headline') and not item.get('slugline') and not item.get('name'):
            continue  # no title, no Atom entry
        entry = fg.add_entry()
        entry.guid('{}/{}'.format(flask.url_for('rss.index', _external=True).rstrip('/'), item['_id']))
        entry.title(item.get('headline', item.get('name', item.get('slugline', ''))))
        entry.pubDate(item.get('firstpublished'))
        entry.updated(item['versioncreated'])
        entry.content(get_content(item), type='CDATA')
        category = [
            {'term': s.get('name')}
            for s in item.get('subject', [])
            if s.get('scheme') == 'category'
        ]
        if category:
            entry.category(category)
        if item.get('description_text'):
            entry.summary(item['description_text'])
        if item.get('byline'):
            entry.author({'name': item['byline']})
    # 'CDATA' is not a valid Atom content type, so rewrite it to 'html'
    # after serialization
    return fg.atom_str(pretty=True) \
        .replace(b'<content type="CDATA">', b'<content type="html">')

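# Hedged usage sketch: serving the generated Atom feed from a Flask blueprint.
# The blueprint name "rss" matches the flask.url_for('rss.index') calls above;
# get_published_items() is a hypothetical helper, not part of the original code.
import flask

blueprint = flask.Blueprint('rss', __name__)


@blueprint.route('/')
def index():
    items = get_published_items()  # hypothetical: fetch published article dicts
    return flask.Response(generate_feed(items), mimetype='application/atom+xml')
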
def main():
    # Load the predefined credentials dictionary
    with open("creds.pickle", "rb") as f:
        creds = pickle.load(f)
    auth = tw.OAuthHandler(creds['consumer_key'], creds['consumer_secret'])
    auth.set_access_token(creds['access_token'], creds['access_secret'])
    api = tw.API(auth)
    # Scrape tweets
    lowe_tweets = gtfu.get_tweets_for_user('ZachLowe_NBA', api)
    # Find URLs of Zach's articles
    zl_links = {}
    gmt = pytz.timezone('GMT')
    for tweet in lowe_tweets:
        for url in tweet.entities['urls']:
            link = url['expanded_url']
            if "espn.com" in link and "story" in link:
                # Store the link and date in the dict. Tweets arrive
                # newest-first, so overwriting an existing key keeps the
                # *earliest* instance of the article being tweeted.
                zl_links[link] = gmt.localize(tweet.created_at)
    # Convert the dict to (date, link) tuples and sort by date, newest first
    zl_links = sorted(
        ((date, link) for link, date in zl_links.items()),
        key=lambda x: x[0],
        reverse=True,
    )
    # Traverse the links and extract title and description
    fg = feed.FeedGenerator()
    fg.link(href='http://74.215.107.79:5000/static/out.atom', rel='self')
    fg.title('Zach Lowe Feed')
    fg.id('http://74.215.107.79:5000/static/out.atom')
    fg.description("Zach Lowe's Articles")
    for date, url in zl_links:
        tags = gtfl.get_tags_for_link(url, ['og:title', 'og:description'])
        try:
            title = tags['og:title'].attrs['content']
            desc = tags['og:description'].attrs['content']
        except (KeyError, AttributeError):
            # Skip links whose Open Graph tags are missing or malformed
            continue
        # Add an entry to the feed
        entry = fg.add_entry()
        entry.title(title)
        entry.description(desc)
        entry.content(desc)
        entry.updated(date)
        entry.guid(url)
        entry.author({'name': 'Zach Lowe'})
        entry.link(href=url)
    print(fg.atom_str(pretty=True))

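# A minimal sketch of the get_tags_for_link helper used above, assuming it
# fetches the page and returns a mapping from Open Graph property names to
# the matching <meta> tags. The original gtfl module is not shown, so the
# behavior here is an assumption.
import urllib.request

import bs4


def get_tags_for_link(url, properties):
    with urllib.request.urlopen(url) as response:
        soup = bs4.BeautifulSoup(response.read(), features="lxml")
    tags = {}
    for prop in properties:
        # e.g. <meta property="og:title" content="...">
        tag = soup.find("meta", property=prop)
        if tag is not None:
            tags[prop] = tag
    return tags
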
def _gen_feed(url, key, value):
    title = 'Failures for %s: %s' % (key, value)
    fg = feed.FeedGenerator()
    fg.title(title)
    fg.id(url)
    fg.link(href=url, rel='self')
    fg.description("The failed %s: %s tests feed" % (key, value))
    fg.language('en')
    return fg

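# Hedged usage sketch: filling the generator returned by _gen_feed and
# serializing it as RSS. The failure record fields (name, url, detail) are
# assumptions; the original caller is not shown.
def failures_feed(url, key, value, failures):
    fg = _gen_feed(url, key, value)
    for failure in failures:
        entry = fg.add_entry()
        entry.title(failure['name'])
        entry.link(href=failure['url'])
        entry.description(failure['detail'])
        entry.guid(failure['url'])
    return fg.rss_str(pretty=True)
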
def generate_feed(items):
    fg = feed.FeedGenerator()
    fg.load_extension("dc")
    fg.title("Superdesk")
    fg.id(flask.url_for("rss.index", _external=True))
    fg.link(href=flask.url_for("rss.index", _external=True), rel="self")
    fg.description("Fidelity RSS")
    for item in items:
        if not item.get("headline") and not item.get("name"):
            continue  # no title, no RSS entry
        entry = fg.add_entry()
        entry.guid(item["_id"])
        entry.link({"href": get_permalink(item)})
        entry.title(item.get("headline", item.get("name", item.get("slugline", ""))))
        entry.published(item.get("firstpublished"))
        entry.updated(item["versioncreated"])
        entry.content(get_content(item), type="CDATA")
        if item.get("source"):
            entry.source(title=item["source"])
        if item.get("subject"):
            category = [
                {"term": s.get("name")}
                for s in item["subject"]
                if s.get("scheme") in CATEGORIES
            ]
            if category:
                entry.category(category)
        if item.get("authors"):
            authors = [
                author["name"]
                for author in item["authors"]
                if author.get("role") and author["role"].lower() == AUTHOR_ROLE.lower()
            ]
            if authors:
                entry.dc.dc_creator(", ".join(authors))
        if item.get("associations") and item["associations"].get(FEATUREMEDIA):
            media = item["associations"][FEATUREMEDIA]
            if media.get("renditions") and media["renditions"].get(WEB_RENDITION):
                entry.enclosure(
                    media["renditions"][WEB_RENDITION]["href"],
                    type=media["renditions"][WEB_RENDITION].get("mimetype"),
                )
    return fg.rss_str(pretty=True)

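# Hedged sketch of the module-level names generate_feed relies on; the real
# values live elsewhere in the application, so these are assumptions.
CATEGORIES = ("category",)       # subject schemes exposed as RSS categories
AUTHOR_ROLE = "writer"           # author role surfaced via dc:creator
FEATUREMEDIA = "featuremedia"    # association key for the feature image
WEB_RENDITION = "baseImage"      # rendition whose href becomes the enclosure
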
def _create_generator(host: str, blogs: Optional[List[blog_models.Blog]] = None):
    # Avoid a mutable default argument by normalizing None to an empty list
    blogs = blogs if blogs is not None else []
    title = flask.current_app.config["FEED_TITLE"]
    description = flask.current_app.config["FEED_DESCRIPTION"]
    generator = feed.FeedGenerator()
    generator.title(title=title)
    generator.description(description=description)
    for blog in blogs:
        categories = []
        blog_url = flask.url_for(endpoint="blogs.display", slug=blog.slug)
        author_url = flask.url_for(endpoint="accounts.display", username=blog.author.display)
        entry = generator.add_entry()
        entry.title(title=blog.title)
        entry.description(description=blog.description, isSummary=True)
        entry.content(content=blog.body)
        entry.guid(
            guid=f"{host}{blog_url}",
            permalink=True,
        )
        entry.author(
            name=blog.author.display,
            uri=f"{host}{author_url}",
            email=blog.author.email,
        )
        entry.published(published=blog.created_at)
        entry.updated(updated=blog.updated_at)
        for category in blog.categories:
            category_url = flask.url_for(endpoint="categories.display", slug=category.slug)
            # Build the feed dict without shadowing the category model instance
            categories.append(
                dict(
                    label=category.title,
                    term=category.description,
                    scheme=f"{host}{category_url}",
                )
            )
        entry.category(category=categories)
    return generator

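# Hedged usage sketch: serving the generator from a Flask view. The app, the
# route, and the query are assumptions; only _create_generator comes from above,
# and it expects FEED_TITLE and FEED_DESCRIPTION in the app config.
app = flask.Flask(__name__)


@app.route("/feed.rss")
def rss_feed():
    blogs = blog_models.Blog.query.all()  # hypothetical query for all blogs
    generator = _create_generator(host=flask.request.host_url.rstrip("/"), blogs=blogs)
    return flask.Response(generator.rss_str(pretty=True), mimetype="application/rss+xml")
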
def create_rss_feed_xml():
    global cached_output
    tweets = t.statuses.home_timeline()
    fg = feed.FeedGenerator()
    fg.id('https://twitter.com/omeranson')
    fg.title('Twitter feed')
    fg.subtitle('Twitter timeline')
    fg.link(href='https://twitter.com')
    fg.link(href='http://ansonet.no-ip.biz:8081/feed.xml', rel='self')
    fg.language('en')
    for tweet in tweets:
        fe = fg.add_entry()
        fe.id('https://twitter.com/i/status/%s' % (tweet['id_str']))
        fe.title('%s (@%s)' % (tweet['user']['name'], tweet['user']['screen_name']))
        fe.link(href='https://twitter.com/i/status/%s' % (tweet['id_str']))
        fe.description(tweet['text'])
        fe.author(name=tweet['user']['name'])
    output = fg.atom_str(pretty=True)
    cached_output = output
    return output

def create_rss_feed_xml():
    global cached_output
    tweets = t.statuses.home_timeline(tweet_mode='extended')
    fg = feed.FeedGenerator()
    fg.id('https://twitter.com/omeranson')
    fg.title('Twitter feed')
    fg.subtitle('Twitter timeline')
    fg.link(href='https://twitter.com')
    fg.link(href='http://ansonet.no-ip.biz:8081/feed.xml', rel='self')
    fg.language('en')
    for tweet in tweets:
        fe = fg.add_entry()
        fe.id('https://twitter.com/i/status/%s' % (tweet['id_str']))
        fe.title('%s (@%s)' % (tweet['user']['name'], tweet['user']['screen_name']))
        fe.link(href='https://twitter.com/i/status/%s' % (tweet['id_str']))
        fe.content(_get_tweet_text(tweet), type='html')
        fe.author(name=tweet['user']['name'])
        fe.updated(
            datetime.datetime.strptime(tweet['created_at'], TWEET_CREATED_AT_FORMAT))
    output = fg.atom_str(pretty=True)
    cached_output = output
    return output

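# Hedged sketch of the helpers the extended variant above relies on. Twitter's
# created_at strings look like 'Wed Oct 10 20:19:24 +0000 2018'; the URL
# expansion behavior of _get_tweet_text is an assumption.
TWEET_CREATED_AT_FORMAT = '%a %b %d %H:%M:%S %z %Y'


def _get_tweet_text(tweet):
    # Extended-mode tweets carry the untruncated text in 'full_text'
    text = tweet.get('full_text', tweet.get('text', ''))
    # Swap t.co wrappers for their expanded targets where available
    for url in tweet.get('entities', {}).get('urls', []):
        text = text.replace(url['url'], url['expanded_url'])
    return text
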
def main():
    # Get the directory with the PEP sources
    pep_dir = Path(__file__).parent

    # Get list of PEPs with creation time (from the "Created:" line in each source)
    peps_with_dt = sorted((pep_creation(path), path) for path in pep_dir.glob("pep-????.*"))

    # Generate RSS items for the 10 most recent PEPs
    items = []
    for dt, full_path in peps_with_dt[-10:]:
        try:
            pep_num = int(full_path.stem.split("-")[-1])
        except ValueError:
            continue

        title = first_line_starting_with(full_path, "Title:")
        author = first_line_starting_with(full_path, "Author:")
        if "@" in author or " at " in author:
            parsed_authors = email.utils.getaddresses([author])
            # ideal would be to pass as a list of dicts with names and emails to
            # item.author, but FeedGen's RSS <author/> output doesn't pass W3C
            # validation (as of 12/06/2021)
            joined_authors = ", ".join(f"{name} ({email_address})" for name, email_address in parsed_authors)
        else:
            joined_authors = author
        url = f"https://www.python.org/dev/peps/pep-{pep_num:0>4}"

        item = entry.FeedEntry()
        item.title(f"PEP {pep_num}: {title}")
        item.link(href=url)
        item.description(pep_abstract(full_path))
        item.guid(url, permalink=True)
        item.published(dt.replace(tzinfo=datetime.timezone.utc))  # ensure datetime has a timezone
        item.author(email=joined_authors)
        items.append(item)

    # The RSS envelope
    desc = """
    Newest Python Enhancement Proposals (PEPs) - Information on new
    language features, and some meta-information like release
    procedure and schedules.
    """.replace("\n    ", " ").strip()

    # Set up the feed generator
    fg = feed.FeedGenerator()
    fg.language("en")
    fg.generator("")
    fg.docs("https://cyber.harvard.edu/rss/rss.html")

    # Add metadata
    fg.title("Newest Python PEPs")
    fg.link(href="https://www.python.org/dev/peps")
    fg.link(href="https://www.python.org/dev/peps/peps.rss", rel="self")
    fg.description(desc)
    fg.lastBuildDate(datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc))

    # Add PEP information (ordered by newest first)
    for item in items:
        fg.add_entry(item)

    pep_dir.joinpath("peps.rss").write_bytes(fg.rss_str(pretty=True))

import datetime
import re
import sys
import urllib.request

import bs4
from feedgen import feed

url = sys.argv[1]
feed_size = int(sys.argv[2])
title = sys.argv[3]
feed_id = sys.argv[4]

with urllib.request.urlopen(url) as f:
    content = f.read().decode("utf8")
soup = bs4.BeautifulSoup(content, features="lxml")

posts = 0
fg = feed.FeedGenerator()
fg.title(title)
fg.id(feed_id)
for a in soup.find_all("a"):
    if posts == feed_size:
        break
    # a.string can be None for tags with nested markup, so guard the match
    match = re.fullmatch("(....-..-..) (.*)", a.string or "")
    if not match:
        continue
    title = match.group(2)
    date = datetime.datetime.strptime(match.group(1), "%Y-%m-%d").date()
    fi = fg.add_item()
    fi.title(title)
    fi.id(a["href"])
    posts += 1  # count the entry so the feed_size cap takes effect
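
# The script stops after building the entries; a plausible completion (an
# assumption, not shown in the original) writes the Atom feed to stdout:
sys.stdout.buffer.write(fg.atom_str(pretty=True))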