Example #1
import flask
from feedgen import feed

def generate_feed(items):
    fg = feed.FeedGenerator()
    fg.title('Superdesk')
    fg.id(flask.url_for('rss.index', _external=True))
    fg.link(href=flask.url_for('rss.index', _external=True), rel='self')
    fg.description('foo')
    for item in items:
        if not item.get('headline') and not item.get('slugline') and not item.get('name'):
            continue  # skip items without any usable title
        entry = fg.add_entry()
        entry.guid('{}/{}'.format(flask.url_for('rss.index', _external=True).rstrip('/'), item['_id']))
        entry.title(item.get('headline', item.get('name', item.get('slugline', ''))))
        entry.pubDate(item.get('firstpublished'))
        entry.updated(item['versioncreated'])
        entry.content(get_content(item), type='CDATA')  # get_content() is a project-local helper

        category = [{'term': s.get('name')} for s in item.get('subject', []) if s.get('scheme') == 'category']
        if category:
            entry.category(category)

        if item.get('description_text'):
            entry.summary(item['description_text'])

        if item.get('byline'):
            entry.author({'name': item['byline']})

    # rewrite the serialized type attribute so feed readers treat the CDATA payload as HTML
    return fg.atom_str(pretty=True) \
        .replace(b'<content type="CDATA">', b'<content type="html">')
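
A minimal call sketch (hypothetical item values; it assumes an active Flask request context and the project-local get_content() helper):

# Hypothetical usage sketch; field values are made up.
import datetime

sample_items = [{
    '_id': 'abc123',
    'headline': 'Sample headline',
    'firstpublished': datetime.datetime(2021, 6, 1, tzinfo=datetime.timezone.utc),
    'versioncreated': datetime.datetime(2021, 6, 2, tzinfo=datetime.timezone.utc),
}]
atom_bytes = generate_feed(sample_items)  # Atom XML as bytes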
Example #2
import pickle

import pytz
import tweepy as tw
from feedgen import feed

# gtfu and gtfl are project-local helper modules (they provide
# get_tweets_for_user() and get_tags_for_link(), respectively).

def main():
    # Load the predefined credentials dictionary
    creds = pickle.load(open("creds.pickle", "rb"))
    auth = tw.OAuthHandler(creds['consumer_key'], creds['consumer_secret'])
    auth.set_access_token(creds['access_token'], creds['access_secret'])
    api = tw.API(auth)

    # Scraping tweets
    lowe_tweets = gtfu.get_tweets_for_user('ZachLowe_NBA', api)

    # Find URLs of Zach's articles
    zl_links = {}
    gmt = pytz.timezone('GMT')
    for tweet in lowe_tweets:
        for url in tweet.entities['urls']:
            link = url['expanded_url']
            if "espn.com" in link and "story" in link:
                # Store the link and date in the dict
                # Even if it already exists, overwrite it so you get the
                # *earliest* instance of the article being tweeted
                zl_links[link] = gmt.localize(tweet.created_at)

    # Now convert the dict to a list of tuples
    zl_links = [(zl_links[link], link) for link in zl_links]
    # Sort the tuples by their first value (the date)
    zl_links = sorted(zl_links, key=lambda x: x[0], reverse=True)

    # Traverse the links and extract title and description
    fg = feed.FeedGenerator()
    fg.link({'href':'http://74.215.107.79:5000/static/out.atom', 'rel':'self'})
    fg.title('Zach Lowe Feed')
    fg.id('http://74.215.107.79:5000/static/out.atom')
    fg.description("Zach Lowe's Articles")
    # Iterate over the link tuples
    for link in zl_links:
        valid_link = True
        date = link[0]
        url = link[1]
        tags = gtfl.get_tags_for_link(url, ['og:title', 'og:description'])
        try:
            title = tags['og:title'].attrs['content']
            desc = tags['og:description'].attrs['content']
        except (KeyError, AttributeError):
            # the page lacks og:title/og:description metadata; skip this link
            valid_link = False

        if valid_link:
            # Add an entry to the feed
            entry = fg.add_entry()
            entry.title(title)
            entry.description(desc)
            entry.content(desc)
            entry.updated(date)
            entry.guid(url)
            entry.author({'name': 'Zach Lowe'})
            entry.link({'href':url})

    print(fg.atom_str(pretty=True))
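
The gtfl.get_tags_for_link() helper is project-local and not shown; a plausible sketch of it (hypothetical, using requests and bs4) is:

# Hypothetical sketch of the project-local helper used above; the real
# implementation is not part of this example.
import requests
import bs4

def get_tags_for_link(url, properties):
    html = requests.get(url, timeout=10).text
    soup = bs4.BeautifulSoup(html, "lxml")
    return {p: soup.find("meta", attrs={"property": p}) for p in properties}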
Example #3
from feedgen import feed

def _gen_feed(url, key, value):
    title = 'Failures for %s: %s' % (key, value)
    fg = feed.FeedGenerator()
    fg.title(title)
    fg.id(url)
    fg.link(href=url, rel='self')
    fg.description("The failed %s: %s tests feed" % (key, value))
    fg.language('en')
    return fg
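
A quick usage sketch (hypothetical arguments):

# All feed fields required by rss_str() (title, link, description) are set.
fg = _gen_feed('https://example.org/failures/rss', 'project', 'nova')
print(fg.rss_str(pretty=True).decode('utf-8'))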
Example #4
import flask
from feedgen import feed

# CATEGORIES, AUTHOR_ROLE, FEATUREMEDIA and WEB_RENDITION are module-level
# constants, and get_permalink()/get_content() are project-local helpers.

def generate_feed(items):
    fg = feed.FeedGenerator()
    fg.load_extension("dc")
    fg.title("Superdesk")
    fg.id(flask.url_for("rss.index", _external=True))
    fg.link(href=flask.url_for("rss.index", _external=True), rel="self")
    fg.description("Fidelity RSS")
    for item in items:
        if not item.get("headline") and not item.get("name"):
            continue  # no title no rss
        entry = fg.add_entry()
        entry.guid(item["_id"])
        entry.link({"href": get_permalink(item)})
        entry.title(item.get("headline", item.get("name", item.get("slugline", ""))))
        entry.published(item.get("firstpublished"))
        entry.updated(item["versioncreated"])
        entry.content(get_content(item), type="CDATA")

        if item.get("source"):
            entry.source(title=item["source"])

        if item.get("subject"):
            category = [
                {"term": s.get("name")}
                for s in item["subject"]
                if s.get("scheme") in CATEGORIES
            ]
            if category:
                entry.category(category)

        if item.get("authors"):
            authors = [
                author["name"]
                for author in item["authors"]
                if author.get("role") and author["role"].lower() == AUTHOR_ROLE.lower()
            ]
            if authors:
                entry.dc.dc_creator(", ".join(authors))

        if item.get("associations") and item["associations"].get(FEATUREMEDIA):
            media = item["associations"][FEATUREMEDIA]
            if media.get("renditions") and media["renditions"].get(WEB_RENDITION):
                entry.enclosure(
                    media["renditions"][WEB_RENDITION]["href"],
                    type=media["renditions"][WEB_RENDITION].get("mimetype"),
                )

    return fg.rss_str(pretty=True)
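
A hedged usage sketch of this variant (app, the route, and fetch_published_items() are hypothetical; the function itself returns RSS 2.0 XML as bytes):

# Hypothetical Flask view wiring for the generator above.
@app.route('/rss')
def rss_index():
    items = fetch_published_items()  # hypothetical data-access helper
    return flask.Response(generate_feed(items), mimetype='application/rss+xml')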
Example #5
from typing import List, Optional

import flask
from feedgen import feed

# blog_models is a project-local module providing the Blog model.

def _create_generator(host: str,
                      blogs: Optional[List[blog_models.Blog]] = None):
    # avoid the mutable-default-argument pitfall; treat None as "no blogs"
    blogs = blogs or []
    title = flask.current_app.config["FEED_TITLE"]
    description = flask.current_app.config["FEED_DESCRIPTION"]

    generator = feed.FeedGenerator()
    generator.title(title=title)
    generator.description(description=description)

    for blog in blogs:
        categories = list()
        blog_url = flask.url_for(endpoint="blogs.display", slug=blog.slug)
        author_url = flask.url_for(endpoint="accounts.display",
                                   username=blog.author.display)

        entry = generator.add_entry()
        entry.title(title=blog.title)
        entry.description(description=blog.description, isSummary=True)
        entry.content(content=blog.body)
        entry.guid(
            guid=f"{host}{blog_url}",
            permalink=True,
        )
        entry.author(
            name=blog.author.display,
            uri=f"{host}{author_url}",
            email=blog.author.email,
        )
        entry.published(published=blog.created_at)
        entry.updated(updated=blog.updated_at)

        for category in blog.categories:
            category_url = flask.url_for(endpoint="categories.display",
                                         slug=category.slug)
            categories.append(dict(
                label=category.title,
                term=category.description,
                scheme=f"{host}{category_url}",
            ))
        entry.category(category=categories)
    return generator
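
A hedged usage sketch (my_blogs is a hypothetical list of blog_models.Blog instances):

# Hypothetical usage; _create_generator() sets no feed id, and feedgen's
# atom_str() requires one, so set it before serializing.
generator = _create_generator("https://example.com", blogs=my_blogs)
generator.id("https://example.com/feed.atom")
xml = generator.atom_str(pretty=True)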
Example #6
from feedgen import feed

# `t` is assumed to be a module-level Twitter API client (e.g. a
# twitter.Twitter instance) and cached_output a module-level cache;
# despite the function name, this builds an Atom document via atom_str().

def create_rss_feed_xml():
    tweets = t.statuses.home_timeline()
    fg = feed.FeedGenerator()
    fg.id('https://twitter.com/omeranson')
    fg.title('Twitter feed')
    fg.subtitle('Twitter timeline')
    fg.link(href='https://twitter.com')
    fg.link(href='http://ansonet.no-ip.biz:8081/feed.xml', rel='self')
    fg.language('en')
    for tweet in tweets:
        fe = fg.add_entry()
        fe.id('https://twitter.com/i/status/%s' % (tweet['id_str']))
        fe.title('%s (@%s)' %
                 (tweet['user']['name'], tweet['user']['screen_name']))
        fe.link(href='https://twitter.com/i/status/%s' % (tweet['id_str']))
        fe.description(tweet['text'])
        fe.author(name=tweet['user']['name'])
    output = fg.atom_str(pretty=True)
    global cached_output
    cached_output = output
    return output
Example #7
import datetime

from feedgen import feed

# As in the previous example, `t` is a module-level Twitter API client;
# _get_tweet_text() and TWEET_CREATED_AT_FORMAT are defined elsewhere in
# the module (see the note after this function).

def create_rss_feed_xml():
    tweets = t.statuses.home_timeline(tweet_mode='extended')
    fg = feed.FeedGenerator()
    fg.id('https://twitter.com/omeranson')
    fg.title('Twitter feed')
    fg.subtitle('Twitter timeline')
    fg.link(href='https://twitter.com')
    fg.link(href='http://ansonet.no-ip.biz:8081/feed.xml', rel='self')
    fg.language('en')
    for tweet in tweets:
        fe = fg.add_entry()
        fe.id('https://twitter.com/i/status/%s' % (tweet['id_str']))
        fe.title('%s (@%s)' %
                 (tweet['user']['name'], tweet['user']['screen_name']))
        fe.link(href='https://twitter.com/i/status/%s' % (tweet['id_str']))
        fe.content(_get_tweet_text(tweet), type='html')
        fe.author(name=tweet['user']['name'])
        fe.updated(
            datetime.datetime.strptime(tweet['created_at'],
                                       TWEET_CREATED_AT_FORMAT))
    output = fg.atom_str(pretty=True)
    global cached_output
    cached_output = output
    return output
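
TWEET_CREATED_AT_FORMAT is defined elsewhere in the module. Twitter API v1.1 timestamps look like "Wed Oct 10 20:19:24 +0000 2018", so a likely definition (an assumption, not taken from the source) is:

# Assumed value of the module-level constant; matches Twitter API v1.1
# created_at strings such as "Wed Oct 10 20:19:24 +0000 2018".
TWEET_CREATED_AT_FORMAT = '%a %b %d %H:%M:%S %z %Y'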
Example #8
import datetime
import email.utils
from pathlib import Path

from feedgen import entry, feed

# pep_creation(), first_line_starting_with() and pep_abstract() are
# repository-local helpers defined alongside this script.

def main():
    # get the directory with the PEP sources
    pep_dir = Path(__file__).parent

    # get list of peps with creation time (from "Created:" string in pep source)
    peps_with_dt = sorted((pep_creation(path), path) for path in pep_dir.glob("pep-????.*"))

    # generate rss items for 10 most recent peps
    items = []
    for dt, full_path in peps_with_dt[-10:]:
        try:
            pep_num = int(full_path.stem.split("-")[-1])
        except ValueError:
            continue

        title = first_line_starting_with(full_path, "Title:")
        author = first_line_starting_with(full_path, "Author:")
        if "@" in author or " at " in author:
            parsed_authors = email.utils.getaddresses([author])
            # ideal would be to pass as a list of dicts with names and emails to
            # item.author, but FeedGen's RSS <author/> output doesn't pass W3C
            # validation (as of 12/06/2021)
            joined_authors = ", ".join(f"{name} ({email_address})" for name, email_address in parsed_authors)
        else:
            joined_authors = author
        url = f"https://www.python.org/dev/peps/pep-{pep_num:0>4}"

        item = entry.FeedEntry()
        item.title(f"PEP {pep_num}: {title}")
        item.link(href=url)
        item.description(pep_abstract(full_path))
        item.guid(url, permalink=True)
        item.published(dt.replace(tzinfo=datetime.timezone.utc))  # ensure datetime has a timezone
        item.author(email=joined_authors)
        items.append(item)

    # The rss envelope
    desc = """
    Newest Python Enhancement Proposals (PEPs) - Information on new
    language features, and some meta-information like release
    procedure and schedules.
    """.replace("\n    ", " ").strip()

    # Setup feed generator
    fg = feed.FeedGenerator()
    fg.language("en")
    fg.generator("")
    fg.docs("https://cyber.harvard.edu/rss/rss.html")

    # Add metadata
    fg.title("Newest Python PEPs")
    fg.link(href="https://www.python.org/dev/peps")
    fg.link(href="https://www.python.org/dev/peps/peps.rss", rel="self")
    fg.description(desc)
    fg.lastBuildDate(datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc))

    # Add PEP information (ordered by newest first)
    for item in items:
        fg.add_entry(item)

    pep_dir.joinpath("peps.rss").write_bytes(fg.rss_str(pretty=True))
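
One of the repository-local helpers, first_line_starting_with(), is not shown; a plausible sketch (hypothetical, not the real implementation) is:

# Hypothetical sketch; the actual helper lives next to this script.
def first_line_starting_with(full_path, text):
    for line in full_path.open(encoding="utf-8"):
        if line.startswith(text):
            return line[len(text):].strip()
    return ""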
Example #9
import datetime
import re
import sys
import urllib.request

import bs4
from feedgen import feed

url = sys.argv[1]
feed_size = int(sys.argv[2])
title = sys.argv[3]
feed_id = sys.argv[4]  # renamed from `id` to avoid shadowing the builtin

with urllib.request.urlopen(url) as response:
    content = response.read().decode("utf8")

soup = bs4.BeautifulSoup(content, features="lxml")

posts = 0

f = feed.FeedGenerator()
f.title(title)
f.id(feed_id)

for a in soup.find_all("a"):
    if posts == feed_size:
        break
    if not a.string:
        continue  # anchor has no plain-text label; fullmatch would choke on None
    match = re.fullmatch(r"(....-..-..) (.*)", a.string)
    if not match:
        continue
    title = match.group(2)
    date = datetime.datetime.strptime(match.group(1), "%Y-%m-%d").date()

    fi = f.add_item()
    fi.title(title)
    fi.id(a["href"])