def invert_feed(feed: str, name: str) -> str:
    """
    Go through each element of the feed.
    :param feed:
    :return:
    """
    parsed: feedparser.FeedParserDict = feedparser.parse(feed)
    out_feed = feedgenerator.Rss201rev2Feed(name, "http://n-gate.com",
                                            "webshit condom")
    pool = multiprocessing.Pool(8)
    rs = pool.map_async(process_entry, parsed["entries"])
    pool.close()
    max_value = len(parsed["entries"])
    #with progressbar.ProgressBar(max_value=max_value) as bar:
    while True:
        if rs.ready():
            break
        #bar.update(min(complete_counter.value, max_value))
        time.sleep(1)

    for i in rs.get():
        out_feed.add_item(title=i.title,
                          link=i.entry,
                          description=i.description,
                          content=i.content_type,
                          pubdate=i.pubdate)

    return out_feed.writeString(encoding="utf-8")
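This example assumes a module-level process_entry worker (not shown) whose return value exposes title, entry, description, content_type and pubdate attributes. A minimal, purely illustrative sketch of such a worker:

# Hypothetical sketch of the process_entry worker assumed by invert_feed;
# only the shape of the return value matters here.
import collections
from datetime import datetime, timezone

ProcessedEntry = collections.namedtuple(
    "ProcessedEntry", "title entry description content_type pubdate")

def process_entry(entry):
    return ProcessedEntry(
        title=entry.get("title", ""),
        entry=entry.get("link", ""),          # used as the item link above
        description=entry.get("summary", ""),
        content_type="text/html",
        pubdate=datetime.now(timezone.utc))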
Example #2
def rss_for_twitter(key_words, username):
    store = file.Storage('token.json')
    creds = store.get()
    if not creds or creds.invalid:
        flow = client.flow_from_clientsecrets('credentials.json', SCOPES)
        creds = tools.run_flow(flow, store)
    service = build('drive', 'v3', http=creds.authorize(Http()))
    feed2 = feedgenerator.Rss201rev2Feed(title="Rss for twitter",
                                         link="https://twitter.com/",
                                         description="New in twitter",
                                         language="en")
    url = 'https://twitrss.me/twitter_user_to_rss/?user={}'.format(username)
    feed = feedparser.parse(url)
    for key in feed["entries"]:
        title = key['title']
        link = key['link']
        description = key['description']
        if contains_wanted(title.lower(), key_words):
            feed2.add_item(title=title,
                           link=link,
                           description=description,
                           unique_id='idposte')
    with open('rss_by_keywords_for_twitter.rss', 'w') as fp:
        feed2.write(fp, 'utf-8')
    file_metadata = {'name': 'rss_by_keywords_for_twitter.rss'}
    media = MediaFileUpload('rss_by_keywords_for_twitter.rss',
                            mimetype='text/plain',
                            resumable=True)
    fili = service.files().create(body=file_metadata,
                                  media_body=media,
                                  fields='id').execute()
    with open('rss_by_keywords_for_twitter.rss', 'w') as fp:
        feed2.write(fp, 'utf-8')
Example #3
 def test_rss_mime_type(self):
     """
     Test to make sure RSS MIME type has UTF8 Charset parameter set
     """
     rss_feed = feedgenerator.Rss201rev2Feed("title", "link", "description")
     self.assertEqual(rss_feed.mime_type,
                      "application/rss+xml; charset=utf-8")
def feed(id):
    keyword_list = get_keyword_list()
    if id not in keyword_list:
        return ""
    keyword = keyword_list[id]
    tweet_list = search_tweet(keyword)

    title = "「" + keyword + "」 - Twitter Hourly Trend Reporter"
    link = "https://twitter.com/"
    description = "「" + keyword + "」の最近のツィートです。"

    feed = feedgenerator.Rss201rev2Feed(title=title,
                                        link=link,
                                        description=description,
                                        language="ja")

    for tweet in tweet_list:
        title = "「" + keyword + "」の最新のツィートです。(" + tweet["created_at"] + ")"
        description = tweet["text"]
        link = "https://twitter.com/" + tweet[
            "author_id"] + "/status/" + tweet["id"]

        # Attach the Asia/Tokyo timezone; jp.localize() is used because
        # attaching a pytz zone via replace(tzinfo=...) yields a wrong offset.
        jp = timezone('Asia/Tokyo')
        pubdate = jp.localize(datetime.strptime(tweet["created_at"],
                                                '%Y-%m-%dT%H:%M:%S.%fZ'))
        feed.add_item(title=title,
                      link=link,
                      description=description,
                      pubdate=pubdate)

    response = make_response(feed.writeString("utf-8"))
    response.headers["Content-Type"] = "application/xml"
    return response
Example #5
def filter_feed(feed_url, transform_func):
    """
    Fetches a feed using fetch_and_prepare_feed, passes the feed data through
    transform_func (which should make modifications in-place and return the feed),
    then regenerates and returns a feed as a string, pretty-printed using BeautifulSoup.
    """
    f = fetch_and_prepare_feed(feed_url)
    f = transform_func(f)
    if not f['feed'].get('description', None):
        if f['feed']['title']:
            f['feed']['description'] = f['feed']['title']
        elif f['feed']['link']:
            f['feed']['description'] = f['feed']['link']
        else:
            f['feed']['description'] = "Unknown title"
    title = f['feed'].pop('title')
    link = f['feed'].pop('link')
    description = f['feed'].pop('description')
    o = feedgenerator.Rss201rev2Feed(title, link, description, **f['feed'])
    for i in f['entries']:
        if 'description' not in i:
            for alt in ["summary"]:
                if alt in i:
                    i['description'] = i[alt]
        o.add_item(**i)
    feedstr = o.writeString("utf-8")
    feedstr = bs4.BeautifulSoup(feedstr, 'xml').prettify()
    return feedstr
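The transform_func contract described in the docstring (modify the parsed feed dict in place and return it) can be illustrated with a small hypothetical callback; the feed URL and entry keys below are placeholders that assume the feedparser-style structure used above.

# Hypothetical transform_func: keep only entries whose title mentions Python.
def only_python_entries(f):
    f['entries'] = [e for e in f['entries']
                    if 'python' in e.get('title', '').lower()]
    return f

# Example call (placeholder URL):
# xml = filter_feed('https://example.com/feed.xml', only_python_entries)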
Example #6
def emit_feed(app, exc):
    global feed_entries
    import os.path

    title = app.config.feed_title
    if not title:
        title = app.config.project

    feed_dict = {
        'title': title,
        'link': app.config.feed_base_url,
        'feed_url': app.config.feed_base_url,
        'description': app.config.feed_description
    }
    if app.config.language:
        feed_dict['language'] = app.config.language
    if app.config.copyright:
        feed_dict['feed_copyright'] = app.config.copyright
    feed = feedgenerator.Rss201rev2Feed(**feed_dict)
    app.builder.env.feed_feed = feed
    ordered_keys = sorted(feed_entries, reverse=True)
    for key in ordered_keys:
        feed.add_item(**feed_entries[key])
    outfilename = os.path.join(app.builder.outdir, app.config.feed_filename)
    fp = open(outfilename, 'w')
    feed.write(fp, 'utf-8')
    fp.close()
Example #7
def feed():
    feed = feedgenerator.Rss201rev2Feed(
        title="Derek R. Arnold",
        link="http://derekarnold.net",
        description="Derek R. Arnold's Famous Internet Content",
        language="en")
    for post in Post.feed():
        feed.add_item(**post)

    return Response(feed.writeString('utf-8'), mimetype="text/xml")
Example #8
 def test_feed_with_feed_url_gets_rendered_with_atom_link(self):
     feed = feedgenerator.Rss201rev2Feed('title',
                                         '/link/',
                                         'descr',
                                         feed_url='/feed/')
     self.assertEqual(feed.feed['feed_url'], '/feed/')
     feed_content = feed.writeString('utf-8')
     self.assertIn('<atom:link href="/feed/" rel="self"></atom:link>',
                   feed_content)
def build_reading_page_rss(username, password):
    sess = DreamwidthSession(username, password)

    # Okay, now get the reading page.  This should contain the most recent
    # 1000 entries from the last 14 days, which is plenty.
    resp = sess.get('https://%s.dreamwidth.org/read' % username)
    assert "You're viewing your Reading Page." in resp.text

    reading_soup = bs4.BeautifulSoup(resp.text, 'html.parser')
    entries = reading_soup.find('div', attrs={'id': 'entries'})
    entry_wrappers = entries.findAll('div', attrs={'class': 'entry-wrapper'})

    # Now get the feed items.  This is just a pile of messy HTML parsing.
    #
    # Important note: because this RSS feed may be exposed outside Dreamwidth
    # (and thus outside the Dreamwidth access controls), it shouldn't include
    # overly sensitive information.  In particular, NO POST CONTENT.  This is
    # just some metadata I'm hoping isn't too secret -- in particular, the same
    # stuff you get from notification emails.
    #
    feed = feedgenerator.Rss201rev2Feed(
        title="%s's Dreamwidth reading page" % username,
        link='https://%s.dreamwidth.org/' % username,
        description="Entries on %s's reading page" % username,
        language='en')

    for e in entry_wrappers:
        h3_title = e.find('h3', attrs={'class': 'entry-title'})
        title = h3_title.find('a').text
        url = h3_title.find('a').attrs['href']

        datetime_span = e.find('span', attrs={'class': 'datetime'})
        date_span = datetime_span.find('span', attrs={'class': 'date'}).text
        time_span = datetime_span.find('span', attrs={'class': 'time'}).text
        pubdate = parse_date(date_span, time_span)

        poster = e.find('span', attrs={'class': 'poster'}).text

        try:
            tags = [
                li.find('a').text
                for li in e.find('div', attrs={
                    'class': 'tag'
                }).find('ul').findAll('li')
            ]
            description = 'This post is tagged with %s' % ', '.join(tags)
        except AttributeError:
            tags = []
            description = '(This post is untagged)'

        feed.add_item(title=title,
                      link=url,
                      pubdate=pubdate,
                      author_name=poster,
                      description=description)

    feed.write(sys.stdout, 'utf-8')
Example #10
def generate_feed(f):
    feed = feedgenerator.Rss201rev2Feed(title=f.feed.title,
                                        link=f.feed.link,
                                        description=f.feed.description)

    for e in f.entries:
        feed.add_item(title=e.title, link=e.link, description=e.description)

    return feed.writeString(encoding="UTF8")
Example #11
def rss():
    rn_events.fetch_hypothesis_timed()
    feed = feedgenerator.Rss201rev2Feed(
        title='ReproNim community events',
        link='http://www.reproducibleimaging.org/events.html',
        description='ReproNim community events.',
        language='en')
    for event in reversed(rn_events.get_events()):
        feed.add_item(title=event[1], link=event[0], description=event[1])
    data = feed.writeString('utf-8')
    return flask.Response(data, mimetype='application/rss+xml')
Example #12
 def __get_feed(self, title, link, description,
                feed_items) -> feedgenerator.Rss201rev2Feed:
     # Generate the RSS feed.
     feed = feedgenerator.Rss201rev2Feed(title=title,
                                         link=link,
                                         description=description)
     for item in feed_items:
         feed.add_item(title=item.title,
                       link=item.link,
                       description=item.description,
                       pubdate=item.pubdate)
     return feed
Example #13
def get_articles():
    """
    Returns a map of dates to a list of current events on that date.

    The root of this is parsing https://en.wikipedia.org/wiki/Portal:Current_events
    The true information we're after is included via
    https://en.wikipedia.org/wiki/Portal:Current_events/Inclusion
    which then includes the past seven days.
    """
    feed = feedgenerator.Rss201rev2Feed(
        'Wikipedia: Portal: Current events',
        'https://en.wikipedia.org/wiki/Portal:Current_events',
        'Wikipedia: Portal: Current events')

    # Start at today.
    day = date.today()

    for i in range(7):
        day -= timedelta(days=1)

        # Download the article content.
        article = get_article_by_date(day)
        # Parse the article contents.
        wikicode = mwparserfromhell.parse(article)
        nodes = wikicode.filter(recursive=False, matches=filter_templates)

        # Remove all nodes before / after the start / end comments.
        start = 0
        end = len(nodes) - 1
        for idx, node in enumerate(nodes):
            if isinstance(node, Comment):
                if 'All news items below this line' in node:
                    start = idx + 1
                elif 'All news items above this line' in node:
                    end = idx
                    break

        # Ignore nodes outside of the start/end.
        nodes = nodes[start:end]

        composer = WikicodeToHtmlComposer()

        try:
            feed.add_item(title=u'Current events: {}'.format(day),
                          link=get_article_url(day),
                          description=composer.compose(nodes),
                          pubdate=datetime(*day.timetuple()[:3]))
        except HtmlComposingError:
            print("Unable to render article from: {}".format(day))

    return feed.writeString('utf-8')
Example #14
def save_rss(f, contents):
    feed = feedgenerator.Rss201rev2Feed(title='おすすめノート',
                                        link='https://note.mu/',
                                        description='おすすめノート')

    for content in contents:
        feed.add_item(
            title=content['title'],
            link=content['url'],
            description=content['description'],
            unique_id=content['url'],
        )

    feed.write(f, ENCODE)
Example #15
def make_rss(name, query):
    filter_query = client.factory.create('ArrayOfFilteroptionstype')
    for k, v in query.iteritems():
        filter_options = client.factory.create('FilterOptionsType')
        filter_options.filterId = k

        if k in range_filters:
            filter_range = client.factory.create('RangeValueType')
            if 'min' in v:
                filter_range.rangeValueMin = v['min']
            if 'max' in v:
                filter_range.rangeValueMax = v['max']
            filter_options.filterValueRange = filter_range
        else:
            filter_array = client.factory.create('ArrayOfString')
            filter_array.item = v
            filter_options.filterValueId = filter_array

        filter_query.item.append(filter_options)

    res = client.service.doGetItemsList(webAPI,
                                        countryId,
                                        filter_query,
                                        resultScope=3)

    feed = feedgenerator.Rss201rev2Feed(title='Allegro - %s' %
                                        query.get('search', name),
                                        link='http://allegro.pl',
                                        description=name,
                                        language='pl')

    items = []
    if res.itemsCount > 0:
        items = res.itemsList.item

    for item in items:
        feed.add_item(
            title=item.itemTitle,
            link="https://allegro.pl/show_item.php?item=%s" % item.itemId,
            description=
            u"Sprzedający: <a href='https://allegro.pl/show_user.php?uid=%s'>%s</a><br/>%s<br/>Do końca: %s<br/>%s"
            % (
                item.sellerInfo.userId,
                item.sellerInfo.userLogin,
                make_price_line(item),
                make_date_line(item),
                make_image_line(item),
            ))
    with open(os.path.join(outdir, 'allegro-' + name + '.xml'), 'w') as f:
        feed.write(f, 'utf-8')
def _build_feed(request, feed_conf):
    """Build the feed."""

    request_page = MoinMoin.Page.Page(request, request.page.page_name)

    feed = feedgenerator.Rss201rev2Feed(
        title=feed_conf["blog-feed-title"],
        link=feed_conf["blog-feed-link"],
        feed_url=feed_conf["blog-feed-feed-url"],
        description=feed_conf["blog-feed-description"],
        language=feed_conf["blog-feed-language"],
    )

    return feed
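The feed_conf mapping is expected to supply the blog-feed-* keys read above; a placeholder example of what such a configuration might look like:

# Illustrative feed_conf with the keys _build_feed reads; all values are placeholders.
feed_conf = {
    "blog-feed-title": "Example blog",
    "blog-feed-link": "https://example.org/blog/",
    "blog-feed-feed-url": "https://example.org/blog/feed.xml",
    "blog-feed-description": "Posts from the example blog",
    "blog-feed-language": "en",
}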
Example #17
def create_rss(sxb, rss_url, base_url):
	rss = feedgenerator.Rss201rev2Feed(
		title = sxb.info.title,
		link = sxb.info.homepage,
		feed_url = rss_url,
		description = sxb.info.description,
		author_name= sxb.info.company,
		pubdate = sxb.info.date
	)

	for item in sxb.get_items_sorted():
		add_rss_item(rss, sxb, base_url, item)

	return rss.writeString('utf-8')
def save_as_feed(f, posts):
    # Save the list of article content as a feed.
    # Create the Rss201rev2Feed object that represents the feed.
    feed = feedgenerator.Rss201rev2Feed(title='おすすめノート',
                                        link='https://note.mu/',
                                        description='おすすめノート')

    for post in posts:
        feed.add_item(title=post['title'],
                      link=post['url'],
                      description=post['description'],
                      unique_id=post['url'])

    feed.write(f, 'utf-8')
Example #19
def tootfeed(query_feed):
    """ generate a rss feed from parsed mastodon search """

    if mastodonOK:
        buffered = []
        hashtagResult = mastodon.timeline_hashtag(query_feed)

        for toot in hashtagResult:

            toot['htmltext'] = '<blockquote><div><img src="' + toot['account']['avatar_static'] + \
                                '" alt="' + toot['account']['display_name'] + \
                                '" />   <strong>' + toot['account']['username'] + \
                                ': </strong>' + toot['content'] + '<br>' + \
                               '♻ : ' + str(toot['reblogs_count']) + ', ' + \
                               '✰ : ' + str(toot['favourites_count']) + '</div></blockquote>'

            if isinstance(toot['created_at'], str):
                toot['created_at'] = datetime.datetime.strptime(toot['created_at'], '%Y-%m-%dT%H:%M:%S.%fZ')

            buffered.append(toot.copy())

        utc = pytz.utc
        f = feedgenerator.Rss201rev2Feed(title=param['mastodon']['title'] + '"' + query_feed + '"',
                                         link=param['mastodon']['url'] + '/web/timelines/tag/' + query_feed,
                                         description=param['mastodon']['description'],
                                         language=param['feed']['language'],
                                         author_name=param['feed']['author_name'],
                                         feed_url=param['feed']['feed_url'])

        for toot in buffered:

            text = BeautifulSoup(toot['content'], "html.parser").text
            pubdate = toot['created_at']
            if not pubdate.tzinfo:
                pubdate = utc.localize(pubdate).astimezone(pytz.timezone(param['feed']['timezone']))

            if len(text) > text_length_limit:
                text = text[:text_length_limit] + '... '
            f.add_item(title=toot['account']['display_name'] + ' (' + toot['account']['username'] + '): '
                             + text,
                       link=toot['url'],
                       pubdate=pubdate,
                       description=toot['htmltext'])

        xml = f.writeString('UTF-8')
    else:
        xml = 'error - Mastodon parameters not defined'

    return xml
Example #20
def get_rss_9gag(key_words):
    store = file.Storage('token.json')
    creds = store.get()
    if not creds or creds.invalid:
        flow = client.flow_from_clientsecrets('credentials.json', SCOPES)
        creds = tools.run_flow(flow, store)
    service = build('drive', 'v3', http=creds.authorize(Http()))
    feed2 = feedgenerator.Rss201rev2Feed(title="All Rss",
                                         link="https://9gag.com/",
                                         description="New in 9gag",
                                         language="en")
    list_rss = [
        'https://9gag-rss.com/api/rss/get?code=9GAGFresh&format=2',
        'https://9gag-rss.com/api/rss/get?code=9GAGFunny&format=2',
        'https://9gag-rss.com/api/rss/get?code=9GAGHot&format=2'
    ]
    for i in list_rss:
        feed = feedparser.parse(i)
        for key in feed["entries"]:
            title = key['title']
            url = key['links'][0]['href']
            soup = BeautifulSoup(key["summary"], "html.parser")
            try:
                image = soup.find('img')
                url_image = image['src']
            except:
                url_image = soup.findAll('source', type='video/mp4')[0]['src']
            if contains_wanted(title.lower(), key_words):
                feed2.add_item(title=title,
                               link=url,
                               description=url_image,
                               unique_id='idposte')

    with open('rss_by_keywords_for_9gag.rss', 'w') as fp:
        feed2.write(fp, 'utf-8')

    file_metadata = {'name': 'rss_by_keywords_for_9gag.rss'}
    media = MediaFileUpload('rss_by_keywords_for_9gag.rss',
                            mimetype='text/plain',
                            resumable=True)
    fili = service.files().create(body=file_metadata,
                                  media_body=media,
                                  fields='id').execute()
    o = feed2.writeString('utf-8')
    soup = BeautifulSoup(o, "xml")
    soup = soup.prettify()
    with open('templates/rss_by_keywords_for_9gag.rss', 'w') as fp:
        fp.write(str(soup))
Example #21
def save_as_feed(file, posts):
    '''Save the list of article content as a feed.'''
    feed = feedgenerator.Rss201rev2Feed(title='おすすめノート',
                                        link='https://note.mu/',
                                        description='おすすめノート')

    for post in posts:
        # Add an item to the feed.
        feed.add_item(
            title=post['title'],
            link=post['url'],
            description=post['description'],
            # Specifying an id lets RSS readers handle items without duplicates.
            unique_id=post['url'])

    # Write to the file object; the second argument specifies the encoding.
    feed.write(file, 'utf-8')
Example #22
    def WriteFeed(self, inItems: List[RSSItemTuple]) -> bool:
        """
        Takes the list of input items and writes them to an RSS feed

        :param inItems: list of RSSItemTuples
        :return: True on success, False otherwise
        """

        try:
            # Create the overall feed metadata
            myFeedGenerator = feedgenerator.Rss201rev2Feed(
                title="Brian's Twitter",
                link="http://www.digitalmaddox.com/myttrss/twitterfeed.rss",
                description="Generated by pyTwittertoRSS",
                language="en",
                author_name="Brian G. Maddox",
                author_email="*****@*****.**")

            # Go through each item and add it to the feed
            for feedItem in inItems:
                tAuthor = "{username}".format(username=feedItem.user_name)

                tTitle = "{author}: {tweettext}".format(
                    author=tAuthor, tweettext=feedItem.tweet_text)
                tDescription = "<b>Author:</b> {tauthor}<br><b>Tweet:</b> {fulltweet}<br><b>URLS:</b> {urls}". \
                    format(tauthor=tAuthor,
                           fulltweet=feedItem.tweet_text,
                           urls=feedItem.found_urls)

                myFeedGenerator.add_item(title=tTitle,
                                         link=feedItem.tweet_url,
                                         author_name=tAuthor,
                                         author_link=feedItem.user_url,
                                         pubdate=feedItem.created_at,
                                         description=tDescription)

            # Now write it to a file
            with open(feedFilename, 'w') as feedFile:
                myFeedGenerator.write(feedFile, 'utf-8')

        except Exception as tExcept:
            self.logger.critical("*** Unable to create the feed!")
            self.logger.error(tExcept)
            return False

        return True
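WriteFeed iterates over RSSItemTuple values whose fields (user_name, tweet_text, found_urls, tweet_url, user_url, created_at) are inferred from the attribute accesses above; the tuple is presumably defined elsewhere in pyTwittertoRSS, but a hypothetical stand-in and call could look like this:

# Hypothetical stand-in for RSSItemTuple, matching the fields WriteFeed accesses.
from collections import namedtuple
from datetime import datetime, timezone

RSSItemTuple = namedtuple(
    "RSSItemTuple",
    "user_name tweet_text found_urls tweet_url user_url created_at")

item = RSSItemTuple(
    user_name="example_user",
    tweet_text="Hello from an example tweet",
    found_urls=["https://example.com"],
    tweet_url="https://twitter.com/example_user/status/1",
    user_url="https://twitter.com/example_user",
    created_at=datetime.now(timezone.utc))

# writer.WriteFeed([item])   # `writer` is an instance of the enclosing class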
Example #23
def emit_feed(app, exc):
    global feed_entries
    import os.path

    title = app.config.feed_title
    if not title:
        title = app.config.project

    feed_dict = {
        'title': title,
        'subtitle': app.config.feed_subtitle,
        'link': app.config.feed_base_url,
        'feed_url': app.config.feed_base_url,
        'description': app.config.feed_description,
        'categories': app.config.feed_categories,
        'author_name': app.config.feed_author_name,
        'author_email': app.config.feed_author_email
    }
    if app.config.language:
        feed_dict['language'] = app.config.language
    if app.config.copyright:
        feed_dict['feed_copyright'] = app.config.copyright
    # sort items
    ordered_keys = sorted(feed_entries, reverse=True)
    # loop over all feed variants
    for feedvar in app.config.feed_variants:
        feedvar_settings = app.config.feed_variants[feedvar]
        feed = feedgenerator.Rss201rev2Feed(**feed_dict)
        app.builder.env.feed_feed = feed
        for key in ordered_keys:
            item = feed_entries[key]
            # only take the ones that should be in this feed
            if feedvar_settings['tag'] is None \
                    or feedvar_settings['tag'] in item['categories']:
                feed.add_item(**feed_entries[key])
        outfilename = os.path.join(app.builder.outdir,
                                   feedvar_settings['filename'])
        # make sure the directory exists
        feed_dir = os.path.dirname(outfilename)
        if feed_dir and not os.path.exists(feed_dir):
            os.makedirs(os.path.dirname(outfilename))
        fp = open(outfilename, 'w')
        feed.write(fp, 'utf-8')
        fp.close()
Example #24
def get_rss_amazon(key_words):
    store = file.Storage('token.json')
    creds = store.get()
    if not creds or creds.invalid:
        flow = client.flow_from_clientsecrets('credentials.json', SCOPES)
        creds = tools.run_flow(flow, store)
    service = build('drive', 'v3', http=creds.authorize(Http()))
    title_list = []
    list_rating = []
    list_review = []
    list_url = []
    for i in key_words:
        results = list(amazonscraper.search(i))
        print(i)
        print(results)
        for result in results:
            title_list.append(result.title)
            list_rating.append(result.rating)
            list_review.append(result.review_nb)
            list_url.append(result.url)

    result = zip(title_list, list_rating, list_review, list_url)
    feed = feedgenerator.Rss201rev2Feed(title="all events",
                                        link="https://www.amazon.com/",
                                        description="New in amazon",
                                        language="en")
    for info in result:
        feed.add_item(title=info[0],
                      link=info[3],
                      description=info[1],
                      unique_id='no')
    with open('rss_by_keywords_amazon.rss', 'w') as fp:
        feed.write(fp, 'utf-8')
    file_metadata = {'name': 'rss_by_keywords_amazon.rss'}
    media = MediaFileUpload('rss_by_keywords_amazon.rss',
                            mimetype='text/plain',
                            resumable=True)
    fili = service.files().create(body=file_metadata,
                                  media_body=media,
                                  fields='id').execute()
    o = feed.writeString('utf-8')
    soup = BeautifulSoup(o, "xml")
    soup = soup.prettify()
    with open('templates/rss_by_keywords_amazon.rss', 'w') as fp:
        fp.write(str(soup))
Example #25
def save_as_feed(f, posts):
    """
    주문 내역을 피드로 저장합니다.
    """
    # Rss201rev2Feed 객체를 생성합니다.
    feed = feedgenerator.Rss201rev2Feed(title='네이버페이 주문 이력',
                                        link='https://order.pay.naver.com/',
                                        description='주문 이력')

    # Add each post to the feed.
    for post in posts:
        feed.add_item(title=post['title'],
                      link=post['url'],
                      description=post['description'],
                      unique_id=post['url'])

    # Write the feed to the file object.
    feed.write(f, 'utf-8')
Example #26
def builtin_main(feed_url):
    """Accepts a feed URL and adds full-content to each entry."""
    f = fetch_and_prepare_feed(feed_url)
    if not f['feed'].get('description', None):
        if f['feed']['title']:
            f['feed']['description'] = f['feed']['title']
        elif f['feed']['link']:
            f['feed']['description'] = f['feed']['link']
        else:
            f['feed']['description'] = "Unknown title"
    title = f['feed'].pop('title')
    link = f['feed'].pop('link')
    description = f['feed'].pop('description')
    o = feedgenerator.Rss201rev2Feed(title, link, description, **f['feed'])
    for i in f['entries']:
        url = i["link"]
        # if we're looking at a reddit site ignore their
        # broken link structure and use their [link] link (*facepalm*)
        if "reddit.com/r/" in link:
            original_html = i.get("description", None) or i.get("summary", None)
            s = bs4.BeautifulSoup(original_html)
            for a in s.find_all('a', href=True):
                if a.contents == [u"[link]"] and a.get("href") and not (a.get("href").lower().endswith(".png") or a.get("href").lower().endswith(".gif") or a.get("href").lower().endswith(".jpg")):
                    url = a.get("href")
                    break
        if url:
            # fake our user agent because some sites are crybabies
            req = urllib2.Request(url, None, {'User-Agent': 'Mozilla/5.0'})
            html = urllib2.urlopen(req).read()
            if html:
                try:
                    i["description"] = fulltext(html)
                except:
                    sys.stderr.write("Unable to parse: %s\n" % url)
                else:
                    try:
                        o.add_item( **i )
                    except:
                        sys.stderr.write("Unable to add: %s\n" % url)
            else:
                sys.stderr.write("Unable to fetch: %s\n" % url)
    feedstr = o.writeString("utf-8")
    feedstr = bs4.BeautifulSoup(feedstr, 'xml').prettify().encode("utf-8", "ignore")
    return feedstr
Example #27
def get_rss_etsy(key_words):
    store = file.Storage('token.json')
    creds = store.get()
    if not creds or creds.invalid:
        flow = client.flow_from_clientsecrets('credentials.json', SCOPES)
        creds = tools.run_flow(flow, store)
    service = build('drive', 'v3', http=creds.authorize(Http()))
    title_list = []
    description_list = []
    url_list = []
    api = EtsyAPI(api_key='trq2ib58r0zqr51mmnlau3yn')
    url = '&'.join(key_words)
    url = '/listings/active?keywords=' + url
    r = api.get(url)
    data = r.json()
    for i in range(len(data['results'])):
        title_list.append(data['results'][i]['title'])
        description_list.append(data['results'][i]['description'])
        url_list.append(data['results'][i]['url'])
    result = zip(title_list, url_list, description_list)
    feed = feedgenerator.Rss201rev2Feed(title="all events",
                                        link="https://etsy.com/",
                                        description="New in etsy",
                                        language="en")
    for info in result:
        feed.add_item(title=info[0],
                      link=info[1],
                      description=info[2],
                      unique_id='no')
    with open('rss_by_keywords_etsy.rss', 'w') as fp:
        feed.write(fp, 'utf-8')
    file_metadata = {'name': 'rss_by_keywords_etsy.rss'}
    media = MediaFileUpload('rss_by_keywords_etsy.rss',
                            mimetype='text/plain',
                            resumable=True)
    fili = service.files().create(body=file_metadata,
                                  media_body=media,
                                  fields='id').execute()
    o = feed.writeString('utf-8')
    soup = BeautifulSoup(o, "xml")
    soup = soup.prettify()
    with open('templates/rss_by_keywords_etsy.rss', 'w') as fp:
        fp.write(str(soup))
Example #28
def write_feed():
    feed_args = {
        'title': 'adamzap.com',
        'link': 'http://adamzap.com/',
        'description': 'New content from adamzap.com',
    }

    feed = feedgenerator.Rss201rev2Feed(**feed_args)

    for page in sum(SECTIONS.values(), []):
        item_args = {
            'title': page['title'],
            'description': page['content'],
            'link': 'http://adamzap.com/' + page['href']
        }

        feed.add_item(**item_args)

    with open(OUT_DIR + 'feed.xml', 'w') as feed_file:
        feed.write(feed_file, 'ascii')
Example #29
def save_as_feed(f, posts):
    """
    文章コンテンツのリストをフィードとして保存する
    """
    
    # フィードを表すRss201rev2Feedオブジェクトを作成する
    feed = feedgenerator.Rss201rev2Feed(
        title='おすすめノート', # フィードのタイトル
        link='https://note.mu/',  #  フィードに対応するwebサイトのURL
        description='おすすめノート') # フィードの概要
    
    for post in posts:
        # フィードにアイテムを追加する
        # キーワード引数unique_idは、アイテムを一意に識別するユニークなIDを指定する
        # 必須ではないが、このIDを指定しておくとRSSリーダーがアイテムの重複なく扱える
        # 可能性が高まるので、ここではコンテンツのURLを指定している
        feed.add_item(title=post['title'], link=post['url'],
                     description=post['description'], unique_id=post['url'])
        
    feed.write(f, 'utf-8') # ファイルオブジェクトに書き込む。第2引数にエンコーディングを指定する
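A possible call to this helper, with placeholder post data and output file:

# Illustrative usage of save_as_feed; the post data and filename are placeholders.
posts = [{'title': 'Sample note',
          'url': 'https://note.mu/example/n/abc123',
          'description': 'A short summary of the note'}]
with open('feed.xml', 'w', encoding='utf-8') as f:
    save_as_feed(f, posts)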
Example #30
def rss():
    feed = feedgenerator.Rss201rev2Feed(
        title="AFPyro",
        description="AFPyro feeds",
        link="http://afpyro.afpy.org",
        language="fr",
    )
    items = []
    filenames = glob.glob(os.path.join(docs, "*", "*", "*.html"))
    for filename in filenames:
        doc = pq(filename=filename)
        doc("a.headerlink").remove()
        title = doc("h1:first").text()
        doc("h1:first").remove()
        body = doc(".body").html()
        path = filename.split("/dates/")[1]
        path, ext = os.path.splitext(path)
        day = path.split("/")[1]
        y, m, d = day.split("_")
        pubdate = datetime.datetime(int(y), int(m), int(d))
        items.append((
            pubdate,
            dict(
                title=title,
                link="http://afpyro.afpy.org/dates/%s.html" % path,
                pubdate=pubdate,
                description=body,
            ),
        ))

    items = sorted(items, reverse=True)

    for d, i in items:
        feed.add_item(**i)

    fd = io.StringIO()
    feed.write(fd, "utf-8")
    fd.seek(0)
    data = fd.read()
    return Response(data, content_type="application/rss+xml")