def produce_entries(self):
        """
        Filter entries from a feed using the regex map, use the
        feed normalizer to produce FeedEntryDict objects.
        """
        # Use the cache to get the feed for filtering.
        feed_data = feedparser.parse(HTTPCache(self.feed_uri).content())

        # Build the output feed's normalized metadata
        self.FEED_META = normalize_feed_meta(feed_data, self.date_fmt)
        
        # Now, apply the regex map to filter each incoming entry.
        entries_filtered = []
        for entry in feed_data.entries:
            # Initially assume the entry is okay for inclusion.
            ok_include = True
            
            # Iterate through each entry key and regex pair.
            for k, r in self.filter_re.items():
                # The first time a field of the entry fails to match
                # the regex map criteria, reject it for inclusion.
                if not (k in entry and r.match(entry[k])):
                    ok_include = False
                    break
            
            # Finally, if the entry passes all the tests, include it.
            if ok_include: entries_filtered.append(entry)
                    
        # Normalize all the filtered entries
        entries = normalize_entries(entries_filtered)
        for entry in entries:
            entry.date_fmt = self.date_fmt

        return entries
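A minimal sketch of the regex map the filter above expects in self.filter_re: a dict mapping entry field names to compiled patterns, every one of which an entry must match to be included. The field names and patterns here are illustrative assumptions, not taken from the original class.

import re

# Hypothetical field -> pattern strings; an entry must match all of them.
FILTER_PATTERNS = {
    'title'   : r'python',
    'summary' : r'feed|atom|rss',
}

# Compile into the regex map iterated over in produce_entries().
filter_re = dict((k, re.compile(v, re.IGNORECASE))
                 for k, v in FILTER_PATTERNS.items())

# The same inclusion test, applied to a made-up entry dict:
entry = {'title': 'Python feed tricks', 'summary': 'Feed filtering notes'}
ok_include = all(k in entry and r.match(entry[k])
                 for k, r in filter_re.items())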
    def produce_entries(self):
        """
        Get a feed, attempt to parse out hCalendar content
        and add mod_event metadata based on it.
        """
        # Grab and parse the feed
        feed = feedparser.parse(self.main_feed)
        
        # Normalize feed meta data
        self.FEED_META = normalize_feed_meta(feed, self.date_fmt)

        # Run through all the normalized entries...
        hp = HCalendarParser()
        entries = normalize_entries(feed.entries)
        for entry in entries:
            events = hp.parse(entry.data['summary'])
            if events:
                event = events[0]

                if 'dtstart' in event:
                    dtstart = event.decoded('dtstart')
                    entry.data['ev_startdate'] = \
                        dtstart.strftime('%Y-%m-%dT%H:%M:%SZ')

                if 'dtend' in event:
                    dtend = event.decoded('dtend')
                    entry.data['ev_enddate'] = \
                        dtend.strftime('%Y-%m-%dT%H:%M:%SZ')
        
        return entries
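The dtstart/dtend values decoded from the hCalendar events are datetime objects, which the code above formats into mod_event-style timestamp strings. The formatting on its own (the datetime here is made up; note the literal 'Z' assumes the value is already in UTC):

from datetime import datetime

# A decoded 'dtstart' is a datetime; mod_event wants ISO-8601-style text.
dtstart = datetime(2006, 3, 14, 9, 30, 0)
print(dtstart.strftime('%Y-%m-%dT%H:%M:%SZ'))   # 2006-03-14T09:30:00Z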
Example #3
    def produce_entries(self):
        """
        Filter entries from a feed using the regex map, use the
        feed normalizer to produce FeedEntryDict objects.
        """
        # If this hasn't already been done, filter aggregator entries.
        if len(self.entries_filtered) < 1:
            self.filter_aggregator_entries()

        # Normalize all the filtered entries
        entries = normalize_entries(self.entries_filtered)
        for e in entries:
            e.date_fmt = self.date_fmt

        return entries
Example #5
    def produce_entries(self):
        """
        Use FeedNormalizer to get feed entries, then merge
        the lists together.
        """
        entries = []

        # Iterate and gather normalized entries for each feed.
        for feed_uri in self.feed_uris:

            # Grab and parse the feed
            feed_data = feedparser.parse(HTTPCache(feed_uri).content())

            # Append the list of normalized entries onto merged list.
            curr_entries = normalize_entries(feed_data.entries)
            for e in curr_entries:
                if self.INCLUDE_TITLE:
                    e['title'] = "["+ feed_data.feed.title + "] " + \
                                 e.data['title']
            entries.extend(curr_entries)

        return entries
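The INCLUDE_TITLE branch above prefixes each merged entry's title with its source feed's title, so readers can tell the merged entries apart. The same idea in isolation, with made-up data standing in for parsed feeds:

feeds = {
    'http://example.com/a.xml': ('Feed A', ['First post', 'Second post']),
    'http://example.com/b.xml': ('Feed B', ['Hello world']),
}

merged = []
for uri, (feed_title, entry_titles) in feeds.items():
    for title in entry_titles:
        # Same "[source] title" prefixing as the INCLUDE_TITLE branch.
        merged.append('[%s] %s' % (feed_title, title))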
Example #7
    def produce_entries(self):
        """
        Use FeedNormalizer to get feed entries, then merge
        the lists together.
        """
        # Grab and parse the feed
        feed = feedparser.parse(HTTPCache(self.main_feed).content())

        # Normalize feed meta data
        self.FEED_META = normalize_feed_meta(feed, self.date_fmt)
        self.FEED_META['feed.title'] += ' (with related links)'

        # Normalize entries from the feed
        entries = normalize_entries(feed.entries)

        # Run through all the normalized entries...
        for e in entries:

            # Perform a search on the entry title, extract the items
            result = self.technorati_search(e['title'])
            items = [x for x in result if x._name == 'item']

            # Use each search result item to populate the templates.
            insert_items = [
                self.INSERT_ITEM_TMPL % {
                    'weblog.name': i.weblog.name,
                    'weblog.url': i.weblog.url,
                    'title': i.title,
                    'permalink': i.permalink
                } for i in items
            ]
            insert_out = self.INSERT_TMPL % '\n'.join(insert_items)

            # Append the rendered search results onto the entry summary.
            e.data['summary'] += insert_out.decode('utf-8', 'ignore')

        return entries
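INSERT_ITEM_TMPL and INSERT_TMPL are ordinary %-formatting templates defined elsewhere in the class; dotted names such as 'weblog.name' work because they are plain dictionary keys. Illustrative stand-ins (the real templates are not shown in this snippet):

# Hypothetical templates, roughly the shape the code above expects.
INSERT_TMPL = '<div class="related"><ul>%s</ul></div>'
INSERT_ITEM_TMPL = '''<li>
    <a href="%(weblog.url)s">%(weblog.name)s</a>:
    <a href="%(permalink)s">%(title)s</a>
</li>'''

item = {
    'weblog.name' : 'Example Blog',
    'weblog.url'  : 'http://blog.example.com/',
    'title'       : 'A related post',
    'permalink'   : 'http://blog.example.com/2005/12/related-post',
}

insert_items = [INSERT_ITEM_TMPL % item]
insert_out = INSERT_TMPL % '\n'.join(insert_items)
print(insert_out)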
    def produce_entries(self):
        """
        Use FeedNormalizer to get feed entries, then merge
        the lists together.
        """
        # Grab and parse the feed
        feed = feedparser.parse(HTTPCache(self.main_feed).content())

        # Normalize feed meta data
        self.FEED_META = normalize_feed_meta(feed, self.date_fmt)
        self.FEED_META['feed.title'] += ' (with Amazon items)'

        # Normalize entries from the feed
        entries = normalize_entries(feed.entries)

        # Run through all the normalized entries...
        for e in entries:

            # Perform a search on the entry title, extract the items
            result = self.amazon_search(e['summary'])
            items = [x for x in result.Items if 'Item' in x._name]

            # Use each search result item to populate the templates.
            insert_items = [
                self.INSERT_ITEM_TMPL % {
                    'title': i.ItemAttributes.Title,
                    'url': i.DetailPageURL,
                    'img': i.SmallImage.URL
                } for i in items[:self.MAX_ITEMS]
            ]
            insert_out = self.INSERT_TMPL % '\n'.join(insert_items)

            # Append the rendered search results onto the entry summary.
            e.data['summary'] += insert_out.decode('utf-8', 'ignore')

        return entries
    def produce_entries(self):
        """
        Normalize the source feed, insert del.icio.us daily link recaps.
        """
        # Grab and parse the feed
        feed = feedparser.parse(HTTPCache(self.main_feed).content())

        # Normalize feed meta data
        self.FEED_META = normalize_feed_meta(feed, self.date_fmt)
        self.FEED_META['feed.title'] += ' (with del.icio.us links)'

        # Normalize entries from the feed
        entries = normalize_entries(feed.entries)

        # Iterate through a number of past days' links
        for n in range(self.NUM_DAYS):

            # Calculate and format date for this query
            post_secs = time.time() - ((n + 1) * 24 * 60 * 60)
            post_time = time.localtime(post_secs)
            post_dt = time.strftime('%Y-%m-%d', post_time)

            # Prepare for Basic Authentication in calling del API
            auth = urllib2.HTTPBasicAuthHandler()
            auth.add_password('del.icio.us API', 'del.icio.us', self.DEL_USER,
                              self.DEL_PASSWD)
            urllib2.install_opener(urllib2.build_opener(auth))

            # Build del API URL, execute the query, and parse response.
            url = self.DEL_API_URL % post_dt
            data = HTTPCache(url).content()
            doc = xmltramp.parse(data)

            # Skip this day if no posts resulted from the query
            if len(doc) < 1: continue

            # Iterate through all posts retrieved, build content for entry.
            post_out = []
            for post in doc:

                # Run through post tags, render links with template.
                tags_out = [
                    self.DEL_TAG_TMPL % {
                        'tag': t,
                        'href': 'http://del.icio.us/%s/%s' % (self.DEL_USER, t)
                    } for t in post("tag").split()
                ]

                # Build content for this link posting using template.
                try:
                    extended = post('extended')
                except:
                    extended = ''

                post_out.append(
                    self.DEL_LINK_TMPL % {
                        'href': post('href'),
                        'description': post('description'),
                        'extended': extended,
                        'tags': ''.join(tags_out)
                    })

            # Construct and append a new feed entry based on the day's links
            new_entry = FeedEntryDict(date_fmt=self.date_fmt, init_dict={
                'title'    : 'del.icio.us links on %s' % post_dt,
                'issued'   : post_secs,
                'modified' : post_secs,
                'link'     : 'http://del.icio.us/%s#%s' % \
                             (self.DEL_USER, post_dt),
                'summary'  : self.DEL_ENTRY_TMPL % "\n".join(post_out)
            })
            entries.append(new_entry)

            # Pause, because http://del.icio.us/doc/api says so.
            time.sleep(1)

        # Return the list of entries built
        return entries
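The per-day loop above walks backward from yesterday, one day at a time, building a YYYY-MM-DD date for each del.icio.us query. The date arithmetic on its own (NUM_DAYS is an arbitrary illustrative value):

import time

NUM_DAYS = 3
for n in range(NUM_DAYS):
    # n = 0 is yesterday, n = 1 is two days ago, and so on.
    post_secs = time.time() - ((n + 1) * 24 * 60 * 60)
    post_dt = time.strftime('%Y-%m-%d', time.localtime(post_secs))
    print(post_dt)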