def main(): """ Test out LogBufferFeed by maintaining a random number feed. """ # Construct the feed generator f = LogBufferFeed('random_feed') f.FEED_META['feed.title'] = 'Random Number of the Moment' f.FEED_META['feed.tagline'] = 'Serving your random number needs.' f.MAX_ENTRIES = 4 f.MAX_AGE = 30 #10 * 60 # 10 minutes # Construct and append a new entry import random num = random.random() * 1000 entry = FeedEntryDict({ 'title': 'Random number %s' % num, 'link': '', 'summary': 'Here is another random number for you: %s' % num }) f.append_entry(entry) # Output the current feed entries if len(sys.argv) > 1 and sys.argv[1] == 'rss': print f.scrape_rss() else: print f.scrape_atom()
def produce_entries(self): """ Produce feed entries from Google product item data. """ # Start off with an empty list for entries. entries = [] # Execute the Google search data = google.doGoogleSearch(self.search_query, license_key=self.license_key) # Run through all fetched items, building entries for result in data.results: # Map the web search result data to feed entry properties entry = FeedEntryDict(date_fmt=self.date_fmt, init_dict={ 'title': result.directoryTitle or '(untitled)', 'link': result.URL, 'summary': result.snippet, }) # Append completed entry to list entries.append(entry) return entries
def produce_entries(self): """ Produce feed entries from Yahoo! product item data. """ # Start off with an empty list for entries. entries = [] # Create a new Yahoo! API web search search = NewsSearch(self.app_id, query=self.search_query, sort='date', results=50) # Run through all fetched items, building entries for result in search.parse_results(): # Map the web search result data to feed entry properties entry = FeedEntryDict(date_fmt=self.date_fmt, init_dict={ 'title' : '[%s] %s' % \ (result['NewsSource'], result['Title']), 'link' : result['ClickUrl'], 'summary' : result['Summary'], 'author.name' : result['NewsSource'], 'author.link' : result['NewsSourceUrl'], 'modified' : int(result['ModificationDate']), 'issued' : int(result['PublishDate']), }) # Append completed entry to list entries.append(entry) return entries
def main(): """ Report new errors found in Apache logs. """ # Construct the feed generator f = LogBufferFeed(FEED_DIR) f.MAX_AGE = 24 * 60 * 60 # 1 day f.FEED_META['feed.title'] = '%s Apache Errors' % SITE_NAME f.FEED_META['feed.tagline'] = \ 'New errors from Apache on %s' % SITE_NAME # If there were new referrers found, insert a new entry. new_lines = bookmark_tailgrep(ERROR_LOG, max_initial_lines=3000) if len(new_lines) > 0: # Construct and append a new entry esc_lines = [escape(x) for x in new_lines] entry = FeedEntryDict({ 'title' : '%s new lines of errors' % len(new_lines), 'link' : '', 'summary' : """ <div style="font-family:monospace"> %s </div> """ % "<br />\n".join(esc_lines) }) f.append_entry(entry) # Output the current feed entries as both RSS and Atom open(FEED_NAME_FN % 'rss', 'w').write(f.scrape_rss()) open(FEED_NAME_FN % 'atom', 'w').write(f.scrape_atom())
def main(): """ Detect login activity changes and report in feed. """ # Construct the feed generator f = LogBufferFeed(FEED_DIR) f.MAX_AGE = 24 * 60 * 60 # 1 day f.FEED_META['feed.title'] = '%s Login Activity' % SITE_NAME f.FEED_META['feed.tagline'] = \ 'Summary of login activity on the %s server' % SITE_NAME # Call the command and capture output (sout, sin) = popen4(COMMAND) new_lines = [ x for x in sout.readlines() if x.find('reboot') == -1 ] # Attempt load up output from the previous run. old_lines = None old_output_fn = os.path.join(FEED_DIR, 'old_output.gz') if os.path.exists(old_output_fn): old_lines = gzip.open(old_output_fn, "r").readlines() # If there is previous output, check for changes... if old_lines: # Run a diff on the previous and current program output. diff_lines = [ x for x in difflib.ndiff(old_lines, new_lines) ] # Extract only the lines that have changed. changes_lines = [ x for x in diff_lines if x.startswith('-') or x.startswith('+') ] # Construct and append a new entry if there were changes if len(changes_lines) > 0: esc_changes_lines = [escape(x) for x in changes_lines] esc_diff_lines = [escape(x) for x in diff_lines] entry = FeedEntryDict({ 'link' : '', 'title' : TITLE_TMPL % { 'changes' : len(changes_lines) }, 'summary' : SUMMARY_TMPL % { 'changes_lines' : "<br />".join(esc_changes_lines), 'diff_lines' : "<br />".join(esc_diff_lines) } }) f.append_entry(entry) # Save output from the current run for use next time. gzip.open(old_output_fn, "w").write("".join(new_lines)) # Output the current feed entries as both RSS and Atom open(FEED_NAME_FN % 'rss', 'w').write(f.scrape_rss()) open(FEED_NAME_FN % 'atom', 'w').write(f.scrape_atom())
def normalize_entries(entries_in, full_content=True):
    """
    Return a list of normalized FeedEntryDict objects, given a
    list of entries from the feedparser.
    """
    entries = []

    # Process incoming feed entries.
    for entry_in in entries_in:

        # Create the empty new output feed entry.
        entry_out = FeedEntryDict()
        entry_out.orig = entry_in

        # Perform a straight copy of a few entry attributes.
        for n in ('id', 'title', 'link'):
            if entry_in.has_key(n):
                entry_out[n] = entry_in[n]

        # Convert feedparser time tuples to seconds and copy over.
        for n in ('modified', 'issued'):
            if entry_in.get('%s_parsed' % n, None):
                entry_out[n] = calendar.timegm(entry_in['%s_parsed' % n])

        # Decide whether to copy only summary or full content.
        if full_content and entry_in.has_key('content'):
            content_list = [x.value for x in entry_in.content
                            if 'text' in x.type]
            entry_out['summary'] = ''.join(content_list)
        elif entry_in.has_key('summary'):
            entry_out['summary'] = entry_in.summary

        # Append finished entry to list.
        entries.append(entry_out)

    # Return accumulated output feed entries.
    return entries
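A quick usage sketch, assuming feedparser is installed and FeedEntryDict is importable alongside this function (the feed URL is just a placeholder):

import feedparser

feed = feedparser.parse('http://www.example.com/index.xml')
for entry in normalize_entries(feed.entries, full_content=False):
    print entry['title']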
def produce_entries(self): """Use xpaths to extract feed entries and entry attributes.""" entries = [] # Iterate through the parts identified as log entry nodes. for entry_node in self.svn_log().xpath('//logentry'): # Extract a few basic elements from the log entry revision = self.xpval(entry_node, './@revision') author = self.xpval(entry_node, './author/text()') msg = self.xpval(entry_node, './msg/text()') # Extract and parse the date for the log entry date_str = self.xpval(entry_node, './date/text()') date_tup = time.strptime(date_str[:19], '%Y-%m-%dT%H:%M:%S') entry_time = calendar.timegm(date_tup) # Extract and process the list of affected file paths paths_changed = [] for path_node in entry_node.xpath('./paths/path'): action = self.xpval(path_node, './@action') path = self.xpval(path_node, './text()') paths_changed.append("%s %s" % (action, path)) entry_id = 'tag:%s%s' % (self.TAG_PREFIX, revision) # Build the feed entry based on log entry information entry = FeedEntryDict(init_dict={ 'id': entry_id, 'title': 'Revision %s by %s' % (revision, author), 'link': self.url, 'issued': entry_time, 'modified': entry_time, 'summary': "<pre>%s\n\nFiles affected:\n%s</pre>" % (msg, '\n'.join(paths_changed)) }, date_fmt=self.date_fmt) entries.append(entry) return entries
def produce_entries(self): """ Produce feed entries from Amazon product item data. """ entries = [] all_items = self.fetch_items() # Run through all fetched items, building entries for item in all_items: # Wrap the item in a template-friendly object tmpl_item = TrampTmplWrapper(item) # Build an empty entry object entry = FeedEntryDict(date_fmt=self.date_fmt) # Generate an ID for this entry based on tracked data m = md5.md5() for k in self.ITEM_TRACK: m.update(tmpl_item[k]) entry['id'] = state_id = "tag:%s,%s:%s" % \ (self.TAG_DOMAIN, self.TAG_DATE, m.hexdigest()) # Use the item detail URL for entry link entry['link'] = tmpl_item['DetailPageURL'] # Use the author, artist, or actor name for item # and entry author authors = [] for k in ('Author', 'Artist', 'Actor'): v = tmpl_item['ItemAttributes.%s' % k] if v: authors.append(v) entry['author.name'] = ", ".join(authors) # Build entry title and summary from string templates entry['title'] = self.TITLE_TMPL % tmpl_item entry['summary'] = self.SUMMARY_TMPL % tmpl_item # Append completed entry to list entries.append(entry) return entries
def produce_entries(self): """ Build feed entries based on queried CVS history events. """ events = self.client.history() entries = [] for event in events[:self.MAX_ENTRIES]: # Build a GUID for this entry cvs_id = '%(path)s:%(revision)s' % event entry_id = 'tag:%s%s' % (self.TAG_PREFIX, quote(cvs_id)) # Attempt to grab an existing state record for this entry ID. if not self.state_db.has_key(entry_id): self.state_db[entry_id] = {} entry_state = self.state_db[entry_id] # If this entry's state doesn't already have a description # cached, query CVS for the log entry and grab the it. if not entry_state.has_key('description'): log_entry = self.client.rlog(event.revision, event.path) entry_state['description'] = log_entry.description description = entry_state['description'] # Build the feed entry based on the CVS event and log entry entry = FeedEntryDict(init_dict={ 'id': entry_id, 'title': self.TITLE_TMPL % event, 'link': self.LINK_TMPL % event, 'author.name': event.user, 'modified': event.time, 'issued': event.time, 'summary': '<pre>%s</pre>' % description }, date_fmt=self.date_fmt) # Append the completed entry to the list, and save the # entry state. entries.append(entry) self.state_db[entry_id] = entry_state return entries
def entries_from_messages(self, msgs):
    """
    Given a list of email.Message, attempt to build a list of
    FeedEntryDict objects.
    """
    entries = []

    for msg in msgs:
        entry = FeedEntryDict(date_fmt=self.date_fmt)

        # Set the 'dummy' link for the entry from feed.link
        entry['link'] = self.FEED_META['feed.link']

        # Use message Subject for entry title.
        entry['title'] = msg.get('Subject', '(Untitled)')

        # Use From header for entry author name.
        entry['author.name'] = msg['From']

        # Convert message Date into seconds, use for modified
        # and issued
        msg_time_raw = email.Utils.parsedate(msg['Date'])
        msg_time = time.mktime(msg_time_raw)
        entry.data['modified'] = entry.data['issued'] = msg_time

        # Get a GUID for this entry.
        entry['id'] = self.build_guid_for_message(msg, entry)

        # Summarize the email for the entry.
        entry['summary'] = self.extract_summary_from_message(msg)

        # Stuff the new entry into the running list.
        entries.append(entry)

    # Finally, return what was accumulated
    return entries
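A usage sketch, assuming the messages come from a Unix mbox file via the standard library (the mailbox path and the feed_maker object holding this method are hypothetical):

import email, mailbox

mbox = mailbox.PortableUnixMailbox(open('Inbox'),
                                   email.message_from_file)
msgs = [m for m in mbox]
entries = feed_maker.entries_from_messages(msgs)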
def produce_entries(self): """ Produce feed entries from Yahoo! product item data. """ # Start off with an empty list for entries. entries = [] # Create a new Yahoo! API web search search = WebSearch(self.app_id, query=self.search_query, results=50) # Execute the query and gather results. results = [r for r in search.parse_results()] # Sort the results in reverse-chronological order by # modification date results.sort(lambda a,b: \ cmp(b['ModificationDate'], a['ModificationDate'])) # Run through all fetched items, building entries for result in results: # Map the web search result data to feed entry properties entry = FeedEntryDict(date_fmt=self.date_fmt, init_dict={ 'title': result['Title'], 'link': result['ClickUrl'], 'summary': result['Summary'], 'modified': int(result['ModificationDate']), 'issued': int(result['ModificationDate']), }) # Append completed entry to list entries.append(entry) return entries
def produce_entries(self): """ Normalize the source feed, insert del.icio.us daily link recaps. """ # Grab and parse the feed feed = feedparser.parse(HTTPCache(self.main_feed).content()) # Normalize feed meta data self.FEED_META = normalize_feed_meta(feed, self.date_fmt) self.FEED_META['feed.title'] += ' (with del.icio.us links)' # Normalize entries from the feed entries = normalize_entries(feed.entries) # Iterate through a number of past days' links for n in range(self.NUM_DAYS): # Calculate and format date for this query post_secs = time.time() - ((n + 1) * 24 * 60 * 60) post_time = time.localtime(post_secs) post_dt = time.strftime('%Y-%m-%d', post_time) # Prepare for Basic Authentication in calling del API auth = urllib2.HTTPBasicAuthHandler() auth.add_password('del.icio.us API', 'del.icio.us', self.DEL_USER, self.DEL_PASSWD) urllib2.install_opener(urllib2.build_opener(auth)) # Build del API URL, execute the query, and parse response. url = self.DEL_API_URL % post_dt data = HTTPCache(url).content() doc = xmltramp.parse(data) # Skip this day if no posts resulted from the query if not len(doc) > 0: continue # Iterate through all posts retrieved, build content for entry. post_out = [] for post in doc: # Run through post tags, render links with template. tags_out = [ self.DEL_TAG_TMPL % { 'tag': t, 'href': 'http://del.icio.us/%s/%s' % (self.DEL_USER, t) } for t in post("tag").split() ] # Build content for this link posting using template. try: extended = post('extended') except: extended = '' post_out.append( self.DEL_LINK_TMPL % { 'href': post('href'), 'description': post('description'), 'extended': extended, 'tags': ''.join(tags_out) }) # Construct and append a new feed entry based on the day's links new_entry = FeedEntryDict(date_fmt=self.date_fmt, init_dict={ 'title' : 'del.icio.us links on %s' % post_dt, 'issued' : post_secs, 'modified' : post_secs, 'link' : 'http://del.icio.us/%s#%s' % \ (self.DEL_USER, post_dt), 'summary' : self.DEL_ENTRY_TMPL % "\n".join(post_out) }) entries.append(new_entry) # Pause, because http://del.icio.us/doc/api says so. time.sleep(1) # Return the list of entries built return entries
def main(): """ Scan all feeds and update the feed with a new link popularity report entry. """ # Construct the feed generator. f = LogBufferFeed(FEED_DIR) f.MAX_AGE = 1 * 24 * 60 * 60 # 1 day f.FEED_META['feed.title'] = FEED_TITLE f.FEED_META['feed.tagline'] = FEED_TAGLINE # Load up the list of feeds. feed_uris = [x.strip() for x in open(FEEDS_FN, 'r').readlines()] # Skim for links from each feed, collect feed and entries in an # inverted index using link URLs as top-level keys. links = {} for feed_uri in feed_uris: feed_data = feedparser.parse(feed_uri) # Grab the feed metadata from parsed feed. feed = feed_data.feed feed_link = feed.get('link', '#') # Process all entries for their links... for curr_entry in feed_data.entries: # HACK: Ignore entries without modification dates. # Maybe improve this by stashing seen dates in a DB. if curr_entry.get('modified_parsed', None) is None: continue # If the current entry is older than the max allowed age, # skip processing it. now = time.time() entry_time = calendar.timegm(curr_entry.modified_parsed) if (now - entry_time) > MAX_ENTRY_AGE: continue # Build a LinkSkimmer and feed it all summary and HTML # content data from the current entry. Ignore parse # errors in the interest of just grabbing what we can. skimmer = LinkSkimmer() try: skimmer.feed(curr_entry.get('summary', '')) for c in curr_entry.get('content', []): skimmer.feed(c.value) except HTMLParseError: pass # Process each link by adding the current feed and entry # under the link's key in the inverted index. for uri, cnt in skimmer.get_links(): if not links.has_key(uri): links[uri] = {} if not links[uri].has_key(feed_link): links[uri][feed_link] = (feed, curr_entry) # Turn the inverted index of links into a list of tuples, sort by # popularity of links as measured by number of linked entries. links_sorted = links.items() links_sorted.sort(lambda a, b: cmp(len(b[1].keys()), len(a[1].keys()))) # Build the overall entry content from all the links. links_out = [] for x in links_sorted: # Get the link and the list of linkers, skip this link if there # aren't enough linkers counted. link, linkers = x if len(linkers) < MIN_LINKS: continue # Build the list of linkers for this link by populating the # LINKER_TMPL string template. linkers_out = [] for feed, entry in linkers.values(): linkers_out.append( LINKER_TMPL % { 'feed.title': feed.get('title', 'untitled'), 'feed.link': feed.get('link', '#'), 'entry.title': entry.get('title', 'untitled'), 'entry.link': entry.get('link', '#'), }) # Build the content block for this link by populating the # LINK_TMPL string template. links_out.append( LINK_TMPL % { 'link': link, 'link_cnt': len(linkers), 'linkers': '\n'.join(linkers_out) }) # Complete building the content for this entry by populating the # CONTENT_TMPL string template. out = CONTENT_TMPL % '\n'.join(links_out) # Construct and append a new entry entry = FeedEntryDict({ 'title': TITLE_TMPL % { 'link_cnt': len(links_out), 'time': time.strftime(TITLE_TIME_FMT) }, 'link': '', 'summary': out }) f.append_entry(entry) # Output the current feed entries as both RSS and Atom open(FEED_NAME_FN % 'rss', 'w').write(f.scrape_rss()) open(FEED_NAME_FN % 'atom', 'w').write(f.scrape_atom())