import time
from datetime import datetime


def get_rss(client, hours, cat):
    """ Generate and return the aggregated RSS feed. """
    from PyRSS2Gen import RSS2, RSSItem
    from StringIO import StringIO

    # Scan window: the last `hours` hours (default 24), optionally limited
    # to one category; '__all__' is the index bucket covering every category.
    t = time.time() - int(hours or 24) * 60 * 60
    start_row = build_key(cat or '__all__', t)
    stop_row = build_key(cat or '__all__', time.time())

    items = []
    scanner = db.Scanner(client, 'UrlsIndex', ['Url:'], start_row, stop_row)
    for row in scanner:
        # Each index row points at the full record in the 'Urls' table.
        url = client.getRow('Urls', row.columns['Url:'].value)[0]
        items.append(RSSItem(
            title=url.columns['Content:title'].value.decode('utf-8', 'replace'),
            link=url.row,
            description=url.columns['Content:raw'].value.decode('utf-8', 'replace'),
            pubDate=datetime.fromtimestamp(float(url.columns['Meta:updated'].value)),
        ))
    # The scan yields rows oldest-first; reverse for a newest-first feed.
    items.reverse()

    rss = RSS2(
        title='Aggregated feed',
        link='http://example.com/rss',
        description='HBase aggregated feed',
        lastBuildDate=datetime.now(),
        items=items,
    )
    out = StringIO()
    rss.write_xml(out)
    return out.getvalue()
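# get_rss above and dump_urls below both depend on a build_key helper that is
# defined elsewhere in this module. A minimal sketch of the property the scans
# rely on, assuming category-prefixed, zero-padded millisecond timestamps
# (illustrative only; the real key format may differ):
#
#   def build_key(cat, t):
#       # Zero-padding keeps lexicographic row-key order equal to
#       # chronological order within a category, so a scan over
#       # [start_row, stop_row) walks the time window oldest-first.
#       return '%s:%013d' % (cat, int(t * 1000))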
def dump_urls(client, hours, cat):
    """ Dump an aggregated list of URLs on stdout. """
    # Same index scan as get_rss, but print the raw URLs only.
    t = time.time() - int(hours or 24) * 60 * 60
    start_row = build_key(cat or '__all__', t)
    stop_row = build_key(cat or '__all__', time.time())

    scanner = db.Scanner(client, 'UrlsIndex', ['Url:'], start_row, stop_row)
    urls = [row.columns['Url:'].value for row in scanner]
    urls.reverse()  # newest first
    for url in urls:
        print url
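# Example wiring (a sketch, not part of this module's confirmed API): the
# `client` argument is an HBase Thrift client. Assuming the standard
# Thrift-generated `hbase` bindings and a Thrift gateway on localhost:9090
# (module path, port, and the 'python' category are assumptions), usage
# could look like this:
#
#   from thrift.transport import TSocket, TTransport
#   from thrift.protocol import TBinaryProtocol
#   from hbase import Hbase
#
#   transport = TTransport.TBufferedTransport(TSocket.TSocket('localhost', 9090))
#   client = Hbase.Client(TBinaryProtocol.TBinaryProtocol(transport))
#   transport.open()
#   dump_urls(client, 24, None)          # newest-first URLs, last 24 hours
#   print get_rss(client, 48, 'python')  # feed for a hypothetical category
#   transport.close()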