示例#1
0
def get_rss(client, hours, cat):
    """
    Generate and return the aggregate rss feed.

    client -- connection object passed to db.Scanner and used for getRow
    hours  -- size of the look-back window in hours; a falsy value means 24
    cat    -- category used to build the index row keys; a falsy value
              means '__all__'

    Returns the RSS 2.0 document as a string. Items appear in the reverse
    of the order in which the index scan yields them.
    """
    from PyRSS2Gen import RSS2, RSSItem
    from StringIO import StringIO

    category = cat or '__all__'
    # Read the clock once so both ends of the scan range share one instant.
    now = time.time()
    start_row = build_key(category, now - int(hours or 24)*60*60)
    stop_row = build_key(category, now)

    items = []
    scanner = db.Scanner(client, 'UrlsIndex', ['Url:'], start_row, stop_row)
    for row in scanner:
        found = client.getRow('Urls', row.columns['Url:'].value)
        if not found:
            # The index entry points at a row that is not in 'Urls'; skip it
            # rather than let one dangling entry break the whole feed.
            # NOTE(review): assumes getRow returns an empty list for a
            # missing row (HBase Thrift behaviour) -- confirm for this client.
            continue
        url = found[0]
        items.append(RSSItem(
            title = url.columns['Content:title'].value.decode('utf-8', 'replace'),
            link = url.row,
            description = url.columns['Content:raw'].value.decode('utf-8', 'replace'),
            # PyRSS2Gen writes datetimes with a literal "GMT" suffix, so the
            # value handed over must be UTC, not local time.
            pubDate = datetime.utcfromtimestamp(float(url.columns['Meta:updated'].value))
        ))
    items.reverse()
    rss = RSS2(
        title = 'Aggregated feed',
        link = 'http://example.com/rss',
        description = 'Hbase aggregated feed',
        # UTC for the same reason as pubDate above.
        lastBuildDate = datetime.utcnow(),
        items = items
    )
    out = StringIO()
    rss.write_xml(out)
    return out.getvalue()
示例#2
0
def dump_urls(client, hours, cat):
    """
    Print the aggregated urls on stdout, one per line.

    The index is scanned over the last `hours` hours (24 when falsy) for
    category `cat` ('__all__' when falsy); urls are printed in the reverse
    of the scan order.
    """
    category = cat or '__all__'
    window = int(hours or 24) * 60 * 60
    first_key = build_key(category, time.time() - window)
    last_key = build_key(category, time.time())

    collected = []
    for entry in db.Scanner(client, 'UrlsIndex', ['Url:'], first_key, last_key):
        collected.append(entry.columns['Url:'].value)

    for address in reversed(collected):
        # A parenthesised single argument prints the same as the statement form.
        print(address)