Exemplo n.º 1
0
def rss_fetch(source, markup, url, add_rpc, got_result):    
    parsed = feedparser.parse(markup)
    # pprint(parsed)
    
    if len(parsed['entries']) == 0:
        return None
    
    source_entry_processor = create_source_entry_processor(url)
    feed_title = parsed['feed']['title']
    entries = []
    latest_date = None
    for entry in parsed['entries']:
        if 'link' in entry:
            # print entry
            link_url = urljoin(url, entry['link'].strip())
            title = entry['title']
            
            pub_time = entry.get('published_parsed')
            if pub_time:
                published = datetime.datetime.fromtimestamp(mktime(pub_time))
            else:
                published = None
            result_entry = {"title": title, "url": link_url, "published": published}
            source_entry_processor(result_entry, entry)
            entries.append(result_entry)
    
    got_result(FetchResult('rss', feed_title, entries))
Exemplo n.º 2
0
def rss_fetch(data, feed_content):
    url = data['url']
    if not feed_content:
        markup = util.url_fetch(url)
        if markup:
            feed_content = parse_as_feed(markup)
    
    if not feed_content:
        return None
    
    parsed = feed_content
    
    source_entry_processor = create_source_entry_processor(url)
    feed_title = parsed['feed']['title']
    entries = []
    latest_date = None
    for entry in parsed['entries']:
        if 'link' in entry and 'title' in entry:
            # print entry
            link_url = urljoin(url, entry['link'].strip())
            title = entry['title']
            
            pub_time = entry.get('published_parsed', entry.get('updated_parsed'))
            if pub_time:
                published = datetime.datetime.fromtimestamp(mktime(pub_time))
            else:
                published = None
            result_entry = {"title": title, "url": link_url, "published": published}
            source_entry_processor(result_entry, entry)
            entries.append(result_entry)
    
    return FetchResult('rss', feed_title, entries)