Example #1
def _update_feed(feed, content, context, method):
    """
    feed - RemoteFeed instance
    updates feed and returns a list of new/updated NewsItems
    
    no changes are pushed to the database.
    """
        
    try:
        fp = parse_feed(content, feed.url)
    except Exception:
        feed.record_update_info(success=False, updates=0,
                                reason='Feed could not be parsed',
                                method=method)
        import traceback
        log.error("unable to parse feed %s: %s" % (feed.url, traceback.format_exc()))
        return {}

    updated_items = _find_updates(feed, fp)

    # update feed metadata
    feed.feed_info = deepcopy(fp.feed)
    feed.title = fp.feed.get('title', '')
    
    # add item references to the RemoteFeed, remembering each item's trace
    traces = {}
    for item in updated_items:
        trace = item_trace(item)
        traces[item.melk_id] = trace
        ref = {'item_id': item.melk_id}
        ref.update(trace)
        feed.add_news_item(ref)
    feed.record_update_info(success=True, updates=len(updated_items), method=method)

    # grab any existing entries (need revisions to push update)...
    save_items = {}
    for r in context.db.view('_all_docs', keys=list(traces.keys()), include_docs=True).rows:
        if 'doc' in r:
            save_items[r.key] = NewsItem.wrap(r.doc)

    for item in updated_items:
        iid = item.melk_id
        trace = traces[iid]
        
        news_item = save_items.get(iid, None)
        if news_item is None:
            # if it is a new entry, create it
            news_item = NewsItem(iid, **trace)
            save_items[item.melk_id] = news_item
        else:
            # otherwise, just update fields
            for k, v in trace.items():
                setattr(news_item, k, v)
        news_item.details = item

    return save_items
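
The docstring stresses that _update_feed only mutates objects in memory; the caller is expected to persist both the feed and the returned items. A minimal sketch of such a caller, assuming the documents are couchdb.mapping Document subclasses (so they expose store()) and using a hypothetical fetch_feed_body helper for the HTTP fetch:

def refresh_feed(feed, context, method='poll'):
    # fetch_feed_body() is a hypothetical helper returning the raw feed bytes.
    content = fetch_feed_body(feed.url)

    # _update_feed mutates `feed` in memory and returns the new/updated
    # NewsItems keyed by melk_id; nothing is written to the database here.
    items = _update_feed(feed, content, context, method)

    # Persisting is the caller's job: write the feed and every item back.
    feed.store(context.db)
    for item in items.values():
        item.store(context.db)
    return items

A bulk write (context.db.update(...) over the collected documents) would cut round trips; the one-at-a-time form just keeps the sketch obvious.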
Example #2
def _bucket_latest_entries_batch(bucket, limit=DEFAULT_BATCH_SIZE, startkey=None, startkey_docid=None):
    limit = min(limit, MAX_BATCH_SIZE)
    query = dict(
        limit=limit + 1, # request 1 more than limit to see if there's a next batch
        startkey=[bucket.id, {}], # initial batch; overridden for subsequent batches below
        endkey=[bucket.id],
        include_docs=True,
        descending=True,
        )
    if startkey is not None: # subsequent batches
        assert startkey_docid is not None, 'startkey given with no startkey_docid'
        query.update(startkey=startkey, startkey_docid=startkey_docid)

    rows = list(view_entries_by_timestamp(ctx.db, **query))
    if len(rows) > limit: # there's another batch after this one
        lastrow = rows.pop()
        next_url = url_for('bucket_latest_entries',
            bucket=bucket,
            startkey=json_sleep(lastrow.key),
            startkey_docid=lastrow.id,
            )
    else:
        next_url = None

    # Each row's doc is a NewsItemRef holding an item_id; resolve those ids
    # to the full NewsItem documents in one batch fetch.
    entryids = [r.doc['item_id'] for r in rows]
    entries = [tidy_entry(i) for i in NewsItem.get_by_ids(entryids, ctx)]
    return (entries, next_url)
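
The limit + 1 trick lets the function detect whether another page exists without a second query: the extra row is popped off and its key/docid become the continuation point baked into the next link. A hedged sketch of a web handler driving this batching, where the request plumbing and the use of json.loads to undo json_sleep are assumptions rather than part of the original code:

import json

def bucket_latest_entries(request, bucket):
    # Continuation parameters round-trip through the `next` URL built above.
    startkey = request.args.get('startkey')
    startkey_docid = request.args.get('startkey_docid')
    if startkey is not None:
        # Assumption: json_sleep JSON-serialises the view key, so json.loads
        # recovers the original [bucket_id, timestamp] list.
        startkey = json.loads(startkey)

    entries, next_url = _bucket_latest_entries_batch(
        bucket,
        limit=int(request.args.get('limit', DEFAULT_BATCH_SIZE)),
        startkey=startkey,
        startkey_docid=startkey_docid,
    )
    return {'entries': entries, 'next': next_url}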
Example #3
def _bucket_latest_entries_batch(bucket, limit=DEFAULT_FEED_SIZE):
    limit = min(limit, MAX_FEED_SIZE)
    query = dict(
        limit=limit,
        startkey=[bucket.id, {}],
        endkey=[bucket.id],
        include_docs=True,
        descending=True,
        )
    entryids = [r.doc['item_id'] for r in view_entries_by_timestamp(ctx.db, **query)]
    return [tidy_entry(i) for i in NewsItem.get_by_ids(entryids, ctx)]
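
This variant backs a fixed-size feed rather than a paginated API: it runs the same bucket query capped at MAX_FEED_SIZE and skips the continuation bookkeeping entirely. In both versions the key range leans on CouchDB view collation (assuming the entries_by_timestamp view emits keys shaped like [bucket_id, timestamp]): objects sort after every other JSON type, so startkey=[bucket.id, {}] sits above any timestamp for the bucket, endkey=[bucket.id] sits below them, and descending=True walks that range from newest to oldest.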