def _update_feed(feed, content, context, method):
    """
    feed - RemoteFeed instance

    Updates the feed and returns a dict mapping melk id to new/updated
    NewsItems.  No changes are pushed to the database.
    """
    try:
        fp = parse_feed(content, feed.url)
    except Exception:
        feed.record_update_info(success=False, updates=0,
                                reason='Feed could not be parsed',
                                method=method)
        import traceback
        log.error("unable to parse feed %s: %s" % (feed.url, traceback.format_exc()))
        return {}

    updated_items = _find_updates(feed, fp)

    # update feed metadata
    feed.feed_info = deepcopy(fp.feed)
    feed.title = fp.feed.get('title', '')

    # add item references to the RemoteFeed, gather traces by melk id
    traces = {}
    for item in updated_items:
        trace = item_trace(item)
        traces[item.melk_id] = trace
        ref = {'item_id': item.melk_id}
        ref.update(trace)
        feed.add_news_item(ref)

    feed.record_update_info(success=True, updates=len(updated_items),
                            method=method)

    # grab any existing entries (need current revisions to push an update)...
    save_items = {}
    for r in context.db.view('_all_docs', keys=traces.keys(), include_docs=True).rows:
        if 'doc' in r:
            save_items[r.key] = NewsItem.wrap(r.doc)

    for item in updated_items:
        iid = item.melk_id
        trace = traces[iid]
        news_item = save_items.get(iid, None)
        if news_item is None:
            # if it is a new entry, create it
            news_item = NewsItem(iid, **trace)
            save_items[iid] = news_item
        else:
            # otherwise, just update fields
            for k, v in trace.items():
                setattr(news_item, k, v)
        news_item.details = item

    return save_items
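
# _update_feed deliberately leaves persistence to its caller.  The sketch below
# shows one way a caller might push the returned documents; _push_feed_update is
# a hypothetical helper, not part of this module.  It assumes the couchdb-python
# mapping layer (NewsItem.wrap above suggests couchdb.mapping.Document, whose
# store() method writes a document to a Database); if a different CouchDB
# wrapper is in use, the save calls will differ.
def _push_feed_update(feed, content, context, method):
    # hypothetical: run the in-memory update, then write everything out
    save_items = _update_feed(feed, content, context, method)
    for news_item in save_items.values():
        # existing items already carry their current _rev (fetched via the
        # _all_docs query above) so storing them updates rather than conflicts;
        # new items are created fresh
        news_item.store(context.db)
    # the RemoteFeed accumulated new item refs and update info; save it too
    feed.store(context.db)
    return save_items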

def _bucket_latest_entries_batch(bucket, limit=DEFAULT_BATCH_SIZE,
                                 startkey=None, startkey_docid=None):
    limit = min(limit, MAX_BATCH_SIZE)
    query = dict(
        limit=limit + 1,  # request 1 more than limit to see if there's a next batch
        startkey=[bucket.id, {}],  # initial batch; overridden for subsequent batches below
        endkey=[bucket.id],
        include_docs=True,
        descending=True,
    )
    if startkey is not None:
        # subsequent batches
        assert startkey_docid is not None, 'startkey given with no startkey_docid'
        query.update(startkey=startkey, startkey_docid=startkey_docid)

    rows = list(view_entries_by_timestamp(ctx.db, **query))
    if len(rows) > limit:
        # there's another batch after this one
        lastrow = rows.pop()
        next_url = url_for('bucket_latest_entries',
                           bucket=bucket,
                           startkey=json_sleep(lastrow.key),
                           startkey_docid=lastrow.id,
                           )
    else:
        next_url = None

    #entries = [tidy_entry(NewsItemRef.from_doc(r.doc, ctx)) for r in rows]
    entryids = [r.doc['item_id'] for r in rows]
    entries = [tidy_entry(i) for i in NewsItem.get_by_ids(entryids, ctx)]
    return (entries, next_url)
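
# A minimal sketch of walking a bucket batch by batch using the function above.
# The query-parameter names come from the url_for call (startkey,
# startkey_docid); decoding startkey with json.loads assumes json_sleep emits
# plain JSON, and iter_bucket_entries itself is a hypothetical helper, not part
# of this module.
import json
from urlparse import urlparse, parse_qs  # urllib.parse on Python 3

def iter_bucket_entries(bucket, batch_size=DEFAULT_BATCH_SIZE):
    startkey = None
    startkey_docid = None
    while True:
        entries, next_url = _bucket_latest_entries_batch(
            bucket, limit=batch_size,
            startkey=startkey, startkey_docid=startkey_docid)
        for entry in entries:
            yield entry
        if next_url is None:
            break
        # pull the continuation point back out of the generated next link
        params = parse_qs(urlparse(next_url).query)
        startkey = json.loads(params['startkey'][0])
        startkey_docid = params['startkey_docid'][0]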

def _bucket_latest_entries_batch(bucket, limit=DEFAULT_FEED_SIZE):
    limit = min(limit, MAX_FEED_SIZE)
    query = dict(
        limit=limit,
        startkey=[bucket.id, {}],
        endkey=[bucket.id],
        include_docs=True,
        descending=True,
    )
    entryids = [r.doc['item_id'] for r in view_entries_by_timestamp(ctx.db, **query)]
    return [tidy_entry(i) for i in NewsItem.get_by_ids(entryids, ctx)]
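
# Both variants above lean on view_entries_by_timestamp keying rows by
# [bucket id, timestamp], so descending=True with startkey=[bucket.id, {}] and
# endkey=[bucket.id] scans a single bucket newest-first.  The sketch below
# illustrates what such a view might look like using couchdb-python's
# ViewDefinition (which, like view_entries_by_timestamp, is callable as
# view(db, **query)); the design-doc name, doc type, and field names are
# assumptions, not the project's actual definitions.
from couchdb.design import ViewDefinition

_sketch_entries_by_timestamp = ViewDefinition(
    'bucket_indices', 'entries_by_timestamp',
    '''
    function(doc) {
        if (doc.type == "news_item_ref" && doc.bucket_id && doc.timestamp) {
            // the ref doc itself holds item_id, which the callers above read
            // from r.doc after querying with include_docs=True
            emit([doc.bucket_id, doc.timestamp], null);
        }
    }
    ''')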