def test_composite_subs_by_title(ctx):
    """
    Subscriptions read back through view_composite_subscriptions_by_title
    come out ordered by bucket title, whatever order they were subscribed in.
    """
    from melkman.db.bucket import NewsBucket
    from melkman.db.composite import Composite, view_composite_subscriptions_by_title
    from random import shuffle

    cc = Composite.create(ctx)

    # buckets are created with titles 'bucket 0' .. 'bucket 9', so the
    # creation order of this list is also the expected title order.
    buckets = []
    for i in range(10):
        bucket = NewsBucket.create(ctx)
        bucket.title = 'bucket %d' % i
        bucket.save()
        buckets.append(bucket)

    # subscribe in a random order so the view has to do the sorting
    shuffled_buckets = list(buckets)
    shuffle(shuffled_buckets)
    for bucket in shuffled_buckets:
        cc.subscribe(bucket)
    cc.save()

    # should come out in alphabetical order
    rows = list(view_composite_subscriptions_by_title(ctx.db,
                                                      startkey=[cc.id, None],
                                                      endkey=[cc.id, {}]))

    # Check the row count directly.  Relying on a leftover loop index
    # would let an empty result slip through, because the index would
    # still hold the value left behind by the bucket-creation loop.
    assert len(rows) == len(buckets)
    for expected, row in zip(buckets, rows):
        assert row.value['bucket_id'] == expected.id
def test_init_subscription(ctx):
    """
    A composite that subscribes to a bucket which already holds items
    ends up containing those items once the aggregator has run.
    """
    from eventlet import sleep, spawn
    from melkman.aggregator.worker import run_aggregator
    from melkman.db.bucket import NewsBucket
    from melkman.db.composite import Composite

    agg = spawn(run_aggregator, ctx)
    try:
        c = Composite.create(ctx)
        c.save()

        # fill a bucket before anything is subscribed to it
        entries = []
        bucket = NewsBucket.create(ctx)
        for i in range(5):
            eid = random_id()
            entries.append(eid)
            bucket.add_news_item(eid)
        bucket.save()

        sleep(.5)

        c.subscribe(bucket)
        c.save()

        # yield so the spawned aggregator gets a chance to run
        sleep(.5)

        c.reload()
        for eid in entries:
            assert c.has_news_item(eid)
    finally:
        # stop the aggregator even when an assertion above fails, so it
        # is not left running after this test.
        agg.kill()
        agg.wait()
def test_sub_loop_sane(ctx):
    """
    Two composites subscribed to each other converge on the same set of
    items instead of misbehaving on the subscription cycle.
    """
    from eventlet import sleep, spawn
    from melkman.aggregator.worker import run_aggregator
    from melkman.db.bucket import NewsBucket
    from melkman.db.composite import Composite

    agg = spawn(run_aggregator, ctx)
    try:
        # create two composites and subscribe them
        # to each other... O_O
        c1 = Composite.create(ctx)
        c2 = Composite.create(ctx)
        c1.save()
        c2.save()

        c1.subscribe(c2)
        c2.subscribe(c1)
        c1.save()
        c2.save()

        # ten distinct items on each side
        for i in range(10):
            c1.add_news_item(random_id())
            c2.add_news_item(random_id())
        c1.save()
        c2.save()

        sleep(1)

        # refresh
        c1.reload()
        c2.reload()

        # each composite should now hold its own ten plus the other's ten
        assert len(c1.entries) == 20
        assert len(c2.entries) == 20
        for iid in c1.entries:
            assert c2.has_news_item(iid)
        for iid in c2.entries:
            assert c1.has_news_item(iid)

        sleep(1)
    finally:
        # stop the aggregator even when an assertion above fails, so it
        # is not left running after this test.
        agg.kill()
        agg.wait()
def test_composites_by_sub(ctx):
    """
    view_composites_by_subscription, keyed on a bucket id, yields exactly
    the composites that are subscribed to that bucket.
    """
    from melkman.db.bucket import NewsBucket
    from melkman.db.composite import Composite, view_composites_by_subscription

    def saved_bucket():
        # create a bucket and persist it straight away
        fresh = NewsBucket.create(ctx)
        fresh.save()
        return fresh

    c1 = Composite.create(ctx)
    c2 = Composite.create(ctx)

    bucket1 = saved_bucket()
    bucket2 = saved_bucket()
    bucket3 = saved_bucket()

    # bucket1 has two subscribers, bucket2 has one, bucket3 has none
    c1.subscribe(bucket1)
    c1.save()

    c2.subscribe(bucket1)
    c2.subscribe(bucket2)
    c2.save()

    # bucket1: both composites
    found = [Composite.wrap(row.doc).id
             for row in view_composites_by_subscription(ctx.db,
                                                        include_docs=True,
                                                        startkey=bucket1.id,
                                                        endkey=bucket1.id)]
    assert len(found) == 2
    assert c1.id in found
    assert c2.id in found

    # bucket2: only the second composite
    found = [Composite.from_doc(row.doc, ctx).id
             for row in view_composites_by_subscription(ctx.db,
                                                        include_docs=True,
                                                        startkey=bucket2.id,
                                                        endkey=bucket2.id)]
    assert len(found) == 1
    assert c2.id in found

    # bucket3: nobody
    for row in view_composites_by_subscription(ctx.db,
                                               include_docs=True,
                                               startkey=bucket3.id,
                                               endkey=bucket3.id):
        assert False, 'unexpected subscription'
def _handle_update_subscription(message_data, message, context):
    """
    main aggregator handler for the 'update_subscription' message

    message_data is read for three keys: 'updated_items' (a list of
    item dicts), 'composite_id' and 'bucket_id'.  The message is dropped
    with a log line when any of them is missing or empty, when the
    composite cannot be loaded, or when the composite is not subscribed
    to the bucket.  Otherwise the items are turned into NewsItemRefs and
    pushed through the composite's filtered_update; the composite is
    saved if that reports a positive count.

    Any unexpected error is logged with its traceback and re-raised.
    """
    try:
        updated_items = message_data.get('updated_items', [])
        if len(updated_items) == 0:
            log.debug('Ignoring subscription update with no updated items...')
            return

        cid = message_data.get('composite_id', None)
        if cid is None:
            log.debug('Ignoring subscription update with no composite id...')
            return

        bid = message_data.get('bucket_id', None)
        if bid is None:
            log.debug('Ignoring subscription update to %s with no bucket id...' % cid)
            return

        composite = Composite.get(cid, context)
        if composite is None or 'Composite' not in composite.document_types:
            log.error("Ignoring subscription update for non-existent composite %s" % cid)
            return

        # check source
        if bid not in composite.subscriptions:
            log.warn('Ignoring subscription update to %s for non-subscribed bucket %s' % (cid, bid))
            # actually ignore it: without this return the update was
            # applied even though the log line says it was dropped.
            return

        log.debug("updating %s (from bucket %s)" % (cid, bid))

        updated_refs = []
        for item in updated_items:
            # rebuild the dict with str() keys before handing it on
            ref = dict((str(k), v) for k, v in item.items())
            updated_refs.append(NewsItemRef.from_doc(ref, context))

        count = composite.filtered_update(updated_refs)
        if count > 0:
            try:
                composite.save()
            except ResourceConflict:
                # not a big deal in this case. This basically means
                # our timestamp did not become the latest -- we
                # have made no alterations other than adding items.
                # Our additions succeed/fail independently of this as they
                # are separate documents.
                pass
    except:
        # log with full traceback, then let the caller see the failure
        log.error("Error updating composite subscription %s: %s" % (message_data, traceback.format_exc()))
        raise
def opml(self, id):
    """
    Export the subscriptions of the composite with the given id.

    Aborts with 404 when the composite cannot be loaded.  Otherwise the
    url of every subscription, plus a url -> title mapping for those
    that have a title, is handed to dump_opml and its result returned.
    """
    composite = Composite.get(id, ctx)
    if composite is None:
        abort(404)

    urls = []
    titles_by_url = {}
    for subscription in composite.subscriptions.itervalues():
        url = subscription.url
        urls.append(url)
        # untitled subscriptions are listed without a title entry
        title = subscription.title
        if title:
            titles_by_url[url] = title

    opmlize_response()
    return dump_opml(urls, feed_titles=titles_by_url)
def test_composite_filtered_update(ctx):
    """
    filtered_update only lets through the items accepted by the
    composite's filter stack, both before and after a save/reload.
    """
    from melkman.db.composite import Composite
    from random import shuffle

    cc = Composite.create(ctx)

    # a filter stack that accepts only things with the
    # word tortoise in the title, or is tagged tortoise
    title_filter = {'op': 'match_title',
                    'config': {'values': ['tortoise'],
                               'match_type': 'substring'},
                    'action': 'accept'}
    tag_filter = {'op': 'match_tag',
                  'config': {'values': ['tortoise']},
                  'action': 'accept'}
    reject_rest = {'op': 'match_all',
                   'config': {},
                   'action': 'reject'}
    for spec in (title_filter, tag_filter, reject_rest):
        cc.filters.append(spec)

    ok_items = [
        dummy_news_item({'title': "The tortoise and the O'Hare"}),
        dummy_news_item({'details': {'tags': [{'label': 'tortoise'},
                                              {'label': 'shells'}]}}),
    ]
    not_ok_items = [
        dummy_news_item({'title': 'Jellybirds'}),
        dummy_news_item({'details': {'tags': [{'label': 'hare'},
                                              {'label': 'shells'}]}}),
    ]

    def check_membership():
        # accepted items are present, rejected ones are not
        for wanted in ok_items:
            assert cc.has_news_item(wanted)
        for unwanted in not_ok_items:
            assert not cc.has_news_item(unwanted)

    all_items = ok_items + not_ok_items
    shuffle(all_items)
    cc.filtered_update(all_items)
    check_membership()

    # the same must hold after a round trip through the database
    cc.save()
    cc.reload()
    check_membership()
def set_opml(self, id):
    """
    Replace the subscriptions of the composite with the given id by the
    feeds listed in the posted OPML document.

    Aborts with 404 when the composite cannot be loaded and with 400
    when the posted data cannot be parsed by feeds_in_opml.  Feeds no
    longer listed are unsubscribed, newly listed feeds are subscribed,
    and the composite is saved.

    The response is a JSON mapping of outcome -> list of urls, using
    the keys "unsubscribed", "unsubscribe_failed", "subscribed",
    "subscribe_failed" and "unchanged" (only the outcomes that
    occurred are present).
    """
    composite = Composite.get(id, ctx)
    if composite is None:
        abort(404)

    opml_data = get_posted_data()
    try:
        feeds = set(feeds_in_opml(opml_data))
    except Exception:
        # Only parsing failures become a 400; KeyboardInterrupt and
        # SystemExit must not be reported as a bad request.
        import traceback
        log.error(traceback.format_exc())
        abort(400)

    result = defaultdict(list)
    oldfeeds = set(i.url for i in composite.subscriptions.itervalues())

    # drop everything that is subscribed but no longer listed
    remove = oldfeeds - feeds
    for url in remove:
        feed = RemoteFeed.get_by_url(url, ctx)
        if feed is not None:
            composite.unsubscribe(feed)
            result["unsubscribed"].append(url)
            log.debug('Unsubscribed composite "%s" from %s' % (id, url))
        else:
            result["unsubscribe_failed"].append(url)
            log.error('Expected composite "%s" to have RemoteFeed for %s' % (id, url))

    # add everything that is listed but not yet subscribed
    for url in feeds:
        if url not in oldfeeds:
            feed = get_or_immediate_create_by_url(url, ctx)
            if feed is None:
                result["subscribe_failed"].append(url)
                log.warn("Could not get or create feed for %s" % url)
                continue
            composite.subscribe(feed)
            result["subscribed"].append(url)
            log.debug('Subscribed composite "%s" to %s' % (id, url))
        else:
            result["unchanged"].append(url)

    composite.save()
    log.debug('Composite "%s" saved' % id)
    return json_response(result)
def _handle_new_subscriptions(message_data, message, context):
    """
    helper handler called when new subscriptions are added to
    a composite.

    message_data is read for 'new_subscriptions' (a list of bucket ids)
    and 'bucket_id' (the id the composite is loaded by).  Each listed
    subscription that the composite really has, and whose bucket
    exists, is initialized via composite.init_subscription -- except
    RemoteFeeds with an empty update history, which are queued for
    indexing with request_feed_index instead.  The composite is saved
    when any initialization reported updates.

    A failure while initializing one subscription is logged and the
    remaining subscriptions are still processed; any other unexpected
    error is logged with its traceback and re-raised.
    """
    try:
        new_subscriptions = message_data.get('new_subscriptions', [])
        if len(new_subscriptions) == 0:
            log.warn("Ignoring init_subscription with no new subscriptions...")
            return

        cid = message_data.get('bucket_id', None)
        if cid is None:
            log.error("Ignoring init_subscription with no bucket_id: %s" % message_data)
            return

        composite = Composite.get(cid, context)
        if composite is None or 'Composite' not in composite.document_types:
            log.error("Ignoring subscription update for non-existent composite %s" % cid)
            return

        new_feeds = []
        updates = 0
        for sub in new_subscriptions:
            if sub not in composite.subscriptions:
                log.warn("ignoring subscription %s -> %s, not in composite" % (sub, cid))
                continue

            bucket = NewsBucket.get(sub, context)
            if bucket is None:
                log.warn("Ignoring init subscription to unknown object (%s)" % composite.subscriptions[sub])
                continue

            # try 'casting' to a RemoteFeed
            if 'RemoteFeed' in bucket.document_types:
                rf = RemoteFeed.from_doc(bucket.unwrap(), context)
                # mark as needing immediate fetch if
                # there is no history for this feed.
                if len(rf.update_history) == 0:
                    new_feeds.append(rf.url)
                    continue

            try:
                log.debug("init subscription %s -> %s" % (sub, cid))
                updates += composite.init_subscription(sub)
                sleep(0)  # yield control
            except Exception:
                # Best effort per subscription: log and move on to the
                # next one.  Only Exception is caught so that exits that
                # are not ordinary errors (KeyboardInterrupt, SystemExit)
                # still stop the loop and reach the outer handler.
                log.error("Error initializing subscription %s -> %s: %s" % (sub, cid, traceback.format_exc()))

        if updates > 0:
            try:
                composite.save()
            except ResourceConflict:
                # not a big deal in this case. This basically means
                # our timestamp did not become the latest -- we
                # have made no alterations other than adding items.
                # Our additions succeed/fail independently of this as they
                # are separate documents.
                pass

        # request that we start indexing anything new...
        for url in new_feeds:
            request_feed_index(url, context)
    except:
        # log with full traceback, then let the caller see the failure
        log.error("Error handling init_subscrition %s: %s" % (message_data, traceback.format_exc()))
        raise
def test_create_composite(ctx):
    """
    Smoke test: a freshly created composite can be saved.
    """
    from melkman.db.composite import Composite

    Composite.create(ctx).save()
def test_modified_updates_composite(ctx):
    """
    Items added to a bucket show up in the composites subscribed to it,
    directly or through another composite, and nowhere else.

    Layout: c[0] <- b[0], c[1] <- b[1], c[2] <- c[0] and c[1];
    b[2] has no subscribers.
    """
    from eventlet import sleep, spawn
    from melkman.aggregator.worker import run_aggregator
    from melkman.db.bucket import NewsBucket
    from melkman.db.composite import Composite

    agg = spawn(run_aggregator, ctx)
    try:
        b = []
        c = []
        # make some buckets and composites.
        for i in range(3):
            bucket = NewsBucket.create(ctx)
            bucket.save()
            b.append(bucket)

            comp = Composite.create(ctx)
            comp.save()
            c.append(comp)

        # set up some subscriptions
        c[0].subscribe(b[0])
        c[1].subscribe(b[1])
        c[2].subscribe(c[0])
        c[2].subscribe(c[1])
        for comp in c:
            comp.save()

        # an item in bucket 0 reaches c[0] and, through it, c[2]
        id1 = random_id()
        b[0].add_news_item(id1)
        log.debug("updating bucket 0 (%s) with item %s..." % (b[0].id, id1))
        b[0].save()
        sleep(1)

        # refresh them from the db...
        for comp in c:
            comp.reload()

        assert c[0].has_news_item(id1)
        assert not c[1].has_news_item(id1)
        assert c[2].has_news_item(id1)

        # an item in bucket 1 reaches c[1] and, through it, c[2]
        id2 = random_id()
        b[1].add_news_item(id2)
        log.debug("updating bucket 1 (%s) with item %s..." % (b[1].id, id2))
        b[1].save()
        sleep(1)

        # refresh them from the db...
        for comp in c:
            comp.reload()

        assert not c[0].has_news_item(id2)
        assert c[1].has_news_item(id2)
        assert c[2].has_news_item(id2)

        # nobody is subscribed to bucket 2, so its item goes nowhere
        id3 = random_id()
        b[2].add_news_item(id3)
        log.debug("updating bucket 2 (%s) with item %s..." % (b[2].id, id3))
        b[2].save()
        sleep(1)

        # refresh them from the db...
        for comp in c:
            comp.reload()

        assert not c[0].has_news_item(id3)
        assert not c[1].has_news_item(id3)
        assert not c[2].has_news_item(id3)
    finally:
        # stop the aggregator even when an assertion above fails, so it
        # is not left running after this test.
        agg.kill()
        agg.wait()