def test_filter_chain(ctx):
    """Exercise a filter chain end to end: the first matching rule's action wins."""
    from melk.util.dibject import Dibject, dibjectify
    from melkman.filters import NewsItemFilterFactory, ACCEPT_ITEM, REJECT_ITEM

    factory = NewsItemFilterFactory(ctx.component_manager)

    rule_specs = [
        {'op': 'match_author',
         'config': {'values': ['fred']},
         'action': 'reject'},
        {'op': 'and',
         'config': {'filters': [
             {'op': 'match_author', 'config': {'values': ['barney']}},
             {'op': 'match_field', 'config': {'field': 'foo', 'values': ['bar']}},
         ]},
         'action': 'accept'},
        {'op': 'match_field',
         'config': {'field': 'foo', 'values': ['bar']},
         'action': 'reject'},
        {'op': 'match_all', 'config': {}, 'action': 'accept'},
    ]

    chain = factory.create_chain([dibjectify(spec) for spec in rule_specs])

    # fred hits the leading reject rule
    assert chain(dummy_news_item({'author': 'fred'})) == REJECT_ITEM
    # nothing matches until the trailing match_all accept
    assert chain(dummy_news_item({})) == ACCEPT_ITEM
    # foo=bar alone falls through to the reject rule
    assert chain(dummy_news_item({'details': {'foo': 'bar'}})) == REJECT_ITEM
    # barney + foo=bar satisfies the 'and' accept before the reject is reached
    assert chain(dummy_news_item({'author': 'barney', 'details': {'foo': 'bar'}})) == ACCEPT_ITEM
def dummy_news_item(d):
    """Build a DummyItem from *d*, filling any missing fields with placeholder values.

    Values already present in *d* are kept; only absent keys receive defaults.
    """
    item = DummyItem(dibjectify(d))

    # Table of placeholder values applied only where *d* did not supply one.
    placeholders = {
        'author': 'Whoever T. Merriweather',
        'item_id': random_id(),
        'timestamp': datetime.utcnow(),
        'title': 'The News Title',
        'link': 'http://example.org/blagosphere?id=12',
        'source_title': 'The Blags',
        'source_url': 'http://example.org/blagosphere',
        'summary': 'abaraljsrs sjrkja rsj klrjewori ew rwa riojweroiwer iowr wre',
        'details': Dibject(),
    }
    for field, value in placeholders.items():
        item.setdefault(field, value)

    return item
def __init__(self, config):
    """Normalize and store *config*, set up greenlet-local storage, and discover plugins.

    The broker handle starts out unset (presumably created lazily
    elsewhere — confirm against the rest of the class).
    """
    # keep a clean built-in-types copy of the configuration
    self.config = dibjectify(config)
    self._local = green_local()
    self._broker = None
    # side effect: register any plugins advertised under the melkman entry point
    find_plugins_by_entry_point(MELKMAN_PLUGIN_ENTRY_POINT)
def parse_feed(content, feed_url):
    """Parse raw feed *content* fetched from *feed_url* into a cleaned-up structure.

    The feedparser output is converted to plain built-in types (via dibjectify),
    then normalized:

    - the feed is guaranteed an ``id`` (falls back to the lowercased canonical url)
    - the feed is guaranteed a ``rel='self'`` link pointing at the canonical url
    - each surviving entry gets an ``id``, a ``melk_id`` guid, and a ``source``
      record pointing back at this feed (any pre-existing source is moved to
      ``original_source``); entries with no recognizable id are dropped

    Raises InvalidFeedError if the content does not parse as a feed.
    """
    fake_headers = {
        'content-location': feed_url,
        'content-type': 'text/xml; charset=utf-8',
    }
    ff = feedparser.parse(content, header_defaults=fake_headers)

    # make a clean copy composed of built-in types
    ff = dibjectify(ff)

    if ff is None or 'feed' not in ff:
        raise InvalidFeedError()

    #
    # perform some cleanup...
    #
    source_url = canonical_url(feed_url)

    # make sure the feed has an id...
    if 'id' not in ff.feed:
        ff.feed['id'] = source_url.lower()

    # make sure the feed has a self referential link
    _ensure_self_link(ff, source_url)

    # source information copied into each entry so entries can stand alone
    source_info = _extract_source_info(ff)

    out_entries = []
    for e in ff.get('entries', []):
        # make sure it has an id
        eid = e.get('id')
        if eid is None:
            eid = find_best_entry_id(e)
        if eid is None:
            # throw this entry out, it has no
            # id, title, summary or content
            # that is recognizable...
            continue
        e['id'] = eid

        # assign a guid based on the id given and the source url
        e['melk_id'] = melk_id(eid, source_url.lower())

        # build a 'source' entry for each entry which points
        # back to this feed. if there is already a source
        # specified in the entry, we move it aside to
        # original_source.
        if 'source' in e:
            e['original_source'] = e.source
        e.source = deepcopy(source_info)

        out_entries.append(e)
    ff['entries'] = out_entries

    return ff


def _ensure_self_link(ff, source_url):
    """Append a rel='self' link to ff.feed.links unless one is already present."""
    ff.feed.setdefault('links', [])
    if not any(link.rel == 'self' for link in ff.feed.links):
        ff.feed.links.append(Dibject(rel='self', href=source_url, title=''))


def _extract_source_info(ff):
    """Deep-copy the identifying fields of ff.feed into a fresh Dibject."""
    source_info = Dibject()
    for k in ['id', 'title', 'title_detail', 'link', 'links', 'icon']:
        try:
            source_info[k] = deepcopy(ff.feed[k])
        except KeyError:
            # field absent from this feed; skip it
            pass
    return source_info