Пример #1
0
def test_filter_chain(ctx):
    """Exercise a four-rule filter chain built by NewsItemFilterFactory.

    The rules are, in order: reject author 'fred'; accept items whose author
    is 'barney' AND whose 'foo' field is 'bar'; reject any item whose 'foo'
    field is 'bar'; accept everything else.  The expectations at the bottom
    imply that the first matching rule decides the outcome.
    """
    from melk.util.dibject import Dibject, dibjectify
    from melkman.filters import NewsItemFilterFactory, ACCEPT_ITEM, REJECT_ITEM

    factory = NewsItemFilterFactory(ctx.component_manager)

    # Each rule is spelled out with its own literals so no nested structure
    # is shared between two rules.
    reject_fred = {
        'op': 'match_author',
        'config': {'values': ['fred']},
        'action': 'reject',
    }
    accept_barney_with_foo = {
        'op': 'and',
        'config': {
            'filters': [
                {'op': 'match_author',
                 'config': {'values': ['barney']}},
                {'op': 'match_field',
                 'config': {'field': 'foo', 'values': ['bar']}},
            ],
        },
        'action': 'accept',
    }
    reject_foo = {
        'op': 'match_field',
        'config': {'field': 'foo', 'values': ['bar']},
        'action': 'reject',
    }
    accept_everything_else = {
        'op': 'match_all',
        'config': {},
        'action': 'accept',
    }

    rules = [
        dibjectify(rule)
        for rule in (reject_fred,
                     accept_barney_with_foo,
                     reject_foo,
                     accept_everything_else)
    ]
    run_chain = factory.create_chain(rules)

    # (fields given to the dummy item, expected verdict), checked in order.
    expectations = [
        ({'author': 'fred'}, REJECT_ITEM),
        ({}, ACCEPT_ITEM),
        ({'details': {'foo': 'bar'}}, REJECT_ITEM),
        ({'author': 'barney', 'details': {'foo': 'bar'}}, ACCEPT_ITEM),
    ]
    for fields, verdict in expectations:
        assert run_chain(dummy_news_item(fields)) == verdict
Пример #2
0
def dummy_news_item(d):
    """Build a DummyItem from *d*, filling in any standard field it lacks.

    *d* is passed through ``dibjectify`` and wrapped in ``DummyItem``; every
    field listed below that is not already present is then set to a
    placeholder value.  Fields supplied by the caller are left untouched.

    Returns the populated DummyItem.
    """
    item = DummyItem(dibjectify(d))

    # Fallback values are computed up front, in this order, whether or not
    # the corresponding field ends up being used.
    fallbacks = (
        ('author', 'Whoever T. Merriweather'),
        ('item_id', random_id()),
        ('timestamp', datetime.utcnow()),
        ('title', 'The News Title'),
        ('link', 'http://example.org/blagosphere?id=12'),
        ('source_title', 'The Blags'),
        ('source_url', 'http://example.org/blagosphere'),
        ('summary', 'abaraljsrs sjrkja rsj klrjewori ew rwa riojweroiwer iowr wre'),
        ('details', Dibject()),
    )
    for field, value in fallbacks:
        item.setdefault(field, value)

    return item
Пример #3
0
 def __init__(self, config):
     """Set up the instance from *config* and trigger plugin discovery.

     config: configuration data; it is passed through ``dibjectify`` and
         the result is kept on ``self.config``.
     """
     self.config = dibjectify(config)
     # NOTE(review): presumably greenlet-local storage, judging by the
     # helper's name -- confirm against green_local's definition.
     self._local = green_local()
     # Called for its side effects only; the return value is discarded.
     find_plugins_by_entry_point(MELKMAN_PLUGIN_ENTRY_POINT)
     # No broker yet; presumably created on demand elsewhere in the class
     # (not visible from this excerpt).
     self._broker = None
Пример #4
0
def parse_feed(content, feed_url):
    """Parse raw feed *content* and normalise the result.

    The content is handed to ``feedparser.parse`` with default headers that
    name *feed_url* as the content location and declare UTF-8 XML.  The
    parsed structure is then cleaned up:

    * the feed gets an ``id`` (the lower-cased canonical URL) if it has none;
    * the feed gets a ``rel='self'`` link pointing at the canonical URL if
      no such link exists;
    * every entry gets an ``id`` (entries for which none can be derived are
      dropped), a ``melk_id`` and a ``source`` copied from the feed; a
      pre-existing ``source`` is preserved as ``original_source``.

    Args:
        content: the feed document to parse.
        feed_url: the URL the content was obtained from.

    Returns:
        The dibjectified parse result with ``entries`` replaced by the list
        of entries that were kept.

    Raises:
        InvalidFeedError: if the parse result is None or has no ``feed`` key.
    """
    default_headers = {
        'content-location': feed_url,
        'content-type': 'text/xml; charset=utf-8',
    }

    # Convert the parser output into a clean copy made of built-in types
    # before touching it.
    ff = dibjectify(feedparser.parse(content, header_defaults=default_headers))
    if ff is None or 'feed' not in ff:
        raise InvalidFeedError()

    #
    # Cleanup of feed-level metadata.
    #
    source_url = canonical_url(feed_url)

    # The feed must carry an id; fall back to the lower-cased canonical URL.
    if 'id' not in ff.feed:
        ff.feed['id'] = source_url.lower()

    # The feed must carry a self-referential link.
    ff.feed.setdefault('links', [])
    if not any(link.rel == 'self' for link in ff.feed.links):
        ff.feed.links.append(Dibject(rel='self', href=source_url, title=''))

    # Snapshot of the feed-level fields that describe the source; a copy of
    # this is attached to every entry below.
    source_info = Dibject()
    for key in ('id', 'title', 'title_detail', 'link', 'links', 'icon'):
        try:
            source_info[key] = deepcopy(ff.feed[key])
        except KeyError:
            # Field not available for this feed; leave it out.
            continue

    kept_entries = []
    for entry in ff.get('entries', []):
        entry_id = entry.get('id', None)
        if entry_id is None:
            entry_id = find_best_entry_id(entry)
            if entry_id is None:
                # Nothing recognisable (id, title, summary or content) to
                # derive an id from: discard the entry.
                continue
            entry['id'] = entry_id

        # Guid derived from the entry id and the (lower-cased) source URL.
        entry['melk_id'] = melk_id(entry_id, source_url.lower())

        # Point the entry back at this feed, moving any source it already
        # declared out of the way first.
        if 'source' in entry:
            entry['original_source'] = entry.source
        entry.source = deepcopy(source_info)

        kept_entries.append(entry)

    ff['entries'] = kept_entries

    return ff