Example #1
def test_rss_parser():
    my_opener = urllib2.build_opener(TestHTTPHandler)
    urllib2.install_opener(my_opener)
    crawled_feed, data_for_crawl = rss2.parse_rss(
        rss_xml,
        'http://sourcetest.com/rss.xml'
    )
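    # Round-trip check: serialize the crawled feed to XML bytes with write() and
    # parse it back with read(), then compare the two objects field by field.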
    feed = read(Feed, write(crawled_feed, as_bytes=True))
    assert crawled_feed.id == feed.id
    title = crawled_feed.title
    assert title.type == feed.title.type
    assert title.value == feed.title.value
    links = crawled_feed.links
    assert links[1].mimetype == feed.links[1].mimetype
    assert links[1].relation == feed.links[1].relation
    assert links[1].uri == feed.links[1].uri
    rights = crawled_feed.rights
    assert rights.type == feed.rights.type
    assert rights.value == feed.rights.value
    contributors = crawled_feed.contributors
    assert contributors[0].name == feed.contributors[0].name
    assert contributors[0].email == feed.contributors[0].email
    assert contributors[1].name == feed.contributors[1].name
    assert contributors[1].email == feed.contributors[1].email
    updated_at = crawled_feed.updated_at
    assert updated_at == feed.updated_at
    categories = crawled_feed.categories
    assert categories[0].term == feed.categories[0].term
    entries = crawled_feed.entries
    assert entries[0].title.type == feed.entries[0].title.type
    assert entries[0].title.value == feed.entries[0].title.value
    assert entries[0].links[0].mimetype == feed.entries[0].links[0].mimetype
    assert entries[0].links[0].relation == feed.entries[0].links[0].relation
    assert entries[0].links[0].uri == feed.entries[0].links[0].uri
    assert entries[0].content.value == feed.entries[0].content.value
    assert entries[0].authors[0].name == feed.entries[0].authors[0].name
    assert entries[0].authors[0].email == feed.entries[0].authors[0].email
    assert entries[0].links[1].mimetype == feed.entries[0].links[1].mimetype
    assert entries[0].links[1].uri == feed.entries[0].links[1].uri
    assert entries[0].id == feed.entries[0].id
    assert (entries[0].published_at ==
            entries[0].updated_at ==
            feed.entries[0].published_at ==
            feed.entries[0].updated_at)
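    # Crawler hints (lastBuildDate, ttl) are returned separately from the feed model.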
    assert data_for_crawl == {
        'lastBuildDate': datetime.datetime(2002, 9, 7, 0, 0, 1, tzinfo=utc),
        'ttl': '10',
    }
    source = entries[0].source
    assert source.title.type == feed.entries[0].source.title.type
    assert source.title.value == feed.entries[0].source.title.value
    assert source.links[1].mimetype == feed.entries[0].source.links[1].mimetype
    assert source.links[1].uri == feed.entries[0].source.links[1].uri
    assert source.links[1].relation == feed.entries[0].source.links[1].relation
    assert source.subtitle.type == feed.entries[0].source.subtitle.type
    assert source.subtitle.value == feed.entries[0].source.subtitle.value
    assert not source.entries
Example #2
def test_write_subscription_with_ascii_title():
    rss = rss_template_with_title.format('english')
    feed, _ = parse_rss(rss)
    feed.id = 'id'

    sublist = SubscriptionList()
    sublist.subscribe(feed)

    g = write(sublist)
    assert ''.join(g)
Example #3
def test_rss_parser():
    my_opener = urllib2.build_opener(TestHTTPHandler)
    urllib2.install_opener(my_opener)
    crawled_feed, data_for_crawl = parse_rss(rss_xml,
                                             'http://sourcetest.com/rss.xml')
    feed = read(Feed, write(crawled_feed, as_bytes=True))
    assert crawled_feed.id == feed.id
    title = crawled_feed.title
    assert title.type == feed.title.type
    assert title.value == feed.title.value
    links = crawled_feed.links
    assert links[1].mimetype == feed.links[1].mimetype
    assert links[1].relation == feed.links[1].relation
    assert links[1].uri == feed.links[1].uri
    rights = crawled_feed.rights
    assert rights.type == feed.rights.type
    assert rights.value == feed.rights.value
    contributors = crawled_feed.contributors
    assert contributors[0].name == feed.contributors[0].name
    assert contributors[0].email == feed.contributors[0].email
    assert contributors[1].name == feed.contributors[1].name
    assert contributors[1].email == feed.contributors[1].email
    updated_at = crawled_feed.updated_at
    assert updated_at == feed.updated_at
    categories = crawled_feed.categories
    assert categories[0].term == feed.categories[0].term
    entries = crawled_feed.entries
    assert entries[0].title.type == feed.entries[0].title.type
    assert entries[0].title.value == feed.entries[0].title.value
    assert entries[0].links[0].mimetype == feed.entries[0].links[0].mimetype
    assert entries[0].links[0].relation == feed.entries[0].links[0].relation
    assert entries[0].links[0].uri == feed.entries[0].links[0].uri
    assert entries[0].content.value == feed.entries[0].content.value
    assert entries[0].authors[0].name == feed.entries[0].authors[0].name
    assert entries[0].authors[0].email == feed.entries[0].authors[0].email
    assert entries[0].links[1].mimetype == feed.entries[0].links[1].mimetype
    assert entries[0].links[1].uri == feed.entries[0].links[1].uri
    assert entries[0].id == feed.entries[0].id
    assert (entries[0].published_at == entries[0].updated_at ==
            feed.entries[0].published_at == feed.entries[0].updated_at)
    assert data_for_crawl == {
        'lastBuildDate': datetime.datetime(2002, 9, 7, 0, 0, 1, tzinfo=utc),
        'ttl': '10',
    }
    source = entries[0].source
    assert source.title.type == feed.entries[0].source.title.type
    assert source.title.value == feed.entries[0].source.title.value
    assert source.links[1].mimetype == feed.entries[0].source.links[1].mimetype
    assert source.links[1].uri == feed.entries[0].source.links[1].uri
    assert source.links[1].relation == feed.entries[0].source.links[1].relation
    assert source.subtitle.type == feed.entries[0].source.subtitle.type
    assert source.subtitle.value == feed.entries[0].source.subtitle.value
    assert not source.entries
Example #4
def test_log_warnings_during_rss_parsing():
    my_opener = urllib2.build_opener(TestHTTPHandler)
    urllib2.install_opener(my_opener)
    with mock.patch('logging.getLogger') as mock_func:
        crawled_feed, data_for_crawl = parse_rss(
            rss_xml, 'http://sourcetest.com/rss.xml')
    mock_func.assert_any_call('libearth.parser.rss2.rss_get_channel_data')
    mock_func.assert_any_call('libearth.parser.rss2.rss_get_item_data')
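    # Every logging.getLogger() call returns the same mock object, so all calls
    # recorded on it below should be warn('Unknown tag: %s', ...) warnings.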
    mock_logger = mock_func.return_value
    for call in mock_logger.method_calls:
        name, args, _ = call
        assert name == 'warn'
        assert args[0] == 'Unknown tag: %s'
Example #5
def test_write_subscription_with_nonascii_title():
    '''SubscriptionList converts the feed title to :class:`str`, and
    :func:`write` tries to encode the title in UTF-8. When the title contains
    non-ASCII characters, UnicodeDecodeError is raised.
    '''
    rss = rss_template_with_title.format('한글')
    feed, _ = parse_rss(rss)
    feed.id = 'id'

    sublist = SubscriptionList()
    sublist.subscribe(feed)

    g = write(sublist)
    assert ''.join(g)
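The docstring above points at a Python 2 pitfall: calling .encode('utf-8') on a
byte string makes Python 2 decode it implicitly with the ASCII codec first, so a
non-ASCII title can raise UnicodeDecodeError even though only an encode was
requested. A minimal standalone sketch of that implicit step (not libearth code,
just the failure mode the regression test guards against):

raw = '한글'.encode('utf-8')  # bytes, as a Python 2 str-typed title would be
try:
    raw.decode('ascii')       # the implicit decode Python 2 performs before re-encoding
except UnicodeDecodeError:
    pass                      # this is the error the test above exercises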
Example #6
def test_log_warnings_during_rss_parsing():
    my_opener = urllib2.build_opener(TestHTTPHandler)
    urllib2.install_opener(my_opener)
    with mock.patch('logging.getLogger') as mock_func:
        crawled_feed, data_for_crawl = rss2.parse_rss(
            rss_xml,
            'http://sourcetest.com/rss.xml'
        )
    mock_func.assert_any_call('libearth.parser.rss2.rss_get_channel_data')
    mock_func.assert_any_call('libearth.parser.rss2.rss_get_item_data')
    mock_logger = mock_func.return_value
    for call in mock_logger.method_calls:
        name, args, _ = call
        assert name == 'warn'
        assert args[0] == 'Unknown tag: %s'
Example #7
def test_rss_with_no_pubDate():
    feed_data, crawler_hints = parse_rss(rss_with_no_pubDate)
    assert feed_data.updated_at
    assert feed_data.entries[0].updated_at
Example #8
def test_rss_with_empty_title():
    """Empty title should be empty string, not :const:`None`."""
    feed, crawler_hints = parse_rss(rss_with_empty_title)
    assert feed.title.value == ''
Example #9
def test_rss_item_guid():
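    # Three guid cases: a URN kept verbatim as the entry id, a permalink URL kept
    # as-is, and an entry whose id ends up empty.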
    feed_data, crawler_hints = rss2.parse_rss(rss_with_guid, None)
    assert feed_data.entries[0].id == \
        'urn:uuid:3F2504E0-4F89-11D3-9A0C-0305E82C3301'
    assert feed_data.entries[1].id == 'http://guidtest.com/1'
    assert feed_data.entries[2].id == ''
Example #10
def test_rss_with_empty_title():
    """Empty title should be empty string, not :const:`None`."""
    feed, crawler_hints = rss2.parse_rss(rss_with_empty_title)
    assert feed.title.value == ''
Example #11
def test_rss_with_no_pubDate():
    feed_data, crawler_hints = rss2.parse_rss(rss_with_no_pubDate)
    assert feed_data.updated_at
Example #12
def test_rss_without_title():
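    # With no <title>, title and subtitle both come out as 'only description'
    # (presumably the channel description), and the feed parses with no entries.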
    feed, _ = parse_rss(rss_without_title, None)
    assert not feed.entries
    assert (text_type(feed.title) == text_type(feed.subtitle) ==
            'only description')
Example #13
def test_rss_item_guid():
    feed_data, crawler_hints = parse_rss(rss_with_guid, None)
    assert feed_data.entries[0].id == \
        'urn:uuid:3F2504E0-4F89-11D3-9A0C-0305E82C3301'
    assert feed_data.entries[1].id == 'http://guidtest.com/1'
    assert feed_data.entries[2].id == ''