def test_rss_parser():
    """Parse the sample RSS document and round-trip the result.

    The feed returned by ``rss2.parse_rss()`` is serialized with ``write()``
    and parsed back with ``read()``; every field compared below has to be
    equal on both sides.  The crawler hints returned alongside the feed are
    compared against their expected literal value.

    """
    # NOTE(review): another ``test_rss_parser`` is visible in this file; if
    # both live in the same module only the later definition is collected.
    # The opener is installed process-wide and is not uninstalled afterwards.
    urllib2.install_opener(urllib2.build_opener(TestHTTPHandler))
    crawled_feed, data_for_crawl = rss2.parse_rss(
        rss_xml,
        'http://sourcetest.com/rss.xml'
    )
    feed = read(Feed, write(crawled_feed, as_bytes=True))

    def same(parsed, reread, *attribute_names):
        # Compare the named attributes of both objects, in the given order.
        for attribute_name in attribute_names:
            assert (getattr(parsed, attribute_name) ==
                    getattr(reread, attribute_name))

    # Feed-level fields.
    same(crawled_feed, feed, 'id')
    same(crawled_feed.title, feed.title, 'type', 'value')
    same(crawled_feed.links[1], feed.links[1],
         'mimetype', 'relation', 'uri')
    same(crawled_feed.rights, feed.rights, 'type', 'value')
    for position in (0, 1):
        same(crawled_feed.contributors[position],
             feed.contributors[position],
             'name', 'email')
    same(crawled_feed, feed, 'updated_at')
    same(crawled_feed.categories[0], feed.categories[0], 'term')

    # First entry.
    parsed_entry = crawled_feed.entries[0]
    reread_entry = feed.entries[0]
    same(parsed_entry.title, reread_entry.title, 'type', 'value')
    same(parsed_entry.links[0], reread_entry.links[0],
         'mimetype', 'relation', 'uri')
    same(parsed_entry.content, reread_entry.content, 'value')
    same(parsed_entry.authors[0], reread_entry.authors[0], 'name', 'email')
    same(parsed_entry.links[1], reread_entry.links[1], 'mimetype', 'uri')
    same(parsed_entry, reread_entry, 'id')
    assert (parsed_entry.published_at ==
            parsed_entry.updated_at ==
            reread_entry.published_at ==
            reread_entry.updated_at)

    # Crawler hints returned next to the feed.
    expected_hints = {
        'lastBuildDate': datetime.datetime(2002, 9, 7, 0, 0, 1, tzinfo=utc),
        'ttl': '10',
    }
    assert data_for_crawl == expected_hints

    # Source of the first entry.
    source = parsed_entry.source
    reread_source = feed.entries[0].source
    same(source.title, reread_source.title, 'type', 'value')
    same(source.links[1], reread_source.links[1],
         'mimetype', 'uri', 'relation')
    same(source.subtitle, reread_source.subtitle, 'type', 'value')
    assert not source.entries
def test_write_subscription_with_ascii_title():
    """Serialize a subscription list whose only feed has an ASCII title.

    The feed is parsed from ``rss_template_with_title`` filled in with
    ``'english'``; joining the chunks produced by ``write()`` must give a
    non-empty string.

    """
    document = rss_template_with_title.format('english')
    feed, _hints = parse_rss(document)
    feed.id = 'id'
    subscriptions = SubscriptionList()
    subscriptions.subscribe(feed)
    chunks = write(subscriptions)
    serialized = ''.join(chunks)
    assert serialized
def test_rss_parser():
    """Check that a parsed RSS feed survives a write/read round trip.

    ``parse_rss()`` turns the sample document into a feed plus crawler
    hints.  The feed is written out with ``write()`` and read back with
    ``read()``, and the selected fields of both objects are compared.  The
    hints are compared against a literal dictionary.

    """
    # NOTE(review): another ``test_rss_parser`` is visible in this file; if
    # both live in the same module only the later definition is collected.
    # The opener is installed process-wide and is not uninstalled afterwards.
    opener = urllib2.build_opener(TestHTTPHandler)
    urllib2.install_opener(opener)
    source_url = 'http://sourcetest.com/rss.xml'
    crawled_feed, data_for_crawl = parse_rss(rss_xml, source_url)
    serialized = write(crawled_feed, as_bytes=True)
    feed = read(Feed, serialized)

    def expect_equal(names, crawled, restored):
        # Assert attribute-wise equality for ``names``, in the given order.
        for name in names:
            assert getattr(crawled, name) == getattr(restored, name)

    text_fields = ('type', 'value')
    person_fields = ('name', 'email')
    link_fields = ('mimetype', 'relation', 'uri')

    # Feed-level fields.
    expect_equal(('id',), crawled_feed, feed)
    expect_equal(text_fields, crawled_feed.title, feed.title)
    expect_equal(link_fields, crawled_feed.links[1], feed.links[1])
    expect_equal(text_fields, crawled_feed.rights, feed.rights)
    expect_equal(person_fields,
                 crawled_feed.contributors[0], feed.contributors[0])
    expect_equal(person_fields,
                 crawled_feed.contributors[1], feed.contributors[1])
    expect_equal(('updated_at',), crawled_feed, feed)
    expect_equal(('term',), crawled_feed.categories[0], feed.categories[0])

    # First entry.
    crawled_entry = crawled_feed.entries[0]
    restored_entry = feed.entries[0]
    expect_equal(text_fields, crawled_entry.title, restored_entry.title)
    expect_equal(link_fields,
                 crawled_entry.links[0], restored_entry.links[0])
    expect_equal(('value',), crawled_entry.content, restored_entry.content)
    expect_equal(person_fields,
                 crawled_entry.authors[0], restored_entry.authors[0])
    expect_equal(('mimetype', 'uri'),
                 crawled_entry.links[1], restored_entry.links[1])
    expect_equal(('id',), crawled_entry, restored_entry)
    assert (crawled_entry.published_at == crawled_entry.updated_at ==
            restored_entry.published_at == restored_entry.updated_at)

    # Crawler hints returned next to the feed.
    last_build = datetime.datetime(2002, 9, 7, 0, 0, 1, tzinfo=utc)
    assert data_for_crawl == {'lastBuildDate': last_build, 'ttl': '10'}

    # Source of the first entry.
    source = crawled_entry.source
    restored_source = feed.entries[0].source
    expect_equal(text_fields, source.title, restored_source.title)
    expect_equal(('mimetype', 'uri', 'relation'),
                 source.links[1], restored_source.links[1])
    expect_equal(text_fields, source.subtitle, restored_source.subtitle)
    assert not source.entries
def test_log_warnings_during_rss_parsing():
    """Check the loggers requested, and the calls made on them, while parsing.

    ``logging.getLogger`` is patched while the sample document is parsed.
    It must have been called with both ``libearth.parser.rss2`` logger names
    listed below, and every method call recorded on the returned mock logger
    must be a ``warn`` whose first positional argument is
    ``'Unknown tag: %s'``.

    """
    # NOTE(review): another test with this name is visible in this file; if
    # both live in the same module only the later definition is collected.
    urllib2.install_opener(urllib2.build_opener(TestHTTPHandler))
    with mock.patch('logging.getLogger') as get_logger:
        _feed, _hints = parse_rss(rss_xml, 'http://sourcetest.com/rss.xml')
    expected_logger_names = (
        'libearth.parser.rss2.rss_get_channel_data',
        'libearth.parser.rss2.rss_get_item_data',
    )
    for logger_name in expected_logger_names:
        get_logger.assert_any_call(logger_name)
    # NOTE(review): this loop passes vacuously when nothing was logged.
    recorded_calls = get_logger.return_value.method_calls
    for method_name, positional, _ in recorded_calls:
        assert method_name == 'warn'
        assert positional[0] == 'Unknown tag: %s'
def test_write_subscription_with_nonascii_title():
    """Serialize a subscription list whose feed has a non-ASCII title.

    :class:`SubscriptionList` converts the feed title to :class:`str`, and
    ``write()`` tries to encode the title as UTF-8.  With non-ASCII
    characters in the title this raised :exc:`UnicodeDecodeError`; the test
    requires that joining the written chunks yields a non-empty string.

    """
    document = rss_template_with_title.format('한글')
    feed, _hints = parse_rss(document)
    feed.id = 'id'
    subscriptions = SubscriptionList()
    subscriptions.subscribe(feed)
    chunks = write(subscriptions)
    serialized = ''.join(chunks)
    assert serialized
def test_log_warnings_during_rss_parsing():
    """Verify logger lookups and logged calls made by ``rss2.parse_rss()``.

    With ``logging.getLogger`` patched, parsing the sample document must
    request the two ``libearth.parser.rss2`` loggers named below.  Each
    method call recorded on the mock logger has to be ``warn`` with
    ``'Unknown tag: %s'`` as its first positional argument.

    """
    # NOTE(review): another test with this name is visible in this file; if
    # both live in the same module only the later definition is collected.
    opener = urllib2.build_opener(TestHTTPHandler)
    urllib2.install_opener(opener)
    with mock.patch('logging.getLogger') as patched_get_logger:
        _feed, _hints = rss2.parse_rss(
            rss_xml,
            'http://sourcetest.com/rss.xml'
        )
    patched_get_logger.assert_any_call(
        'libearth.parser.rss2.rss_get_channel_data'
    )
    patched_get_logger.assert_any_call(
        'libearth.parser.rss2.rss_get_item_data'
    )
    fake_logger = patched_get_logger.return_value
    # NOTE(review): nothing is asserted when no calls were recorded.
    for recorded in fake_logger.method_calls:
        method, arguments, _ = recorded
        assert method == 'warn'
        assert arguments[0] == 'Unknown tag: %s'
def test_rss_with_no_pubDate():
    """Require truthy ``updated_at`` values for ``rss_with_no_pubDate``.

    Both the parsed feed and its first entry are checked.

    """
    # NOTE(review): a later definition with the same name is visible in this
    # file and checks only the feed; if both live in the same module, this
    # stricter version is shadowed.
    parsed = parse_rss(rss_with_no_pubDate)
    feed_data, _hints = parsed
    feed_timestamp = feed_data.updated_at
    assert feed_timestamp
    first_entry = feed_data.entries[0]
    assert first_entry.updated_at
def test_rss_with_empty_title():
    """An empty title must come out as ``''`` rather than :const:`None`."""
    # NOTE(review): another test with this name is visible in this file; if
    # both live in the same module only the later definition is collected.
    parsed = parse_rss(rss_with_empty_title)
    feed, _hints = parsed
    title_value = feed.title.value
    assert title_value == ''
def test_rss_item_guid():
    """Check the ids of the first three entries parsed from ``rss_with_guid``."""
    # NOTE(review): another test with this name is visible in this file; if
    # both live in the same module only the later definition is collected.
    feed_data, _hints = rss2.parse_rss(rss_with_guid, None)
    expected_ids = (
        'urn:uuid:3F2504E0-4F89-11D3-9A0C-0305E82C3301',
        'http://guidtest.com/1',
        '',
    )
    for position, expected_id in enumerate(expected_ids):
        assert feed_data.entries[position].id == expected_id
def test_rss_with_empty_title():
    """The title value of an empty title is ``''``, never :const:`None`."""
    # NOTE(review): another test with this name is visible in this file; if
    # both live in the same module only the later definition is collected.
    feed, _hints = rss2.parse_rss(rss_with_empty_title)
    parsed_title = feed.title
    assert parsed_title.value == ''
def test_rss_with_no_pubDate():
    """The feed parsed from ``rss_with_no_pubDate`` has a truthy ``updated_at``."""
    # NOTE(review): another test with this name is visible in this file; if
    # both live in the same module only the later definition is collected.
    parsed = rss2.parse_rss(rss_with_no_pubDate)
    feed_data, _hints = parsed
    feed_timestamp = feed_data.updated_at
    assert feed_timestamp
def test_rss_without_title():
    """Check the title and subtitle text parsed from ``rss_without_title``.

    The parsed feed must have no entries, and the text of its title and of
    its subtitle must both equal ``'only description'``.

    """
    feed, _hints = parse_rss(rss_without_title, None)
    assert not feed.entries
    title_text = text_type(feed.title)
    subtitle_text = text_type(feed.subtitle)
    assert title_text == subtitle_text
    assert subtitle_text == 'only description'
def test_rss_item_guid():
    """Compare the ids of entries 0-2 of ``rss_with_guid`` to fixed values."""
    # NOTE(review): another test with this name is visible in this file; if
    # both live in the same module only the later definition is collected.
    parsed = parse_rss(rss_with_guid, None)
    feed_data, _hints = parsed
    entries = feed_data.entries
    uuid_urn = 'urn:uuid:3F2504E0-4F89-11D3-9A0C-0305E82C3301'
    assert entries[0].id == uuid_urn
    assert entries[1].id == 'http://guidtest.com/1'
    assert entries[2].id == ''