def test_article_metadata_key_lowercase(self):
    # Metadata keys must come back lowercase, while the associated
    # values keep whatever case they had.
    _, metadata = readers.HTMLReader({}).read(
        _path('article_with_uppercase_metadata.html'))

    self.assertIn('category', metadata, 'Key should be lowercase.')
    self.assertEqual(
        'Yeah', metadata.get('category'), 'Value keeps cases.')
def test_article_with_comments(self):
    # The content returned by the reader is expected to include the
    # HTML comment markup, not just the body text.
    body, _ = readers.HTMLReader({}).read(
        _path('article_with_comments.html'))

    # The comparison is exact, so whitespace inside this literal matters.
    expected = ''' Body content <!-- This comment is included (including extra whitespace) --> '''
    self.assertEqual(expected, body)
def test_article_with_null_attributes(self):
    # The expected content carries a value-less attribute (`disabled`)
    # and an empty-valued one (`style=""`); both must appear in the
    # content returned by the reader.
    body, _ = readers.HTMLReader({}).read(
        _path('article_with_null_attributes.html'))

    # The comparison is exact, so whitespace inside this literal matters.
    expected = ''' Ensure that empty attributes are copied properly. <input name="test" disabled style="" /> '''
    self.assertEqual(expected, body)
def test_article_with_keywords(self):
    # The reader must expose the list below under the 'tags' metadata key.
    # NOTE(review): presumably the fixture declares these as HTML
    # keywords -- confirm against article_with_keywords.html.
    _, metadata = readers.HTMLReader({}).read(
        _path('article_with_keywords.html'))

    expected = {
        'tags': ['foo', 'bar', 'foobar'],
    }

    # The key doubles as the failure message so a mismatch names the
    # offending metadata field.
    for key in expected:
        self.assertEqual(expected[key], metadata[key], key)
def test_article_with_metadata(self):
    # Every entry listed below must be present in the metadata returned
    # by the reader, with exactly the given value.
    _, metadata = readers.HTMLReader({}).read(
        _path('article_with_metadata.html'))

    expected = {
        'category': 'yeah',
        'author': 'Alexis Métaireau',
        'title': 'This is a super article !',
        'summary': 'Summary and stuff',
        'date': datetime.datetime(2010, 12, 2, 10, 14),
        'tags': ['foo', 'bar', 'foobar'],
        'custom_field': 'http://notmyidea.org',
    }

    # The key doubles as the failure message so a mismatch names the
    # offending metadata field.
    for key in expected:
        self.assertEqual(expected[key], metadata[key], key)