def test_missing_title_tag():
    """A fragment with no <title> element yields the placeholder title."""
    # The fragment only carries an <h1>; there is deliberately no <title>.
    markup = """ <head> <h1>Gandalf</h1> <body>hi</body> """
    parsed = HtmlFileReader('no_filename').parse_html(markup)
    placeholder = '[[NO TITLE FOUND]]'
    assert parsed['title'] == placeholder
def test_content_without_body():
    """A page holding nothing but a <head> yields the placeholder content."""
    # Only a <head> is supplied: no <body> and no markup after </head>.
    markup = """ <head> <title>Gandalf</title> </head> """
    parsed = HtmlFileReader('no_filename').parse_html(markup)
    placeholder = '[[YOUR CONTENT SHOULD GO HERE]]'
    assert parsed['content'] == placeholder
def test_read_category_metadata():
    """The "category" meta tag is special-cased: it surfaces as "categories"."""
    markup = """ <head> <title>Gandalf</title> <meta name="category" content="hobbit"/> <body>Hi</body> """
    parsed = HtmlFileReader('no_filename').parse_html(markup)
    metadata = parsed['meta']
    # The value is looked up under the plural key, not the tag's own name.
    assert metadata['categories'] == 'hobbit'
def test_read_title_tag():
    """The <title> text is exposed both at the top level and under 'meta'."""
    markup = """ <head> <title>Gandalf</title> <body>hi</body> """
    parsed = HtmlFileReader('no_filename').parse_html(markup)
    wanted = 'Gandalf'
    # Both access paths must report the same title.
    assert parsed['title'] == wanted
    assert parsed['meta']['title'] == wanted
def test_bad_meta_tag():
    """A <meta> tag lacking a "content" attribute is skipped without error."""
    # The tag below uses "value" where "content" would normally appear.
    markup = """ <head> <title>Gandalf</title> <meta name="tags" value="hobbit"/> <body>Hi</body> """
    parsed = HtmlFileReader('no_filename').parse_html(markup)
    metadata = parsed['meta']
    # Parsing must not raise, and the malformed tag must leave no entry behind.
    assert 'tags' not in metadata
def test_custom_theme():
    """A "theme" meta tag is carried through into the parsed metadata."""
    # Well-formed meta tag: it has both "name" and "content" attributes.
    markup = """ <head> <title>Gandalf</title> <meta name="theme" content="lotr"/> <body>Hi</body> """
    parsed = HtmlFileReader('no_filename').parse_html(markup)
    metadata = parsed['meta']
    # Check presence first so a missing key fails as an assertion, then the value.
    assert 'theme' in metadata
    assert metadata['theme'] == 'lotr'
def test_read_content():
    """The markup inside <body> comes back as the page content."""
    markup = """ <head> <title>Gandalf</title> <body> <p>Bilbo Baggins</p> <p>Bard the Bowman</p> <p>Thorin Oakenshield</p> </body> """
    # The comparison is exact, so whitespace around the paragraphs matters.
    wanted = ''' <p>Bilbo Baggins</p> <p>Bard the Bowman</p> <p>Thorin Oakenshield</p> '''
    parsed = HtmlFileReader('no_filename').parse_html(markup)
    assert parsed['content'] == wanted
def test_content_no_explicit_body_tag():
    """Without a <body> tag, everything outside <head> counts as the body."""
    markup = """ <head> <title>Gandalf</title> </head> <div> <p>Bilbo Baggins</p> <p>Bard the Bowman</p> <p>Thorin Oakenshield</p> </div> """
    # Exact match: the expected content starts directly at the <div>.
    wanted = '''<div> <p>Bilbo Baggins</p> <p>Bard the Bowman</p> <p>Thorin Oakenshield</p> </div> '''
    parsed = HtmlFileReader('no_filename').parse_html(markup)
    assert parsed['content'] == wanted