def format_html_title(title, url=None):
    """Return the ``format_title`` result for a minimal page titled *title*.

    The title is substituted verbatim (no HTML escaping) into the
    ``<title>`` element of a fixed HTML page. That page is wrapped in an
    ``HTMLDocument``, parsed, and handed to ``format_title`` together with
    an empty dict as its second argument.

    Args:
        title: Text placed inside the ``<title>`` element.
        url: Optional URL forwarded to ``HTMLDocument``.

    Returns:
        Whatever ``format_title`` returns for the parsed document.
    """
    markup = """ <html><head><title>%s</title></head><body>Hello</body></html> """ % title
    document = HTMLDocument(markup, url=url)
    document.parse()
    return format_title(document, {})
def test_format_title():
    """Check ``format_title`` against pages built from raw title strings.

    Covers, in order: plain titles, whitespace collapsing, truncation of
    over-long titles, emoji removal, truncation near word boundaries,
    fallback to the URL's domain, and the Open Graph ``og:title`` tag.
    """

    def format_html_title(title, url=None):
        # Build a one-off UTF-8 page around the title and format it.
        markup = """ <html><head><meta charset="UTF-8"><title>%s</title></head><body>Hello</body></html> """ % title
        document = HTMLDocument(markup, url=url)
        document.parse()
        return format_title(document, {})

    long_prefix = "a" * 60
    example_url = "http://www.example.com/hello.html"

    # Plain title, whitespace collapsing, and hard truncation at 70 chars.
    basic_cases = (
        ("A Title!", "A Title!"),
        (" A \n Title\t \t! ", "A Title !"),
        ("a" * 100, ("a" * 70) + "..."),
    )
    for raw, expected in basic_cases:
        assert format_html_title(raw) == expected

    # Emoji characters and symbols are expected to be removed from titles.
    encoded_emoji_title = u"😋 Super Emoji-Land.com ".encode('utf8')
    assert format_html_title(encoded_emoji_title) == "Super Emoji-Land.com"

    # Truncation of titles whose length limit falls near a word boundary.
    boundary_cases = (
        (long_prefix + " 2345678 1234567", long_prefix + " 2345678..."),
        (long_prefix + " 234567890 1234567", long_prefix + " 234567890..."),
        (long_prefix + " 2345678901 1234567", long_prefix + "..."),
    )
    for raw, expected in boundary_cases:
        assert format_html_title(raw) == expected

    # A blank title, then a blacklisted one: both fall back to the domain.
    for raw in (" ", " home "):
        assert format_html_title(raw, url=example_url) == "Example"

    # Open Graph: the og:title meta tag supplies the title.
    ogp_html = """<html> <head><meta property="og:title" content="Open graph title " /></head> <body>This is <body> text</body> </html>"""
    ogp_page = HTMLDocument(ogp_html).parse()
    assert format_title(ogp_page, {}) == "Open graph title"
def test_format_title():
    """Verify the titles ``format_title`` produces for small HTML pages.

    Each case wraps a raw title in a UTF-8 page, parses it, and compares
    the formatted result with the expected string. The final case uses an
    ``og:title`` meta tag instead of a ``<title>`` element.
    """

    def format_html_title(title, url=None):
        # Wrap the title in a minimal UTF-8 page, parse it, format it.
        page_source = """ <html><head><meta charset="UTF-8"><title>%s</title></head><body>Hello</body></html> """ % title
        parsed = HTMLDocument(page_source, url=url)
        parsed.parse()
        return format_title(parsed, {})

    sixty = "a" * 60
    fallback_url = "http://www.example.com/hello.html"

    # Plain title, whitespace collapsing, and truncation of a long title.
    for given, wanted in [
        ("A Title!", "A Title!"),
        (" A \n Title\t \t! ", "A Title !"),
        ("a" * 100, ("a" * 70) + "..."),
    ]:
        assert format_html_title(given) == wanted

    # Emoji characters and symbols are expected to be removed from titles.
    emoji_bytes = u"😋 Super Emoji-Land.com ".encode('utf8')
    assert format_html_title(emoji_bytes) == "Super Emoji-Land.com"

    # Truncation around the last words of an over-long title.
    for given, wanted in [
        (sixty + " 2345678 1234567", sixty + " 2345678..."),
        (sixty + " 234567890 1234567", sixty + " 234567890..."),
        (sixty + " 2345678901 1234567", sixty + "..."),
    ]:
        assert format_html_title(given) == wanted

    # Domain fallback for a blank title.
    assert format_html_title(" ", url=fallback_url) == "Example"

    # Blacklisted title also falls back to the domain.
    assert format_html_title(" home ", url=fallback_url) == "Example"

    # Open Graph title taken from the og:title meta tag.
    ogp_source = """<html> <head><meta property="og:title" content="Open graph title " /></head> <body>This is <body> text</body> </html>"""
    ogp_document = HTMLDocument(ogp_source).parse()
    assert format_title(ogp_document, {}) == "Open graph title"
def test_format_title():
    """Check ``format_title`` output for a series of raw page titles.

    Covers plain titles, whitespace collapsing, truncation of over-long
    titles, fallback to the URL's domain, and the ``og:title`` meta tag.
    """

    def format_html_title(title, url=None):
        # Build a minimal page around the title, parse it, and format it.
        markup = """ <html><head><title>%s</title></head><body>Hello</body></html> """ % title
        document = HTMLDocument(markup, url=url)
        document.parse()
        return format_title(document, {})

    stem = "a" * 60

    # (raw title, expected formatted title) for pages without a URL.
    url_less_cases = (
        ("A Title!", "A Title!"),
        (" A \n Title\t \t! ", "A Title !"),
        ("a" * 100, ("a" * 70) + "..."),
        (stem + " 2345678 1234567", stem + " 2345678..."),
        (stem + " 234567890 1234567", stem + " 234567890..."),
        (stem + " 2345678901 1234567", stem + "..."),
    )
    for raw, expected in url_less_cases:
        assert format_html_title(raw) == expected

    # A blank title, then a blacklisted one: both fall back to the domain.
    site_url = "http://www.example.com/hello.html"
    for raw in (" ", " home "):
        assert format_html_title(raw, url=site_url) == "Example"

    # Open Graph: the og:title meta tag supplies the title.
    ogp_html = """<html> <head><meta property="og:title" content="Open graph title " /></head> <body>This is <body> text</body> </html>"""
    ogp_page = HTMLDocument(ogp_html).parse()
    assert format_title(ogp_page, {}) == "Open graph title"
def format_html_title(title, url=None):
    """Return the ``format_title`` result for a UTF-8 page titled *title*.

    The title is substituted verbatim (no HTML escaping) into the
    ``<title>`` element of a fixed page that declares
    ``<meta charset="UTF-8">``. The page is wrapped in an ``HTMLDocument``,
    parsed, and passed to ``format_title`` with an empty dict as its second
    argument.

    Args:
        title: Text placed inside the ``<title>`` element.
        url: Optional URL forwarded to ``HTMLDocument``.

    Returns:
        Whatever ``format_title`` returns for the parsed document.
    """
    markup = """ <html><head><meta charset="UTF-8"><title>%s</title></head><body>Hello</body></html> """ % title
    document = HTMLDocument(markup, url=url)
    document.parse()
    return format_title(document, {})