def test_charset_in_header(self) -> None:
    # The document is encoded as Big5 and the same charset is declared in the
    # content-type string given to the parser; the extracted title must equal
    # the original (un-encoded) text.
    markup = (
        "<html> <head> "
        '<meta property="og:title" content="中文" /> '
        "</head> </html>"
    )
    encoded = markup.encode("big5")
    extracted = OpenGraphParser(encoded, "text/html; charset=Big5").extract_data()
    self.assertEqual(extracted["title"], "中文")
def test_page_with_og(self) -> None:
    # Feeds a UTF-8 page carrying the usual og:* meta tags to the parser and
    # checks the title and description exposed on the extracted result.
    #
    # NOTE(review): this variant reads attributes (``.title``) whereas other
    # tests in this file index the result like a mapping, and the name
    # ``test_page_with_og`` is defined more than once in view. If these
    # definitions share a class, only the last one is collected -- confirm
    # which variant is intended.
    page = (
        b"<html> <head> "
        b'<meta property="og:title" content="The Rock" /> '
        b'<meta property="og:type" content="video.movie" /> '
        b'<meta property="og:url" content="http://www.imdb.com/title/tt0117500/" /> '
        b'<meta property="og:image" content="http://ia.media-imdb.com/images/rock.jpg" /> '
        b'<meta property="og:description" content="The Rock film" /> '
        b"</head> </html>"
    )
    og_data = OpenGraphParser(page, "text/html; charset=UTF-8").extract_data()
    self.assertEqual(og_data.title, "The Rock")
    self.assertEqual(og_data.description, "The Rock film")
def test_page_with_og(self) -> None:
    # Parses a text (str) page containing og:* meta tags, passing only the
    # markup to the parser, and checks the title/description entries of the
    # mapping-style result.
    #
    # NOTE(review): ``test_page_with_og`` is defined more than once in view;
    # if the definitions live in the same class, earlier ones are shadowed by
    # the last -- confirm.
    page = (
        "<html> <head> "
        '<meta property="og:title" content="The Rock" /> '
        '<meta property="og:type" content="video.movie" /> '
        '<meta property="og:url" content="http://www.imdb.com/title/tt0117500/" /> '
        '<meta property="og:image" content="http://ia.media-imdb.com/images/rock.jpg" /> '
        '<meta property="og:description" content="The Rock film" /> '
        "</head> </html>"
    )
    og_data = OpenGraphParser(page).extract_data()
    self.assertIn("title", og_data)
    self.assertEqual(og_data["title"], "The Rock")
    self.assertEqual(og_data.get("description"), "The Rock film")
def test_page_with_og(self) -> None:
    # Builds a str page with og:* meta tags, runs it through the parser
    # (markup only, no content-type argument) and verifies the extracted
    # title and description.
    #
    # NOTE(review): this name is defined more than once in view, with an
    # identical body elsewhere; if they share a class only the last
    # definition is collected -- confirm whether the duplicate is intended.
    meta_tags = (
        '<meta property="og:title" content="The Rock" /> '
        '<meta property="og:type" content="video.movie" /> '
        '<meta property="og:url" content="http://www.imdb.com/title/tt0117500/" /> '
        '<meta property="og:image" content="http://ia.media-imdb.com/images/rock.jpg" /> '
        '<meta property="og:description" content="The Rock film" /> '
    )
    document = "<html> <head> " + meta_tags + "</head> </html>"
    extractor = OpenGraphParser(document)
    extracted = extractor.extract_data()
    self.assertIn("title", extracted)
    self.assertEqual(extracted["title"], "The Rock")
    self.assertEqual(extracted.get("description"), "The Rock film")
def test_page_with_evil_og_tags(self) -> None:
    # The page carries the regular og:* tags plus og:html (with a <script>
    # payload) and og:oembed. The regular title/description must come through,
    # while looking up "oembed" and "html" on the result must give None.
    page = (
        b"<html> <head> "
        b'<meta property="og:title" content="The Rock" /> '
        b'<meta property="og:type" content="video.movie" /> '
        b'<meta property="og:url" content="http://www.imdb.com/title/tt0117500/" /> '
        b'<meta property="og:image" content="http://ia.media-imdb.com/images/rock.jpg" /> '
        b'<meta property="og:description" content="The Rock film" /> '
        b'<meta property="og:html" content="<script>alert(window.location)</script>" /> '
        b'<meta property="og:oembed" content="True" /> '
        b"</head> </html>"
    )
    extracted = OpenGraphParser(page, "text/html; charset=UTF-8").extract_data()

    # Legitimate fields are present.
    self.assertIn("title", extracted)
    self.assertEqual(extracted["title"], "The Rock")
    self.assertEqual(extracted.get("description"), "The Rock film")

    # The unwanted keys are checked in the same order as before.
    for unwanted_key in ("oembed", "html"):
        self.assertEqual(extracted.get(unwanted_key), None)