def setUp(self):
    """Create a new ``SafeHtmlParser`` and store it on ``self.parser``."""
    parser = SafeHtmlParser()
    self.parser = parser
class SafeHtmlParserTest(unittest.TestCase):
    """Unit tests for ``SafeHtmlParser.getSafeHtml``.

    Every test hands an HTML snippet to the parser and compares the returned
    markup with an expected snippet, after both have been normalized by
    ``_cleanStringForEasyComparison``.
    """

    def setUp(self):
        """Create the parser instance stored on ``self.parser``."""
        parser = SafeHtmlParser()
        self.parser = parser

    def test_simpleH1H2ShouldBePreserved(self):
        """Empty ``<h1>``/``<h2>`` elements are expected back unchanged."""
        markup = "<h1></h1><h2></h2>"
        self._makeHtmlSafeAndCompare(markup, markup)

    def test_scriptTagShouldBeRemoved(self):
        """Inline and external ``<script>`` elements are expected to vanish."""
        unsafe = (
            "<script type='text/javascript'>alert('hacked')</script> "
            "<h1></h1><h2></h2> "
            '<script src="http://www.hacker.com/xss.js"></script> '
        )
        self._makeHtmlSafeAndCompare(unsafe, "<h1></h1><h2></h2>")

    def test_htmlPageStructureShouldBeRemoved(self):
        """Only the ``<link>`` tags and the paragraph of a full page are expected."""
        stylesheet_link = (
            '<link rel="stylesheet" href="/statics/css/general.css" '
            'type="text/css" media="screen" />'
        )
        icon_link = '<link rel="icon" type="image/png" href="/statics/favicon.png" />'
        paragraph = "<p>This is a simple paragraph</p>"

        # The page fixture is a single line whose parts are separated by
        # exactly one space.
        page = " ".join(
            [
                "<!DOCTYPE html>",
                '<html lang="en">',
                "<head>",
                '<meta charset="utf-8">',
                "<title>page title</title>",
                stylesheet_link,
                icon_link,
                "</head>",
                "<body>",
                paragraph,
                "</body>",
                "</html>",
            ]
        )
        kept = " ".join([stylesheet_link, icon_link, paragraph])
        self._makeHtmlSafeAndCompare(page, kept)

    def test_javascriptExecutionThroughCssBackgroundUrlShouldBeRemoved(self):
        """``javascript:`` URLs in CSS are expected to be swapped for the warning icon."""
        unsafe = (
            '<style type="type/css"> '
            """body { background: url("javascript:alert('hacked)"); } """
            ".main .sub p { background-image: "
            "url(javascript:if(window.hacker==true){hackNowr()}); } "
            "</style>"
        )
        warning_icon = SafeHtmlParser.noJavaScriptInCssWarningIcon
        expected = (
            ' <style type="type/css"> body { background: '
            + warning_icon
            + "; } .main .sub p { background-image: "
            + warning_icon
            + "; } </style>"
        )
        self._makeHtmlSafeAndCompare(unsafe, expected)

    def test_javascriptExecutionThroughLinkShouldBeRemoved(self):
        """A ``javascript:`` ``href`` is expected to be dropped from the anchor."""
        unsafe = (
            "<p> "
            '<a href="javascript:hackingNow(2);">'
            "The href attribute of this link should be removed</a> "
            "</p>"
        )
        expected = "<p> <a>The href attribute of this link should be removed</a> </p>"
        self._makeHtmlSafeAndCompare(unsafe, expected)

    def test_textNodesInsideATagShouldBeKept(self):
        """Text surrounding a cleaned anchor inside ``<p>`` is expected to survive."""
        unsafe = (
            "<p> this is some text here that should be kept "
            '<a href="javascript:hackingNow(2);">'
            "The href attribute of this link should be removed</a> "
            "and more text here </p>"
        )
        expected = (
            "<p> this is some text here that should be kept "
            "<a>The href attribute of this link should be removed</a> "
            "and more text here </p>"
        )
        self._makeHtmlSafeAndCompare(unsafe, expected)

    def test_allTextNodesShouldBeKept(self):
        """Text nodes inside and outside of tags are expected back unchanged."""
        markup = (
            "There can be text node with no HTML tag around them "
            "<p>Or with HTML tags around them too <span>Test</span> "
            'and some more data here with an <img src="test.jpg" /> '
            "and some more data</p> "
            "Will this text node be kept?<br /> "
            "And this one?"
        )
        self._makeHtmlSafeAndCompare(markup, markup)

    def _makeHtmlSafeAndCompare(self, original, expected):
        """Run ``original`` through the parser and assert it matches ``expected``.

        Both the parser output and ``expected`` are normalized with
        ``_cleanStringForEasyComparison`` before the comparison; the failure
        message shows the two normalized strings.
        """
        normalize = self._cleanStringForEasyComparison
        safe_html = normalize(self.parser.getSafeHtml(original))
        expected_html = normalize(expected)
        failure_message = "\nEXPECTED:\n" + expected_html + "\nFOUND:\n" + safe_html
        self.assertEqual(safe_html, expected_html, failure_message)

    def _cleanStringForEasyComparison(self, str):
        """Return ``str`` stripped, without tab characters and without line breaks.

        NOTE: the parameter name shadows the builtin ``str``; it is kept so the
        method signature stays unchanged.
        """
        trimmed = str.strip()
        without_tabs = trimmed.replace("\t", "")
        return "".join(without_tabs.splitlines())