def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):
    """Parse ``input`` and compare its pretty-printed tree walk to ``expected``.

    Args:
        innerHTML: Fragment container handed to ``parseFragment``; when falsy
            the input is parsed as a whole document with ``parse`` instead.
        input: Markup to parse.
        expected: Expected dump; passed through ``convertExpected`` and
            attribute-sorted before the comparison.
        errors: Not used by this function.
        treeClass: Mapping providing ``"builder"`` and ``"walker"`` entries
            and, optionally, an ``"adapter"`` callable applied to the parsed
            document before walking.

    Returns without asserting anything when parsing raises
    ``constants.DataLossWarning`` or when the walk raises
    ``NotImplementedError``.
    """
    # Warnings are escalated to errors for the whole body, but inside
    # catch_warnings() so the caller's filters are restored on every exit
    # path instead of being permanently replaced.
    with warnings.catch_warnings():
        warnings.resetwarnings()
        warnings.simplefilter("error")
        try:
            p = html5parser.HTMLParser(tree=treeClass["builder"])
            if innerHTML:
                document = p.parseFragment(input, innerHTML)
            else:
                document = p.parse(input)
        except constants.DataLossWarning:
            # Ignore testcases we know we don't pass
            return

        document = treeClass.get("adapter", lambda x: x)(document)
        try:
            output = treewalkers.pprint(Lint(treeClass["walker"](document)))
            # Sort attributes on both sides so ordering differences do not
            # show up as mismatches.
            output = attrlist.sub(sortattrs, output)
            expected = attrlist.sub(sortattrs, convertExpected(expected))
            diff = "".join(unified_diff(
                [line + "\n" for line in expected.splitlines()],
                [line + "\n" for line in output.splitlines()],
                "Expected", "Received"))
            assert expected == output, "\n".join([
                "", "Input:", input,
                "", "Expected:", expected,
                "", "Received:", output,
                "", "Diff:", diff,
            ])
        except NotImplementedError:
            pass  # Amnesty for those that confess...
def test_maintain_attribute_order_adjusted(treeName):
    """Check attribute order survives when some attribute names get adjusted.

    Skips when the tree type registered under ``treeName`` is ``None``.
    """
    apis = treeTypes[treeName]
    if apis is None:
        pytest.skip("Treebuilder not loaded")

    # Plain attributes interleaved with two that the assertion below expects
    # back under different keys: ``refx`` as ``refX`` and ``xml:lang`` under
    # the XML namespace.
    markup = "<svg a=1 refx=2 b=3 xml:lang=4 c=5>"
    fragment = html5parser.HTMLParser(tree=apis["builder"]).parseFragment(markup)
    adapt = apis.get("adapter", lambda x: x)
    fragment = adapt(fragment)
    tokens = list(Lint(apis["walker"](fragment)))

    assert len(tokens) == 2
    start_tag, end_tag = tokens
    assert start_tag['type'] == 'StartTag'
    assert end_tag['type'] == "EndTag"

    expected_attrs = [
        ((None, 'a'), '1'),
        ((None, 'refX'), '2'),
        ((None, 'b'), '3'),
        (('http://www.w3.org/XML/1998/namespace', 'lang'), '4'),
        ((None, 'c'), '5'),
    ]
    assert list(start_tag['data'].items()) == expected_attrs
def test_maintain_attribute_order(treeName):
    """Check that the walker reports attributes in the order they were written.

    Skips when the tree type registered under ``treeName`` is ``None``.
    """
    apis = treeTypes[treeName]
    if apis is None:
        pytest.skip("Treebuilder not loaded")

    # generate loads to maximize the chance a hash-based mutation will occur
    codepoints = range(ord('a'), ord('z'))
    attrs = [(unichr(code), text_type(pos)) for pos, code in enumerate(codepoints)]
    markup = "<span " + " ".join("%s='%s'" % pair for pair in attrs) + ">"

    fragment = html5parser.HTMLParser(tree=apis["builder"]).parseFragment(markup)
    adapt = apis.get("adapter", lambda x: x)
    fragment = adapt(fragment)
    tokens = list(Lint(apis["walker"](fragment)))

    assert len(tokens) == 2
    start_tag, end_tag = tokens
    assert start_tag['type'] == 'StartTag'
    assert end_tag['type'] == "EndTag"

    walked = start_tag['data']
    assert len(attrs) == len(walked)

    # Walk input and output in lockstep; output keys are (namespace, name).
    for given, seen in zip(attrs, walked.items()):
        given_name, given_value = given
        seen_name, seen_value = seen
        assert (None, given_name) == seen_name
        assert given_value == seen_value
def test_lxml_xml():
    """Walk an lxml tree parsed from XML and check the emitted tokens.

    Two nested ``div`` elements should yield two start tags followed by two
    end tags, all with a ``None`` namespace.
    """
    open_div = {
        'data': {},
        'name': 'div',
        'namespace': None,
        'type': 'StartTag',
    }
    close_div = {
        'name': 'div',
        'namespace': None,
        'type': 'EndTag',
    }
    # Only compared by equality, so reusing the same dicts is fine.
    expected = [open_div, open_div, close_div, close_div]

    tree = lxml.etree.fromstring('<div><div></div></div>')
    make_walker = treewalkers.getTreeWalker('lxml')
    tokens = list(Lint(make_walker(tree)))
    assert tokens == expected
def serialize_html(input, options):
    """Render ``input`` through ``HTMLSerializer`` using ``options``.

    Args:
        input: Data wrapped in ``JsonWalker`` to form the token stream.
        options: Mapping of serializer keyword arguments. Keys are converted
            with ``str``. An ``"encoding"`` entry, if present, is removed and
            passed to ``render`` rather than to the serializer constructor.
            The caller's mapping is not modified.

    Returns:
        Whatever ``HTMLSerializer.render`` returns for the stream.
    """
    kwargs = {str(key): value for key, value in options.items()}
    encoding = kwargs.pop("encoding", None)
    token_stream = Lint(JsonWalker(input), False)
    html_serializer = HTMLSerializer(alphabetical_attributes=True, **kwargs)
    return html_serializer.render(token_stream, encoding)
def runtest(self):
    """Run one tree-walker test case stored on ``self.test``.

    The case is skipped when the tree APIs are not loaded, when parsing
    raises ``constants.DataLossWarning``, when the parser's own serialized
    output does not match the expected dump, or when the walker raises
    ``NotImplementedError``. Otherwise the pretty-printed walk must equal the
    expected dump once attributes are sorted on both sides.
    """
    apis = self.treeAPIs
    if apis is None:
        pytest.skip("Treebuilder not loaded")

    p = html5parser.HTMLParser(tree=apis["builder"])

    case = self.test
    input = case['data']
    fragmentContainer = case['document-fragment']
    expected = convertExpected(case['document'])
    scripting = 'script-on' in case

    # Escalate warnings to errors while parsing so a DataLossWarning
    # surfaces as an exception.
    with warnings.catch_warnings():
        warnings.simplefilter("error")
        try:
            if fragmentContainer:
                document = p.parseFragment(input, fragmentContainer,
                                           scripting=scripting)
            else:
                document = p.parse(input, scripting=scripting)
        except constants.DataLossWarning:
            pytest.skip("data loss warning")

    # Only exercise the walker when the parser itself produced the expected
    # tree.
    parser_dump = convertTreeDump(p.tree.testSerializer(document))
    if parser_dump != namespaceExpected(r"\1<html \2>", expected):
        pytest.skip("parser output incorrect")

    adapt = apis.get("adapter", lambda x: x)
    document = adapt(document)

    try:
        walked = Lint(apis["walker"](document))
        output = sortattrs(treewalkers.pprint(walked))
        expected = sortattrs(expected)
        expected_lines = [line + "\n" for line in expected.splitlines()]
        output_lines = [line + "\n" for line in output.splitlines()]
        diff = "".join(
            unified_diff(expected_lines, output_lines, "Expected", "Received"))
        assert expected == output, "\n".join([
            "", "Input:", input,
            "", "Expected:", expected,
            "", "Received:", output,
            "", "Diff:", diff,
        ])
    except NotImplementedError:
        pytest.skip("tree walker NotImplementedError")
def test_all_tokens():
    """Check the token sequence every loaded tree walker emits for a small page.

    Tree types whose entry in ``treeTypes`` is ``None`` are passed over.
    Tokens are compared pairwise with ``zip``, so the comparison covers only
    the common prefix of the expected and actual sequences.
    """
    xhtml = 'http://www.w3.org/1999/xhtml'

    def start(name):
        return {'data': {}, 'type': 'StartTag', 'namespace': xhtml, 'name': name}

    def end(name):
        return {'type': 'EndTag', 'namespace': xhtml, 'name': name}

    def chars(text):
        return {'data': text, 'type': 'Characters'}

    expected = [
        start('html'),
        start('head'),
        end('head'),
        start('body'),
        chars('a'),
        start('div'),
        chars('b'),
        end('div'),
        chars('c'),
        end('body'),
        end('html'),
    ]

    loaded = (apis for _, apis in sorted(treeTypes.items()) if apis is not None)
    for apis in loaded:
        p = html5parser.HTMLParser(tree=apis["builder"])
        document = p.parse(
            "<html><head></head><body>a<div>b</div>c</body></html>")
        adapt = apis.get("adapter", lambda x: x)
        document = adapt(document)
        tokens = Lint(apis["walker"](document))
        for wanted, got in zip(expected, tokens):
            assert wanted == got
def test_fragment_single_char(tree, char):
    """Check that a one-character fragment walks to a single Characters token.

    Args:
        tree: ``(name, tree_class)`` pair; the test is skipped when
            ``tree_class`` is ``None``.
        char: The text parsed as a fragment.
    """
    _, apis = tree
    if apis is None:
        pytest.skip("Treebuilder not loaded")

    expected = [{'data': char, 'type': 'Characters'}]

    fragment = html5parser.HTMLParser(tree=apis["builder"]).parseFragment(char)
    adapt = apis.get("adapter", lambda x: x)
    fragment = adapt(fragment)
    tokens = list(Lint(apis["walker"](fragment)))
    assert tokens == expected