def runParserTest(self, innerHTML, input, expected, errors, treeClass):
    """Parse ``input`` and check the serialized tree against ``expected``.

    Args:
        innerHTML: Truthy to parse ``input`` as a fragment (the value is
            passed to ``parseFragment`` as its second argument); falsy to
            parse a whole document.
        input: Test markup; wrapped in ``StringIO.StringIO`` for the parser.
        expected: Expected tree dump, normalised with ``convertExpected``
            before comparison.
        errors: Iterable of expected parse-error strings. Only its length
            is compared, and only when the module-level ``checkParseErrors``
            flag is true.
        treeClass: Tree builder handed to ``html5parser.HTMLParser``.

    The test returns silently (after a note on stderr) when parsing a full
    document raises ``constants.DataLossWarning``.
    """
    # XXX - move this out into the setup function
    p = html5parser.HTMLParser(tree=treeClass)

    try:
        if innerHTML:
            document = p.parseFragment(StringIO.StringIO(input), innerHTML)
        else:
            try:
                document = p.parse(StringIO.StringIO(input))
            except constants.DataLossWarning:
                sys.stderr.write("Test input causes known dataloss, skipping")
                return
    except Exception:
        # Catch Exception rather than everything so Ctrl-C and SystemExit
        # still abort the run instead of being reported as a test failure.
        errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
                              "\nTraceback:", traceback.format_exc()])
        self.fail(errorMsg)

    output = convertTreeDump(p.tree.testSerializer(document))
    # Both sides go through the same attrlist/sortattrs substitution so the
    # comparison sees them in the same normalised form.
    output = attrlist.sub(sortattrs, output)
    expected = convertExpected(expected)
    expected = attrlist.sub(sortattrs, expected)

    errorMsg = "\n".join(["\n\nInput:", input, "\nExpected:", expected,
                          "\nReceived:", output])
    self.assertEqual(expected, output, errorMsg)

    errStr = ["Line: %i Col: %i %s %s" % (line, col,
                                          constants.E[errorcode], datavars)
              for ((line, col), errorcode, datavars) in p.errors]
    errorMsg2 = "\n".join([
        "\n\nInput:", input,
        "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors),
        "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr),
    ])
    if checkParseErrors:
        self.assertEqual(len(p.errors), len(errors), errorMsg2)
def runTest(self, innerHTML, input, expected, errors, treeClass):
    """Parse ``input``, walk the resulting tree and compare with ``expected``.

    Args:
        innerHTML: UTF-8 bytes or ``None``. When it decodes to a non-empty
            string, ``input`` is parsed as a fragment with that value as the
            second ``parseFragment`` argument.
        input: Test markup as bytes; fed to the parser via ``io.BytesIO``.
        expected: UTF-8 bytes of the expected output, passed through
            ``convertExpected`` before comparison.
        errors: UTF-8 bytes or ``None``. Decoded and split on newlines but
            not consulted further in this method.
        treeClass: Mapping with a ``"builder"`` and a ``"walker"`` entry and
            an optional ``"adapter"`` callable applied to the parsed document.

    Returns early, without asserting, when parsing raises
    ``constants.DataLossWarning``. A ``NotImplementedError`` raised while
    walking or comparing is swallowed.
    """
    if innerHTML is not None:
        innerHTML = str(innerHTML, "utf8")

    expected = str(expected, "utf8")

    if errors is not None:
        errors = str(errors, "utf8")
        errors = errors.split("\n")

    p = html5parser.HTMLParser(tree=treeClass["builder"])

    try:
        if innerHTML:
            document = p.parseFragment(io.BytesIO(input), innerHTML)
        else:
            document = p.parse(io.BytesIO(input))
    except constants.DataLossWarning:
        # Ignore testcases we know we don't pass
        return

    document = treeClass.get("adapter", lambda x: x)(document)
    try:
        output = convertTokens(treeClass["walker"](document))
        output = attrlist.sub(sortattrs, output)
        expected = attrlist.sub(sortattrs, convertExpected(expected))
        # The message is built before the comparison runs, so decode the
        # raw input leniently: a strict decode would raise on non-UTF-8
        # test input even when the trees match.
        self.assertEqual(
            expected, output, "\n".join([
                "", "Input:", str(input, "utf8", "replace"),
                "", "Expected:", expected,
                "", "Received:", output,
            ]))
    except NotImplementedError:
        pass  # Amnesty for those that confess...
def runTest(self, innerHTML, input, expected, errors, treeClass):
    """Run one tree-walker test case.

    ``input`` (bytes) is parsed with the builder in ``treeClass["builder"]``,
    the document is optionally passed through ``treeClass["adapter"]``, walked
    with ``treeClass["walker"]`` and the converted token stream is compared
    with ``expected``.

    Args:
        innerHTML: ``None`` or UTF-8 bytes; a non-empty decoded value selects
            fragment parsing and is forwarded to ``parseFragment``.
        input: Markup bytes handed to the parser through ``io.BytesIO``.
        expected: UTF-8 bytes describing the expected output.
        errors: ``None`` or UTF-8 bytes; decoded into a list of lines, which
            this method does not use afterwards.
        treeClass: Mapping providing ``"builder"``, ``"walker"`` and
            optionally ``"adapter"``.

    The test is skipped silently when parsing raises
    ``constants.DataLossWarning``; ``NotImplementedError`` from the walking
    and comparison step is ignored.
    """
    if innerHTML is not None:
        innerHTML = str(innerHTML, "utf8")
    expected = str(expected, "utf8")
    if errors is not None:
        errors = str(errors, "utf8").split("\n")

    parser = html5parser.HTMLParser(tree=treeClass["builder"])

    try:
        if innerHTML:
            document = parser.parseFragment(io.BytesIO(input), innerHTML)
        else:
            document = parser.parse(io.BytesIO(input))
    except constants.DataLossWarning:
        # Ignore testcases we know we don't pass
        return

    adapt = treeClass.get("adapter", lambda x: x)
    document = adapt(document)

    try:
        output = attrlist.sub(sortattrs,
                              convertTokens(treeClass["walker"](document)))
        expected = attrlist.sub(sortattrs, convertExpected(expected))
        # Decode with "replace": this text is purely diagnostic and is
        # computed even when the assertion passes, so a strict decode of
        # non-UTF-8 input must not be allowed to fail the test by itself.
        shown_input = str(input, "utf8", "replace")
        report = "\n".join([
            "", "Input:", shown_input,
            "", "Expected:", expected,
            "", "Received:", output,
        ])
        # assertEqual, not the assertEquals alias removed in Python 3.12.
        self.assertEqual(expected, output, report)
    except NotImplementedError:
        pass  # Amnesty for those that confess...
def runParserTest(innerHTML, input, expected, errors, treeClass,
                  namespaceHTMLElements):
    """Parse ``input`` and assert the serialized tree equals ``expected``.

    Args:
        innerHTML: Truthy to parse ``input`` as a fragment (forwarded to
            ``parseFragment``); falsy for a full-document parse.
        input: Test markup, passed to the parser unchanged.
        expected: Expected tree dump, normalised with ``convertExpected``.
        errors: Iterable of expected parse-error strings; only its length is
            checked, and only when module-level ``checkParseErrors`` is true.
        treeClass: Tree builder handed to ``html5parser.HTMLParser``.
        namespaceHTMLElements: Forwarded to the parser; when true the
            expected dump is additionally rewritten via ``namespaceExpected``.

    Returns silently when constructing the parser or parsing a full document
    raises ``constants.DataLossWarning``.
    """
    # XXX - move this out into the setup function
    try:
        p = html5parser.HTMLParser(tree=treeClass,
                                   namespaceHTMLElements=namespaceHTMLElements)
    except constants.DataLossWarning:
        return

    try:
        if innerHTML:
            document = p.parseFragment(input, innerHTML)
        else:
            try:
                document = p.parse(input)
            except constants.DataLossWarning:
                return
    except Exception:
        # Exception, not a bare except: KeyboardInterrupt/SystemExit must
        # stop the run rather than show up as an assertion failure.
        errorMsg = u"\n".join([u"\n\nInput:", input, u"\nExpected:", expected,
                               u"\nTraceback:", traceback.format_exc()])
        assert False, errorMsg.encode("utf8")

    output = convertTreeDump(p.tree.testSerializer(document))

    expected = convertExpected(expected)
    if namespaceHTMLElements:
        expected = namespaceExpected(r"\1<html \2>", expected)

    errorMsg = u"\n".join([u"\n\nInput:", input, u"\nExpected:", expected,
                           u"\nReceived:", output])
    assert expected == output, errorMsg.encode("utf8")

    # Matches "%(name)s" placeholders and captures the bare name.
    placeholder = re.compile(r"%\((\w*)\)s")

    def datavars_sub(datavars, errorcode):
        """Return ``datavars`` as a mapping usable with the error template.

        A dict is returned as is. A positional sequence is paired, in order,
        with every named placeholder found in ``constants.E[errorcode]``.
        """
        if isinstance(datavars, dict):
            return datavars
        names = placeholder.findall(constants.E[errorcode])
        if not names:
            # Template takes no arguments; nothing to map.
            return {}
        return dict(zip(names, datavars))

    errStr = [u"Line: %i Col: %i %s" % (
                  line, col,
                  constants.E[errorcode] % datavars_sub(datavars, errorcode))
              for ((line, col), errorcode, datavars) in p.errors]

    errorMsg2 = u"\n".join([
        u"\n\nInput:", input,
        u"\nExpected errors (" + str(len(errors)) + u"):\n" +
        u"\n".join(errors),
        u"\nActual errors (" + str(len(p.errors)) + u"):\n" +
        u"\n".join(errStr),
    ])
    if checkParseErrors:
        assert len(p.errors) == len(errors), errorMsg2.encode("utf-8")
def runParserTest(innerHTML, input, expected, errors, treeClass,
                  namespaceHTMLElements):
    """Parse ``input`` and assert the serialized tree equals ``expected``.

    Args:
        innerHTML: Truthy to parse ``input`` as a fragment (forwarded to
            ``parseFragment``); falsy for a full-document parse.
        input: Test markup, passed to the parser unchanged.
        expected: Expected tree dump, normalised with ``convertExpected``.
        errors: Iterable of expected parse-error strings; only its length is
            checked, and only when module-level ``checkParseErrors`` is true.
        treeClass: Tree builder handed to ``html5parser.HTMLParser``.
        namespaceHTMLElements: Forwarded to the parser; when true the
            expected dump is additionally rewritten via ``namespaceExpected``.

    Returns silently when constructing the parser or parsing a full document
    raises ``constants.DataLossWarning``.
    """
    # XXX - move this out into the setup function
    try:
        p = html5parser.HTMLParser(tree=treeClass,
                                   namespaceHTMLElements=namespaceHTMLElements)
    except constants.DataLossWarning:
        return

    try:
        if innerHTML:
            document = p.parseFragment(input, innerHTML)
        else:
            try:
                document = p.parse(input)
            except constants.DataLossWarning:
                return
    except Exception:
        # Exception, not a bare except: KeyboardInterrupt/SystemExit must
        # stop the run rather than show up as an assertion failure.
        errorMsg = u"\n".join([
            u"\n\nInput:", input,
            u"\nExpected:", expected,
            u"\nTraceback:", traceback.format_exc().decode('utf8'),
        ])
        assert False, errorMsg

    output = convertTreeDump(p.tree.testSerializer(document))

    expected = convertExpected(expected)
    if namespaceHTMLElements:
        # Same text as the former ur"" literal, written with escaped
        # backslashes because the ur prefix is a syntax error on Python 3.
        expected = namespaceExpected(u"\\1<html \\2>", expected)

    errorMsg = u"\n".join([
        u"\n\nInput:", input,
        u"\nExpected:", expected,
        u"\nReceived:", output,
    ])
    assert expected == output, errorMsg

    # NOTE(review): the conditional applies to the whole "template % datavars"
    # expression, so non-dict datavars are shown as the 1-tuple itself and are
    # not interpolated into the template. That looks unintended but is kept
    # as is; the parentheses only make the existing precedence explicit.
    errStr = [
        u"Line: %i Col: %i %s" % (
            line, col,
            (constants.E[errorcode] % datavars)
            if isinstance(datavars, dict) else (datavars,))
        for ((line, col), errorcode, datavars) in p.errors
    ]

    errorMsg2 = u"\n".join([
        u"\n\nInput:", input,
        u"\nExpected errors (" + unicode(len(errors)) + u"):\n" +
        u"\n".join(errors),
        u"\nActual errors (" + unicode(len(p.errors)) + u"):\n" +
        u"\n".join(errStr),
    ])
    if checkParseErrors:
        assert len(p.errors) == len(errors), errorMsg2
def runParserTest(self, innerHTML, input, expected, errors, treeClass):
    """Parse ``input`` (bytes) and check the tree dump against ``expected``.

    Args:
        innerHTML: Falsy, or UTF-8 bytes; a truthy value is decoded and
            selects fragment parsing (forwarded to ``parseFragment``).
        input: Test markup as bytes; fed to the parser via ``io.BytesIO``.
        expected: UTF-8 bytes of the expected tree dump, normalised with
            ``convertExpected`` before comparison.
        errors: Falsy, or UTF-8 bytes holding one expected parse error per
            line. Only the number of lines is compared, and only when the
            module-level ``checkParseErrors`` flag is true.
        treeClass: Tree builder handed to ``html5parser.HTMLParser``.

    The test returns silently (after a note on stderr) when parsing a full
    document raises ``constants.DataLossWarning``.
    """
    # XXX - move this out into the setup function
    p = html5parser.HTMLParser(tree=treeClass)

    if innerHTML:
        innerHTML = str(innerHTML, "utf8")

    # Always end up with a list: a missing/empty value used to be left as
    # is, and None then crashed on len(errors) while building the report.
    errors = str(errors, "utf8").split("\n") if errors else []

    expected = str(expected, "utf8")

    # Diagnostic copy of the input. Decoded leniently so that non-UTF-8 test
    # data cannot raise while a failure message is being assembled.
    input_text = str(input, "utf8", "replace")

    try:
        if innerHTML:
            document = p.parseFragment(io.BytesIO(input), innerHTML)
        else:
            try:
                document = p.parse(io.BytesIO(input))
            except constants.DataLossWarning:
                sys.stderr.write("Test input causes known dataloss, skipping")
                return
    except Exception:
        # Exception, not a bare except: KeyboardInterrupt/SystemExit must
        # stop the run rather than be reported as a test failure.
        errorMsg = "\n".join(["\n\nInput:", input_text,
                              "\nExpected:", expected,
                              "\nTraceback:", traceback.format_exc()])
        self.fail(errorMsg)

    output = convertTreeDump(p.tree.testSerializer(document))
    # Both sides go through the same attrlist/sortattrs substitution so the
    # comparison sees them in the same normalised form.
    output = attrlist.sub(sortattrs, output)
    expected = convertExpected(expected)
    expected = attrlist.sub(sortattrs, expected)

    errorMsg = "\n".join(["\n\nInput:", input_text, "\nExpected:", expected,
                          "\nReceived:", output])
    # assertEqual, not the assertEquals alias removed in Python 3.12.
    self.assertEqual(expected, output, errorMsg)

    errStr = ["Line: %i Col: %i %s %s" % (line, col,
                                          constants.E[errorcode], datavars)
              for ((line, col), errorcode, datavars) in p.errors]
    errorMsg2 = "\n".join([
        "\n\nInput:", input_text,
        "\nExpected errors (" + str(len(errors)) + "):\n" + "\n".join(errors),
        "\nActual errors (" + str(len(p.errors)) + "):\n" + "\n".join(errStr),
    ])
    if checkParseErrors:
        self.assertEqual(len(p.errors), len(errors), errorMsg2)
def runParserTest(self, innerHTML, input, expected, errors, treeClass,
                  namespaceHTMLElements):
    """Parse ``input`` and check the serialized tree against ``expected``.

    Args:
        innerHTML: Truthy to parse ``input`` as a fragment (forwarded to
            ``parseFragment``); falsy for a full-document parse.
        input: Test markup, passed to the parser unchanged.
        expected: Expected tree dump, normalised with ``convertExpected``.
        errors: Iterable of expected parse-error strings; only its length is
            checked, and only when module-level ``checkParseErrors`` is true.
        treeClass: Tree builder handed to ``html5parser.HTMLParser``.
        namespaceHTMLElements: Forwarded to the parser; when true the
            expected dump is additionally rewritten via ``namespaceExpected``.

    Returns silently when constructing the parser or parsing a full document
    raises ``constants.DataLossWarning``.
    """
    # XXX - move this out into the setup function
    try:
        p = html5parser.HTMLParser(tree=treeClass,
                                   namespaceHTMLElements=namespaceHTMLElements)
    except constants.DataLossWarning:
        return

    try:
        if innerHTML:
            document = p.parseFragment(input, innerHTML)
        else:
            try:
                document = p.parse(input)
            except constants.DataLossWarning:
                return
    except Exception:
        # Exception, not a bare except: KeyboardInterrupt/SystemExit must
        # stop the run rather than be reported as a test failure.
        errorMsg = u"\n".join([u"\n\nInput:", input, u"\nExpected:", expected,
                               u"\nTraceback:", traceback.format_exc()])
        self.fail(errorMsg.encode("utf8"))

    output = convertTreeDump(p.tree.testSerializer(document))
    # Both sides go through the same attrlist/sortattrs substitution so the
    # comparison sees them in the same normalised form.
    output = attrlist.sub(sortattrs, output)
    expected = convertExpected(expected)
    expected = attrlist.sub(sortattrs, expected)
    if namespaceHTMLElements:
        expected = namespaceExpected(r"\1<html \2>", expected)

    errorMsg = u"\n".join([u"\n\nInput:", input, u"\nExpected:", expected,
                           u"\nReceived:", output])
    self.assertEqual(expected, output, errorMsg.encode("utf8"))

    # NOTE(review): the conditional applies to the whole "template % datavars"
    # expression, so non-dict datavars are shown as the 1-tuple itself and are
    # not interpolated into the template. That looks unintended but is kept
    # as is; the parentheses only make the existing precedence explicit.
    errStr = [u"Line: %i Col: %i %s" % (
                  line, col,
                  (constants.E[errorcode] % datavars)
                  if isinstance(datavars, dict) else (datavars,))
              for ((line, col), errorcode, datavars) in p.errors]

    errorMsg2 = u"\n".join([
        u"\n\nInput:", input,
        u"\nExpected errors (" + str(len(errors)) + u"):\n" +
        u"\n".join(errors),
        u"\nActual errors (" + str(len(p.errors)) + u"):\n" +
        u"\n".join(errStr),
    ])
    if checkParseErrors:
        self.assertEqual(len(p.errors), len(errors),
                         errorMsg2.encode("utf-8"))
def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):
    """Parse ``input``, walk the tree and assert the tokens match ``expected``.

    Args:
        innerHTML: Truthy to parse ``input`` as a fragment (forwarded to
            ``parseFragment``); falsy for a full-document parse.
        input: Test markup, passed to the parser unchanged.
        expected: Expected output, passed through ``convertExpected``.
        errors: Unused in this function.
        treeClass: Mapping with ``"builder"`` and ``"walker"`` entries and an
            optional ``"adapter"`` callable applied to the parsed document.

    Returns without asserting when building the parser or parsing raises
    ``constants.DataLossWarning``. ``NotImplementedError`` raised while
    walking or comparing is ignored.
    """
    try:
        parser = html5parser.HTMLParser(tree=treeClass["builder"])
        parsed = (parser.parseFragment(input, innerHTML) if innerHTML
                  else parser.parse(input))
    except constants.DataLossWarning:
        # Ignore testcases we know we don't pass
        return

    # Builders without an adapter fall back to the identity function.
    adapt = treeClass.get("adapter", lambda x: x)
    walkable = adapt(parsed)

    try:
        token_stream = treeClass["walker"](walkable)
        received = attrlist.sub(sortattrs, convertTokens(token_stream))
        wanted = attrlist.sub(sortattrs, convertExpected(expected))
        assert wanted == received, "\n".join([
            "", "Input:", input,
            "", "Expected:", wanted,
            "", "Received:", received,
        ])
    except NotImplementedError:
        pass  # Amnesty for those that confess...
def run_test(innerHTML, input, expected, errors, treeClass):
    """Run one tree-walker test case against ``treeClass``.

    ``input`` is wrapped in ``StringIO.StringIO`` and parsed with the builder
    in ``treeClass["builder"]``. The document is passed through
    ``treeClass["adapter"]`` when present and walked with
    ``treeClass["walker"]``. The converted tokens must equal ``expected``
    after ``convertExpected``.

    Args:
        innerHTML: Truthy to parse as a fragment (forwarded to
            ``parseFragment``); falsy for a full-document parse.
        input: Test markup.
        expected: Expected output.
        errors: Unused in this function.
        treeClass: Mapping describing the tree implementation under test.

    Returns without asserting when building the parser or parsing raises
    ``constants.DataLossWarning``. ``NotImplementedError`` raised while
    walking or comparing is ignored.
    """
    try:
        parser = html5parser.HTMLParser(tree=treeClass["builder"])
        if not innerHTML:
            parsed = parser.parse(StringIO.StringIO(input))
        else:
            parsed = parser.parseFragment(StringIO.StringIO(input), innerHTML)
    except constants.DataLossWarning:
        # Ignore testcases we know we don't pass
        return

    # Builders without an adapter fall back to the identity function.
    adapter = treeClass.get("adapter", lambda x: x)
    parsed = adapter(parsed)

    try:
        walked = convertTokens(treeClass["walker"](parsed))
        walked = attrlist.sub(sortattrs, walked)
        reference = convertExpected(expected)
        reference = attrlist.sub(sortattrs, reference)
        assert reference == walked, "\n".join([
            "", "Input:", input,
            "", "Expected:", reference,
            "", "Received:", walked,
        ])
    except NotImplementedError:
        pass  # Amnesty for those that confess...