def runTreewalkerTest(innerHTML, input, expected, errors, treeClass):
    """Parse ``input``, walk the resulting tree and compare with ``expected``.

    Args:
        innerHTML: Fragment container; when truthy, ``input`` is parsed with
            ``parseFragment`` instead of ``parse``.
        input: Markup handed to the parser.
        expected: Expected tree dump; it is passed through
            ``convertExpected`` and attribute-sorted before the comparison.
        errors: Not used by this function; kept so the signature is unchanged.
        treeClass: Mapping providing the ``"builder"`` and ``"walker"``
            entries and, optionally, an ``"adapter"`` callable that is
            applied to the parsed document before walking it.

    Raises:
        AssertionError: If the pretty-printed walker output differs from the
            expected dump. The message contains the input, both dumps and a
            unified diff.
    """
    # Warnings are promoted to errors only for the duration of this call.
    # catch_warnings() restores the previous filters on exit, so the
    # process-wide warning configuration is not left modified for whatever
    # runs afterwards.
    with warnings.catch_warnings():
        warnings.resetwarnings()
        warnings.simplefilter("error")
        try:
            p = html5parser.HTMLParser(tree=treeClass["builder"])
            if innerHTML:
                document = p.parseFragment(input, innerHTML)
            else:
                document = p.parse(input)
        except constants.DataLossWarning:
            # Ignore testcases we know we don't pass
            return

        document = treeClass.get("adapter", lambda x: x)(document)
        try:
            output = treewalkers.pprint(Lint(treeClass["walker"](document)))
            output = attrlist.sub(sortattrs, output)
            expected = attrlist.sub(sortattrs, convertExpected(expected))
            diff = "".join(unified_diff(
                [line + "\n" for line in expected.splitlines()],
                [line + "\n" for line in output.splitlines()],
                "Expected", "Received"))
            assert expected == output, "\n".join([
                "", "Input:", input,
                "", "Expected:", expected,
                "", "Received:", output,
                "", "Diff:", diff,
            ])
        except NotImplementedError:
            pass  # Amnesty for those that confess...
Пример #2
0
def test_maintain_attribute_order_adjusted(treeName):
    """Check attribute order and names for an ``<svg>`` fragment.

    The fragment mixes plain attributes with ``refx`` and ``xml:lang``; the
    walker's start tag is expected to report them in source order, as
    ``refX`` and as ``lang`` in the XML namespace respectively.

    Args:
        treeName: Key into ``treeTypes`` selecting the tree implementation.
            The test is skipped when that entry is ``None``.
    """
    tree_apis = treeTypes[treeName]
    if tree_apis is None:
        pytest.skip("Treebuilder not loaded")

    markup = "<svg a=1 refx=2 b=3 xml:lang=4 c=5>"
    fragment = html5parser.HTMLParser(
        tree=tree_apis["builder"]).parseFragment(markup)

    adapt = tree_apis.get("adapter", lambda x: x)
    fragment = adapt(fragment)
    tokens = list(Lint(tree_apis["walker"](fragment)))

    # Exactly one start tag followed by its end tag.
    assert len(tokens) == 2
    start_tag, end_tag = tokens
    assert start_tag['type'] == 'StartTag'
    assert end_tag['type'] == "EndTag"

    expected_attrs = [
        ((None, 'a'), '1'),
        ((None, 'refX'), '2'),
        ((None, 'b'), '3'),
        (('http://www.w3.org/XML/1998/namespace', 'lang'), '4'),
        ((None, 'c'), '5'),
    ]
    assert list(start_tag['data'].items()) == expected_attrs
Пример #3
0
def test_maintain_attribute_order(treeName):
    """Check that a ``<span>`` with many attributes keeps their source order.

    Args:
        treeName: Key into ``treeTypes`` selecting the tree implementation.
            The test is skipped when that entry is ``None``.
    """
    tree_apis = treeTypes[treeName]
    if tree_apis is None:
        pytest.skip("Treebuilder not loaded")

    # Use a large number of attributes so that a hash-ordered container
    # would be likely to reorder them.
    letters = [unichr(code) for code in range(ord('a'), ord('z'))]
    attrs = [(letter, text_type(index))
             for index, letter in enumerate(letters)]
    markup = "<span " + " ".join("%s='%s'" % pair for pair in attrs) + ">"

    fragment = html5parser.HTMLParser(
        tree=tree_apis["builder"]).parseFragment(markup)

    adapt = tree_apis.get("adapter", lambda x: x)
    fragment = adapt(fragment)
    tokens = list(Lint(tree_apis["walker"](fragment)))

    # Exactly one start tag followed by its end tag.
    assert len(tokens) == 2
    start_tag, end_tag = tokens
    assert start_tag['type'] == 'StartTag'
    assert end_tag['type'] == "EndTag"

    attrs_out = start_tag['data']
    assert len(attrs) == len(attrs_out)

    # Walk input and output attributes in lockstep; output names are
    # (namespace, local-name) pairs with no namespace expected here.
    for expected_pair, actual_pair in zip(attrs, attrs_out.items()):
        in_name, in_value = expected_pair
        out_name, out_value = actual_pair
        assert (None, in_name) == out_name
        assert in_value == out_value
Пример #4
0
def test_lxml_xml():
    """Walk a plain lxml XML tree and check the emitted token sequence.

    Two nested ``<div>`` elements parsed with ``lxml.etree.fromstring`` are
    expected to yield two start tags followed by two end tags, all with
    ``namespace`` set to ``None``.
    """
    def start_tag():
        return {'data': {}, 'name': 'div', 'namespace': None,
                'type': 'StartTag'}

    def end_tag():
        return {'name': 'div', 'namespace': None, 'type': 'EndTag'}

    expected = [start_tag(), start_tag(), end_tag(), end_tag()]

    tree = lxml.etree.fromstring('<div><div></div></div>')
    make_walker = treewalkers.getTreeWalker('lxml')
    tokens = Lint(make_walker(tree))

    assert list(tokens) == expected
Пример #5
0
def serialize_html(input, options):
    """Render ``input`` with ``HTMLSerializer`` configured from ``options``.

    Args:
        input: Token data wrapped in ``JsonWalker`` and then ``Lint``.
        options: Mapping of serializer options. Keys are converted with
            ``str``; an ``"encoding"`` entry is removed and passed to
            ``render`` instead of the serializer constructor.

    Returns:
        Whatever ``HTMLSerializer.render`` returns for the stream.
    """
    serializer_kwargs = {str(key): value for key, value in options.items()}
    # "encoding" belongs to render(), not to the serializer constructor.
    encoding = serializer_kwargs.pop("encoding", None)
    token_stream = Lint(JsonWalker(input), False)
    serializer = HTMLSerializer(alphabetical_attributes=True,
                                **serializer_kwargs)
    return serializer.render(token_stream, encoding)
Пример #6
0
    def runtest(self):
        """Run one tree-walker test case against ``self.treeAPIs``.

        The case is skipped when the tree builder is not loaded, when
        parsing raises ``DataLossWarning``, when the parser's own dump does
        not match the expected document, or when the walker raises
        ``NotImplementedError``.

        Raises:
            AssertionError: If the pretty-printed walker output differs from
                the expected dump.
        """
        if self.treeAPIs is None:
            pytest.skip("Treebuilder not loaded")

        p = html5parser.HTMLParser(tree=self.treeAPIs["builder"])

        input = self.test['data']
        container = self.test['document-fragment']
        expected = convertExpected(self.test['document'])
        scripting = 'script-on' in self.test

        # Promote warnings to errors only while parsing so that data loss
        # surfaces as an exception; filters are restored afterwards.
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            try:
                if container:
                    document = p.parseFragment(
                        input, container, scripting=scripting)
                else:
                    document = p.parse(input, scripting=scripting)
            except constants.DataLossWarning:
                pytest.skip("data loss warning")

        # Only exercise the walker when the parser itself produced the
        # expected tree.
        parser_dump = convertTreeDump(p.tree.testSerializer(document))
        if parser_dump != namespaceExpected(r"\1<html \2>", expected):
            pytest.skip("parser output incorrect")

        adapt = self.treeAPIs.get("adapter", lambda x: x)
        document = adapt(document)

        try:
            walked = Lint(self.treeAPIs["walker"](document))
            output = sortattrs(treewalkers.pprint(walked))
            expected = sortattrs(expected)
            expected_lines = [line + "\n" for line in expected.splitlines()]
            output_lines = [line + "\n" for line in output.splitlines()]
            diff = "".join(unified_diff(expected_lines, output_lines,
                                        "Expected", "Received"))
            assert expected == output, "\n".join([
                "", "Input:", input,
                "", "Expected:", expected,
                "", "Received:", output,
                "", "Diff:", diff,
            ])
        except NotImplementedError:
            pytest.skip("tree walker NotImplementedError")
Пример #7
0
def test_all_tokens():
    """Check the full token stream for a small document on every tree type.

    Each loaded entry of ``treeTypes`` parses the same document; the linted
    walker output must match ``expected`` token for token and contain
    exactly as many tokens.
    """
    expected = [{
        'data': {},
        'type': 'StartTag',
        'namespace': 'http://www.w3.org/1999/xhtml',
        'name': 'html'
    }, {
        'data': {},
        'type': 'StartTag',
        'namespace': 'http://www.w3.org/1999/xhtml',
        'name': 'head'
    }, {
        'type': 'EndTag',
        'namespace': 'http://www.w3.org/1999/xhtml',
        'name': 'head'
    }, {
        'data': {},
        'type': 'StartTag',
        'namespace': 'http://www.w3.org/1999/xhtml',
        'name': 'body'
    }, {
        'data': 'a',
        'type': 'Characters'
    }, {
        'data': {},
        'type': 'StartTag',
        'namespace': 'http://www.w3.org/1999/xhtml',
        'name': 'div'
    }, {
        'data': 'b',
        'type': 'Characters'
    }, {
        'type': 'EndTag',
        'namespace': 'http://www.w3.org/1999/xhtml',
        'name': 'div'
    }, {
        'data': 'c',
        'type': 'Characters'
    }, {
        'type': 'EndTag',
        'namespace': 'http://www.w3.org/1999/xhtml',
        'name': 'body'
    }, {
        'type': 'EndTag',
        'namespace': 'http://www.w3.org/1999/xhtml',
        'name': 'html'
    }]
    for _, treeCls in sorted(treeTypes.items()):
        if treeCls is None:
            # Tree implementation not available in this environment.
            continue
        p = html5parser.HTMLParser(tree=treeCls["builder"])
        document = p.parse(
            "<html><head></head><body>a<div>b</div>c</body></html>")
        document = treeCls.get("adapter", lambda x: x)(document)
        output = list(Lint(treeCls["walker"](document)))
        # Compare pairwise first so a mismatch reports the offending token.
        for expectedToken, outputToken in zip(expected, output):
            assert expectedToken == outputToken
        # zip() stops at the shorter sequence, so missing or extra tokens
        # would otherwise go unnoticed.
        assert len(output) == len(expected)
Пример #8
0
def test_fragment_single_char(tree, char):
    """Check that a one-character fragment walks to a single token.

    Args:
        tree: ``(name, tree_class)`` pair; the test is skipped when
            ``tree_class`` is ``None``.
        char: Text parsed as a fragment and expected back as the ``data`` of
            one ``Characters`` token.
    """
    _name, tree_class = tree
    if tree_class is None:
        pytest.skip("Treebuilder not loaded")

    fragment = html5parser.HTMLParser(
        tree=tree_class["builder"]).parseFragment(char)
    adapt = tree_class.get("adapter", lambda x: x)
    fragment = adapt(fragment)
    tokens = Lint(tree_class["walker"](fragment))

    assert list(tokens) == [{'data': char, 'type': 'Characters'}]