示例#1
0
def test_remove_insignificant_text_nodes():
    """Check remove_insignificant_text_nodes on an indented document.

    The serialized result must equal the expected compact markup, and
    running the same steps again on that result must leave it unchanged.
    """
    source_html = dedent('''
        <html>
            <head />
            <body>
                <p>
                    one <em>two</em> <strong>three</strong>
                </p>
                <table>
                    <tr>
                        <td>stuff</td>
                    </tr>
                </table>
            </body>
        </html>
    ''')
    expected = (
        '<p> one <em>two</em> <strong>three</strong> </p> '
        '<table><tr><td>stuff</td></tr></table>'
    )

    # Pass 1 cleans the indented source; pass 2 feeds the cleaned output back
    # in to check that the operation is idempotent.
    current = source_html
    for _ in range(2):
        tree = parse_minidom(current)
        remove_insignificant_text_nodes(tree)
        current = minidom_tostring(tree)
        assert_equal(current, expected)
示例#2
0
def test_remove_insignificant_text_nodes():
    """Check remove_insignificant_text_nodes on an indented document.

    The serialized result must equal the expected compact markup, both on
    the first run and when the output is run through the same steps again.
    """

    def cleaned(markup):
        # Parse, apply remove_insignificant_text_nodes, serialize back.
        tree = parse_minidom(markup)
        remove_insignificant_text_nodes(tree)
        return minidom_tostring(tree)

    source_html = dedent(
        """
        <html>
            <head />
            <body>
                <p>
                    one <em>two</em> <strong>three</strong>
                </p>
                <table>
                    <tr>
                        <td>stuff</td>
                    </tr>
                </table>
            </body>
        </html>
    """
    )
    expected = (
        "<p> one <em>two</em> <strong>three</strong> </p> "
        "<table><tr><td>stuff</td></tr></table>"
    )

    first_pass = cleaned(source_html)
    assert_equal(first_pass, expected)

    # Idempotence: cleaning already-cleaned markup must not change it.
    second_pass = cleaned(first_pass)
    assert_equal(second_pass, expected)
示例#3
0
 def test(original, distributed):
     """Apply distribute() at location [0] of `original`; expect `distributed`.

     Both arguments are markup strings handed to parse_minidom.
     """
     source_dom = parse_minidom(original)
     expected_dom = parse_minidom(distributed)
     # The tree is serialized after distribute() is called on one of its nodes.
     distribute(get_location(source_dom, [0]))
     actual_html = minidom_tostring(source_dom)
     expected_html = minidom_tostring(expected_dom)
     assert_html_equal(actual_html, expected_html)
 def test(original, distributed):
     """Check that distributing node [0] of `original` yields `distributed`.

     Both arguments are parsed with parse_minidom; the comparison is done on
     the serialized trees via assert_html_equal.
     """
     original_dom, distributed_dom = [
         parse_minidom(markup) for markup in (original, distributed)
     ]
     target_node = get_location(original_dom, [0])
     distribute(target_node)
     assert_html_equal(
         minidom_tostring(original_dom),
         minidom_tostring(distributed_dom),
     )
示例#5
0
文件: tests.py 项目: tex/htmltreediff
def test_parse_comments():
    """Check that comments are absent from the parsed-and-serialized markup."""
    # (input markup, expected serialized output)
    cases = [
        ('<!-- -->', ''),
        ('<!--\n-->', ''),
        ('<p>stuff<!-- \n -->stuff</p>', '<p>stuffstuff</p>'),
    ]
    for markup, expected in cases:
        serialized = minidom_tostring(parse_minidom(markup))
        assert_html_equal(serialized, expected)
示例#6
0
文件: html.py 项目: tex/htmltreediff
def diff(old_html, new_html, cutoff=0.0, plaintext=False, pretty=False):
    """Render the differences between two documents as html.

    The returned html is the document with extra tags marking the changes:
    <ins> around newly added sections and <del> around deleted sections.

    Args:
        old_html: The previous version of the document.
        new_html: The new version of the document.
        cutoff: Passed to check_text_similarity; when that check fails, a
            fixed "too large to show" message is returned instead of a diff.
        plaintext: When true, inputs are parsed with parse_text and the
            HTML-specific list/table cleanup is skipped.
        pretty: Forwarded to minidom_tostring.

    Returns:
        The serialized diff (only the <body> element when the result has
        exactly one), or the fixed <h2> message.
    """
    parse = parse_text if plaintext else parse_minidom
    old_dom = parse(old_html)
    new_dom = parse(new_html)

    # Documents that are not similar enough get a fixed message, not a diff.
    if not check_text_similarity(old_dom, new_dom, cutoff):
        return (
            '<h2>The differences from the previous version are too large to '
            'show concisely.</h2>')

    changes = dom_diff(old_dom, new_dom)

    # List and table cleanup only applies to real html input.
    if not plaintext:
        fix_lists(changes)
        fix_tables(changes)

    # Serialize just the body element when there is exactly one of them.
    bodies = changes.getElementsByTagName('body')
    output_root = bodies[0] if len(bodies) == 1 else changes
    return minidom_tostring(output_root, pretty=pretty)
示例#7
0
def diff(old_html, new_html, cutoff=0.0, plaintext=False, pretty=False):
    """Produce html that shows what changed between two document versions.

    Newly added sections are wrapped in <ins> tags and deleted sections in
    <del> tags.

    Args:
        old_html: The previous version of the document.
        new_html: The new version of the document.
        cutoff: Threshold handed to check_text_similarity; if the check
            fails, a fixed <h2> message is returned and no diff is computed.
        plaintext: Parse the inputs with parse_text instead of parse_minidom
            and skip the list/table cleanup.
        pretty: Forwarded to minidom_tostring.

    Returns:
        The serialized changes, restricted to the <body> element when the
        result contains exactly one.
    """
    if plaintext:
        parser = parse_text
    else:
        parser = parse_minidom
    old_dom, new_dom = parser(old_html), parser(new_html)

    # Bail out early when the documents are too different to show a diff.
    similar_enough = check_text_similarity(old_dom, new_dom, cutoff)
    if not similar_enough:
        return (
            "<h2>The differences from the previous version are too large to "
            "show concisely.</h2>"
        )

    changes = dom_diff(old_dom, new_dom)

    # HTML-specific cleanup.
    if not plaintext:
        fix_lists(changes)
        fix_tables(changes)

    # Only the body contents are returned when a single body is present.
    body_elements = changes.getElementsByTagName("body")
    if len(body_elements) == 1:
        return minidom_tostring(body_elements[0], pretty=pretty)
    return minidom_tostring(changes, pretty=pretty)
示例#8
0
def test_remove_insignificant_text_nodes_nbsp():
    """Check remove_insignificant_text_nodes on cells with space-like content.

    The cells hold a literal space, ``&#160;`` and ``&nbsp;``. The expected
    output keeps one character in every cell and has no whitespace between
    the table tags.
    """
    markup = dedent(
        """
        <table>
        <tbody>
        <tr>
            <td> </td>
            <td>&#160;</td>
            <td>&nbsp;</td>
        </tr>
        </tbody>
        </table>
    """
    )
    expected = "<table><tbody><tr><td> </td><td> </td><td> </td>" "</tr></tbody></table>"

    tree = parse_minidom(markup)
    remove_insignificant_text_nodes(tree)
    assert_equal(minidom_tostring(tree), expected)
示例#9
0
def test_remove_insignificant_text_nodes_nbsp():
    """Check remove_insignificant_text_nodes on cells with space-like content.

    The three cells hold a literal space, ``&#160;`` and ``&nbsp;``. Each is
    expected to keep one character, while the whitespace between the table
    tags is expected to disappear.
    """
    source = dedent('''
        <table>
        <tbody>
        <tr>
            <td> </td>
            <td>&#160;</td>
            <td>&nbsp;</td>
        </tr>
        </tbody>
        </table>
    ''')
    expected = (
        '<table><tbody><tr><td> </td><td> </td><td> </td>'
        '</tr></tbody></table>'
    )

    document = parse_minidom(source)
    remove_insignificant_text_nodes(document)
    serialized = minidom_tostring(document)
    assert_equal(serialized, expected)
示例#10
0
 def test():
     """Check both serializations of the parsed `old_html`.

     `old_html`, `target` and `target_raw` come from the enclosing scope.
     """
     parsed = parse_minidom(old_html)
     rendered = minidom_tostring(parsed)
     assert_equal(rendered, target)
     # The raw toxml() output is compared with its XML declaration removed.
     raw_xml = remove_xml_declaration(parsed.toxml())
     assert_equal(raw_xml, target_raw)
示例#11
0
 def test():
     """Run fix_tables on `changes` and compare against `fixed_changes`.

     Both names come from the enclosing scope; `changes` is parsed with
     strict_xml=True.
     """
     tree = parse_minidom(changes, strict_xml=True)
     fix_tables(tree)
     serialized = minidom_tostring(tree)
     assert_html_equal(serialized, fixed_changes)
示例#12
0
 def test():
     """Run fix_lists on `changes` and compare against `fixed_changes`.

     Both names come from the enclosing scope.
     """
     tree = parse_minidom(changes)
     fix_lists(tree)
     serialized = minidom_tostring(tree)
     assert_html_equal(serialized, fixed_changes)
示例#13
0
def strip_changes_new(html):
    """Parse `html`, apply _strip_changes_new to the tree, and serialize it.

    Returns the string produced by minidom_tostring for the processed tree.
    """
    tree = parse_minidom(html)
    _strip_changes_new(tree)
    serialized = minidom_tostring(tree)
    return serialized
示例#14
0
 def test():
     """Compare the parsed `old_html` against `target` and `target_raw`.

     All three names are read from the enclosing scope.
     """
     document = parse_minidom(old_html)
     assert_equal(minidom_tostring(document), target)
     # toxml() output is checked after its XML declaration is removed.
     without_declaration = remove_xml_declaration(document.toxml())
     assert_equal(without_declaration, target_raw)
示例#15
0
def html_patch(old_html, edit_script):
    """Run `edit_script` against `old_html` and return the serialized result.

    The document is parsed and passed through split_text_nodes before an
    EditScriptRunner executes the script on it.
    """
    document = parse_minidom(old_html)
    split_text_nodes(document)
    patched = EditScriptRunner(document, edit_script).run_edit_script()
    return minidom_tostring(patched)
示例#16
0
def strip_changes_new(html):
    """Return `html` re-serialized after _strip_changes_new has processed it.

    The markup is parsed with parse_minidom and written back out with
    minidom_tostring.
    """
    document = parse_minidom(html)
    _strip_changes_new(document)
    result = minidom_tostring(document)
    return result
示例#17
0
def reverse_changes_html(changes):
    """Parse `changes`, apply reverse_changes to the tree, and serialize it.

    Returns the string produced by minidom_tostring for the processed tree.
    """
    tree = parse_minidom(changes)
    reverse_changes(tree)
    serialized = minidom_tostring(tree)
    return serialized
示例#18
0
def test_xml_diff():
    """Check dom_diff output for every entry of `test_cases`.

    Each entry is a (name, old markup, new markup, expected output) tuple;
    both documents are parsed with strict_xml=True.
    """
    for case in test_cases:
        _case_name, old_markup, new_markup, expected = case
        before = parse_minidom(old_markup, strict_xml=True)
        after = parse_minidom(new_markup, strict_xml=True)
        changes = dom_diff(before, after)
        assert_equal(minidom_tostring(changes), expected)
示例#19
0
def remove_attributes(html):
    """Parse `html`, apply remove_dom_attributes to the tree, and serialize it.

    Returns the string produced by minidom_tostring for the processed tree.
    """
    tree = parse_minidom(html)
    remove_dom_attributes(tree)
    serialized = minidom_tostring(tree)
    return serialized
示例#20
0
def test_xml_diff():
    """Diff each old/new pair in `test_cases` and compare with its target.

    Inputs are parsed with strict_xml=True; the first tuple field (the case
    name) is not used in the comparison.
    """
    for _name, old_markup, new_markup, expected_xml in test_cases:
        old_tree, new_tree = [
            parse_minidom(markup, strict_xml=True)
            for markup in (old_markup, new_markup)
        ]
        actual_xml = minidom_tostring(dom_diff(old_tree, new_tree))
        assert_equal(actual_xml, expected_xml)
示例#21
0
 def test():
     """Check that fix_lists turns `changes` into `fixed_changes`.

     Both names are read from the enclosing scope.
     """
     document = parse_minidom(changes)
     fix_lists(document)
     actual = minidom_tostring(document)
     assert_html_equal(actual, fixed_changes)
示例#22
0
def html_patch(old_html, edit_script):
    """Apply `edit_script` to the parsed `old_html` and return it serialized.

    split_text_nodes is applied to the parsed document before the
    EditScriptRunner is created.
    """
    source_dom = parse_minidom(old_html)
    split_text_nodes(source_dom)
    script_runner = EditScriptRunner(source_dom, edit_script)
    result_dom = script_runner.run_edit_script()
    return minidom_tostring(result_dom)
示例#23
0
 def test():
     """Check that fix_tables turns `changes` into `fixed_changes`.

     Both names are read from the enclosing scope; parsing uses
     strict_xml=True.
     """
     document = parse_minidom(changes, strict_xml=True)
     fix_tables(document)
     actual = minidom_tostring(document)
     assert_html_equal(actual, fixed_changes)
示例#24
0
def remove_attributes(html):
    """Return `html` re-serialized after remove_dom_attributes has processed it.

    The markup is parsed with parse_minidom and written back out with
    minidom_tostring.
    """
    document = parse_minidom(html)
    remove_dom_attributes(document)
    result = minidom_tostring(document)
    return result
示例#25
0
def reverse_changes_html(changes):
    """Return `changes` re-serialized after reverse_changes has processed it.

    The markup is parsed with parse_minidom and written back out with
    minidom_tostring.
    """
    document = parse_minidom(changes)
    reverse_changes(document)
    result = minidom_tostring(document)
    return result