def test_remove_insignificant_text_nodes():
    """Clean a small HTML document and check that cleaning is idempotent.

    The fixture is parsed with ``parse_minidom``, passed to
    ``remove_insignificant_text_nodes`` (whose return value is not used),
    serialized with ``minidom_tostring`` and compared against
    ``target_html``.  The serialized output is then run through the same
    steps a second time and must compare equal to the same target.
    """
    # The fixture text is a runtime value and is kept exactly as given; it is
    # only split across adjacent literals to keep the lines short.
    html = dedent(
        " <html> <head /> <body> "
        "<p> one <em>two</em> <strong>three</strong> </p> "
        "<table> <tr> <td>stuff</td> </tr> </table> "
        "</body> </html> "
    )
    target_html = (
        "<p> one <em>two</em> <strong>three</strong> </p> "
        "<table><tr><td>stuff</td></tr></table>"
    )

    # Pass 1 cleans the fixture; pass 2 re-cleans pass 1's own output, which
    # is the idempotence check.
    for _ in range(2):
        dom = parse_minidom(html)
        remove_insignificant_text_nodes(dom)
        html = minidom_tostring(dom)
        assert_equal(html, target_html)
def test_remove_insignificant_text_nodes_nbsp():
    """Check that the contents of the three table cells are kept.

    The table fixture is parsed with ``parse_minidom``, passed to
    ``remove_insignificant_text_nodes`` (whose return value is not used) and
    serialized with ``minidom_tostring``.  The expected markup has no text
    between the structural tags, while each ``<td>`` still holds its
    one-character content.
    """
    # NOTE(review): the test name refers to nbsp, but the cell contents as
    # written here look like ordinary spaces -- confirm the intended
    # characters/entities against the upstream fixture.
    # The fixture text is a runtime value and is kept exactly as given; it is
    # only split across adjacent literals to keep the lines short.
    html = dedent(
        " <table> <tbody> <tr> "
        "<td> </td> <td> </td> <td> </td> "
        "</tr> </tbody> </table> "
    )
    expected_html = (
        "<table><tbody><tr><td> </td><td> </td><td> </td>"
        "</tr></tbody></table>"
    )

    dom = parse_minidom(html)
    remove_insignificant_text_nodes(dom)
    assert_equal(minidom_tostring(dom), expected_html)