def test_nobr(self):
    '''Check the nobr filter with space=True and with space=False.'''
    from mobilize.filters import nobr
    source = "<p>Hello.<br> This is a <br> broken<br>up paragraph.</p>"
    # (value passed as space=, expected serialization), checked in this order
    expectations = (
        (True, "<p>Hello. This is a broken up paragraph.</p>"),
        (False, "<p>Hello. This is a brokenup paragraph.</p>"),
    )
    for use_space, wanted in expectations:
        # parse a fresh tree for each setting; the filter is applied to the
        # element and the same element is then serialized
        root = html.fromstring(source)
        nobr(root, space=use_space)
        self.assertSequenceEqual(wanted, elem2str(root))
def test_absimgsrc(self):
    '''Check absimgsrc against a fixture mixing several kinds of img src.

    Per the expected markup below: the root-relative and document-relative
    sources are rewritten against the desktop URL, while the already-absolute
    URL, the /_mwu/ path, the data URL and the empty src are left unchanged.
    '''
    from mobilize.filters.misc import absimgsrc
    page_url = 'http://desktop.example.com/articles/delicious.html'
    # NOTE(review): the "GIF data URL" <img> below has a src attribute with no
    # payload and no closing quote of its own, in both fixtures -- presumably
    # a data: URI was lost at some point; confirm against the upstream file.
    markup = '''<div> <p>Hi there.</p> <img src="http://foo.example.com/path/to/bananas.jpg" alt="yellow fruit" width="23" height="480"> <img src="/_mwu/bananatree.jpg" alt="where they come from"> <p>Here's some more.</p> <img src="/fruitpics/strawberry.jpg" alt="berry good"> <p>and then: <img src="standard/nrolling-kiwi.gif" alt="delicious but takes time to peel"> <img src=" alt="GIF data URL"/> <img src="DATA:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAB4AAAAkCAMAAACpD3pbAAAAYFBMVEVZbYftuWNoP0iYbFJDHkBcNEWAVk3JmFzVo17hrmG8jVmQk5+8sJOwgldse4xPKUN0S0r3yXdLJULcwpI4FD75x2f5yniSmJ07Fz/WpF6MYU/wyoShn5XTvZT84bL5xGZPqaqJAAAAdUlEQVQ4y+XTRw7EQAgEQJg8jmtvdOz//9IvaE57c19LQoBAvjuNiMgJmkdrMl69yXjbfNyFtfMN5zo4P0RlPCYAzjN+KoDsGIcKoKGcRkAjLb78JhcS7TyuJRc6WIrWWrqgFodsLrXc95j+yq3Jm/X+n7mXC9defIzz7p9PAAAAAElFTkSuQmCC" alt="PNG data URL"/> <img src="" alt="Pathological HTML!"> </p> </div>'''
    wanted = '''<div> <p>Hi there.</p> <img src="http://foo.example.com/path/to/bananas.jpg" alt="yellow fruit" width="23" height="480"> <img src="/_mwu/bananatree.jpg" alt="where they come from"> <p>Here's some more.</p> <img src="http://desktop.example.com/fruitpics/strawberry.jpg" alt="berry good"> <p>and then: <img src="http://desktop.example.com/articles/standard/nrolling-kiwi.gif" alt="delicious but takes time to peel"> <img src=" alt="GIF data URL"/> <img src="DATA:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAB4AAAAkCAMAAACpD3pbAAAAYFBMVEVZbYftuWNoP0iYbFJDHkBcNEWAVk3JmFzVo17hrmG8jVmQk5+8sJOwgldse4xPKUN0S0r3yXdLJULcwpI4FD75x2f5yniSmJ07Fz/WpF6MYU/wyoShn5XTvZT84bL5xGZPqaqJAAAAdUlEQVQ4y+XTRw7EQAgEQJg8jmtvdOz//9IvaE57c19LQoBAvjuNiMgJmkdrMl69yXjbfNyFtfMN5zo4P0RlPCYAzjN+KoDsGIcKoKGcRkAjLb78JhcS7TyuJRc6WIrWWrqgFodsLrXc95j+yq3Jm/X+n7mXC9defIzz7p9PAAAAAElFTkSuQmCC" alt="PNG data URL"/> <img src="" alt="Pathological HTML!"> </p> </div>'''
    root = html.fromstring(markup)
    absimgsrc(root, page_url)
    self.assertSequenceEqual(normxml(wanted), normxml(elem2str(root)))
def test_noimgsize(self):
    '''Check noimgsize: the expected img markup carries no width/height.'''
    from mobilize.filters import noimgsize
    cases = [
        {'in': '''<div><img src="http://example.com/booger.png" width="1920" height="1280" alt=""/></div>''',
         'out': '''<div><img src="http://example.com/booger.png" alt=""></div>''',
         },
    ]
    for case in cases:
        rendered = elem2str(apply_filters(case['in'], [noimgsize]))
        self.assertSequenceEqual(case['out'], rendered)
def test_noattribs(self):
    '''Check noattribs for several tag lists, on a bare and a nested table.'''
    from mobilize.filters import noattribs
    bare_table = '''<table width="600" style="color: fuscia;"> <tr><td width="200">one</td><td>two</td></tr> <tr><td>three</td><td>four</td></tr> </table> '''
    wrapped_table = '''<div> <table width="600" style="color: fuscia;"> <tr><td width="200">one</td><td>two</td></tr> <tr><td>three</td><td>four</td></tr> </table> </div> '''
    cases = [
        # only <table> is listed, so the td width is expected to survive
        {'in_str': bare_table,
         'tags': ['table'],
         'attribs': ['width', 'style'],
         'out_str': '''<table> <tr><td width="200">one</td><td>two</td></tr> <tr><td>three</td><td>four</td></tr> </table> ''',
         },
        {'in_str': wrapped_table,
         'tags': ['table'],
         'attribs': ['width', 'style'],
         'out_str': '''<div> <table> <tr><td width="200">one</td><td>two</td></tr> <tr><td>three</td><td>four</td></tr> </table> </div> ''',
         },
        # both <table> and <td> are listed: no width expected anywhere
        {'in_str': wrapped_table,
         'tags': ['table', 'td'],
         'attribs': ['width', 'style'],
         'out_str': '''<div> <table> <tr><td>one</td><td>two</td></tr> <tr><td>three</td><td>four</td></tr> </table> </div> ''',
         },
    ]
    for case in cases:
        root = html.fragment_fromstring(case['in_str'], create_parent=False)
        noattribs(root, case['tags'], case['attribs'])
        self.assertSequenceEqual(normxml(case['out_str']),
                                 normxml(elem2str(root)))
def test_process(self):
    '''Check process(): its return value, the type of .elem, and the markup.'''
    cases = [
        {'elem_str': '<p>Hello</p>',
         'idname': 'beta',
         'newelem_str': '<div class="alpha" id="beta"><p>Hello</p></div>',
         },
    ]
    for case in cases:
        extracted = DummyExtracted('', filters=[], classvalue='alpha')
        # seed the extracted elements directly rather than running extract()
        extracted.elems = [html.fromstring(case['elem_str'])]
        returned = extracted.process(case['idname'])
        # process() must return the same element it stores on .elem
        self.assertEqual(returned, extracted.elem)
        self.assertEqual(html.HtmlElement, type(extracted.elem))
        self.assertSequenceEqual(case['newelem_str'], elem2str(extracted.elem))
def test_chain_filters(self):
    '''test that filters can be chained

    Applies nomiscattrib and then noevents_one to one fragment and compares
    the serialized result with the expected markup, in which neither the
    style attributes nor the onclick handler appear.
    '''
    from mobilize.filters import nomiscattrib
    from mobilize.filters.remove import noevents_one
    htmlin = '''<div class="foo" style="color: blue"> <h1 style="font-size: large;">The Headline</h1> <a href="#" onclick="alert('Good Job!');">Click Here</a> </div>'''
    htmlout = '''<div class="foo"> <h1>The Headline</h1> <a href="#">Click Here</a> </div>'''
    my_filters = [
        nomiscattrib,
        noevents_one,
    ]
    result = elem2str(apply_filters(htmlin, my_filters))
    # assertEqual rather than assertEquals: the latter is a deprecated alias
    # that no longer exists in Python 3.12+.
    self.assertEqual(result, htmlout)
def test_noevents(self):
    '''Check noevents_one on event-handler attributes in several casings.

    The fixtures cover onclick, ONCLICK and onClick on an anchor, and
    onmouseover on an img; each expected output has the handler removed.
    '''
    from mobilize.filters.remove import noevents_one
    testdata = [
        {'in': '''<a href="#" id="makeHPLink" onclick="cnnMakeHP('homepage_set_overlay')" class="realmLink">Make CNN Your Homepage</a>''',
         'out': '''<a href="#" id="makeHPLink" class="realmLink">Make CNN Your Homepage</a>''',
         },
        {'in': '''<a href="#" id="makeHPLink" ONCLICK="cnnMakeHP('homepage_set_overlay')" class="realmLink">Make CNN Your Homepage</a>''',
         'out': '''<a href="#" id="makeHPLink" class="realmLink">Make CNN Your Homepage</a>''',
         },
        {'in': '''<a href="#" id="makeHPLink" onClick="cnnMakeHP('homepage_set_overlay')" class="realmLink">Make CNN Your Homepage</a>''',
         'out': '''<a href="#" id="makeHPLink" class="realmLink">Make CNN Your Homepage</a>''',
         },
        {'in': '''<img src="http://example.com/boo.gif" alt="boo!" onmouseover="alert('boo!');">''',
         'out': '''<img src="http://example.com/boo.gif" alt="boo!">''',
         },
    ]
    for td in testdata:
        # assertEqual rather than assertEquals: the latter is a deprecated
        # alias that no longer exists in Python 3.12+.
        self.assertEqual(td['out'], elem2str(apply_filters(td['in'], [noevents_one])))
def test_nomiscattrib(self):
    '''Check nomiscattrib on style (in several casings) and target attributes.

    Each expected output is the input with the style or target attribute
    removed; class and href attributes are expected to remain.
    '''
    from mobilize.filters import nomiscattrib
    testdata = [
        {'in': '''<div class="foo" style="background-color: red;">Hello.</div>''',
         'out': '''<div class="foo">Hello.</div>''',
         },
        {'in': '''<div class="foo" STYLE="background-color: red;">Hello.</div>''',
         'out': '''<div class="foo">Hello.</div>''',
         },
        {'in': '''<div class="foo" Style="background-color: red;">Hello.</div>''',
         'out': '''<div class="foo">Hello.</div>''',
         },
        {'in': '''<div><a href="/a">Hello.</a> <a href="http://example.com" target="_blank">New Tab</a></div>''',
         'out': '''<div><a href="/a">Hello.</a> <a href="http://example.com">New Tab</a></div>''',
         },
    ]
    for td in testdata:
        # assertEqual rather than assertEquals: the latter is a deprecated
        # alias that no longer exists in Python 3.12+.
        self.assertEqual(td['out'], elem2str(apply_filters(td['in'], [nomiscattrib])))
def test_squeezebr(self):
    '''Check squeezebr: per the fixtures, runs of <br> become a single <br>.'''
    from mobilize.filters import squeezebr
    cases = [
        # nothing to squeeze
        {'in_str': '''<p>Hi.</p>''',
         'out_str': '''<p>Hi.</p>''',
         },
        {'in_str': '''<p>Hi.<br>Hey.</p>''',
         'out_str': '''<p>Hi.<br>Hey.</p>''',
         },
        # consecutive breaks
        {'in_str': '''<p>Hi.<br><br>Hey.</p>''',
         'out_str': '''<p>Hi.<br>Hey.</p>''',
         },
        {'in_str': '''<p>Hi.<br/><br/><br/><br/><br/><br/><br/><br/><br/>Hey.</p>''',
         'out_str': '''<p>Hi.<br>Hey.</p>''',
         },
        {'in_str': '''<div> <p>Hi.<br><br>Hey.</p> <p>This is some more text <br><br><br><br><br><img src="foo.png" alt="foo"/> </p> </div>''',
         'out_str': '''<div> <p>Hi.<br>Hey.</p> <p>This is some more text <br><img src="foo.png" alt="foo"> </p> </div> ''',
         },
        # breaks separated only by whitespace
        {'in_str': '''<p>Hi.<br> <br>Hey.</p>''',
         'out_str': '''<p>Hi.<br>Hey.</p>''',
         },
        # breaks separated by text are expected to stay
        {'in_str': '''<p>Hi.<br>How.<br>Hey.</p>''',
         'out_str': '''<p>Hi.<br>How.<br>Hey.</p>''',
         },
    ]
    for case in cases:
        root = html.fragment_fromstring(case['in_str'], create_parent=False)
        squeezebr(root)
        self.assertSequenceEqual(normxml(case['out_str']),
                                 normxml(elem2str(root)))
def test_resizeiframe(self):
    '''Check resizeiframe: the fixtures expect 533x330 to become 280x173.'''
    from mobilize.filters import resizeiframe
    cases = [
        # iframe nested inside another element
        {'iframe_str': '''<p> <iframe width="533" height="330" frameborder="0" allowfullscreen="" src="http://www.youtube.com/embed/HE6uqPPrVfo" title="YouTube video player"></iframe> </p>''',
         'resized_str': '''<p> <iframe width="280" height="173" frameborder="0" allowfullscreen="" src="http://www.youtube.com/embed/HE6uqPPrVfo" title="YouTube video player"></iframe> </p>''',
         },
        # iframe is itself the root element
        {'iframe_str': '''<iframe width="533" height="330" frameborder="0" allowfullscreen="" src="http://www.youtube.com/embed/HE6uqPPrVfo" title="YouTube video player"></iframe>''',
         'resized_str': '''<iframe width="280" height="173" frameborder="0" allowfullscreen="" src="http://www.youtube.com/embed/HE6uqPPrVfo" title="YouTube video player"></iframe>''',
         },
        # no iframe at all: markup is expected back unchanged
        {'iframe_str': '''<p>Nothing to see here.</p>''',
         'resized_str': '''<p>Nothing to see here.</p>''',
         },
    ]
    for case in cases:
        root = html.fragment_fromstring(case['iframe_str'], create_parent=False)
        resizeiframe(root)
        self.assertSequenceEqual(normxml(case['resized_str']),
                                 normxml(elem2str(root)))
def html(self):
    '''Return self.elem serialized with util.elem2str.

    Asserts that self.elem is not None, i.e. that extract() and process()
    have been run first, as the assertion message states.
    '''
    processed = self.elem
    assert processed is not None, 'Must invoke self.extract() and self.process() before rendering to html'
    return util.elem2str(processed)
def test_resizeobject(self):
    '''Check resizeobject on <object>/<embed> markup.

    The fixtures expect 800x344 to become 280x120 on both the object and
    its embed, in lower- and upper-case tag spellings, and cover missing or
    non-numeric dimensions as well as markup containing no object at all.
    '''
    from mobilize.filters import resizeobject
    cases = [
        # object nested inside other elements
        {'object_str': '''<div class="foobar"><ul><li><object width="800" height="344"> <param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US"/> <param name="allowFullScreen" value="true"/> <param name="allowscriptaccess" value="always"/> <embed src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="800" height="344"/> </object></li></ul></div>''',
         'resized_str': '''<div class="foobar"><ul><li><object width="280" height="120"> <param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US"> <param name="allowFullScreen" value="true"> <param name="allowscriptaccess" value="always"> <embed src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="280" height="120"></embed> </object></li></ul></div>''',
         },
        # object is itself the root element
        {'object_str': '''<object width="800" height="344"> <param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US"> <param name="allowFullScreen" value="true"> <param name="allowscriptaccess" value="always"> <embed src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="800" height="344"></embed> </object>''',
         'resized_str': '''<object width="280" height="120"> <param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US"> <param name="allowFullScreen" value="true"> <param name="allowscriptaccess" value="always"> <embed src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="280" height="120"></embed> </object>''',
         },
        # upper-case tag names (this input literal contains a line break)
        {'object_str': '''<OBJECT width="800" height="344"> <param name="movie" 
value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US"/> <param name="allowFullScreen" value="true"/> <param name="allowscriptaccess" value="always"/> <EMBED src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="800" height="344"/> </OBJECT>''',
         'resized_str': '''<object width="280" height="120"> <param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US"> <param name="allowFullScreen" value="true"> <param name="allowscriptaccess" value="always"> <embed src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="280" height="120"></embed> </object>''',
         },
        # If no height is defined, or the aspect ratio otherwise can't be
        # calculated, just ignore that attribute
        {'object_str': '''<OBJECT width="800"> <param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US"/> <param name="allowFullScreen" value="true"/> <param name="allowscriptaccess" value="always"/> <EMBED src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="800"/> </OBJECT>''',
         'resized_str': '''<object width="280"> <param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US"> <param name="allowFullScreen" value="true"> <param name="allowscriptaccess" value="always"> <embed src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="280"></embed> </object>''',
         },
        # neither width nor height given (this input literal contains a line break)
        {'object_str': '''<OBJECT> <param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US"/> <param name="allowFullScreen" value="true"/> <param name="allowscriptaccess" value="always"/> <EMBED src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" 
allowfullscreen="true"/> </OBJECT>''',
         'resized_str': '''<object width="280"> <param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US"> <param name="allowFullScreen" value="true"> <param name="allowscriptaccess" value="always"> <embed src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="280"></embed> </object>''',
         },
        # non-numeric height
        {'object_str': '''<OBJECT width="800" height="beer"> <param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US"/> <param name="allowFullScreen" value="true"/> <param name="allowscriptaccess" value="always"/> <EMBED src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="800" height="beer"/> </OBJECT>''',
         'resized_str': '''<object width="280"> <param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US"> <param name="allowFullScreen" value="true"> <param name="allowscriptaccess" value="always"> <embed src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="280"></embed> </object>''',
         },
        # no object at all: markup is expected back unchanged
        {'object_str': '''<p>Nothing to see here.</p>''',
         'resized_str': '''<p>Nothing to see here.</p>''',
         },
    ]
    for case in cases:
        root = html.fragment_fromstring(case['object_str'], create_parent=False)
        resizeobject(root)
        self.assertSequenceEqual(normxml(case['resized_str']),
                                 normxml(elem2str(root)))
def test_table2divrows(self):
    '''Check table2divrows: tables become nested row/column divs.

    The expected markup uses the class names mwu-table2divrows,
    mwu-table2divrows-rowN and mwu-table2divrows-rowN-colM. The fixtures
    cover tables with and without tbody, markup without any table, tables
    nested inside a cell, and a table that is itself the root element.
    '''
    from mobilize.filters import table2divrows
    cases = [
        {'in_str': '''<div><table> <tr> <td>Eggs</td> <td>Ham</td> </tr> <tr> <td>Beer</td> <td>Milk</td> </tr> </table></div> ''',
         'out_str': '''<div> <div class="mwu-table2divrows"> <div class="mwu-table2divrows-row0"> <div class="mwu-table2divrows-row0-col0 mwu-table2divrows-col0">Eggs</div> <div class="mwu-table2divrows-row0-col1 mwu-table2divrows-col1">Ham</div> </div> <div class="mwu-table2divrows-row1"> <div class="mwu-table2divrows-row1-col0 mwu-table2divrows-col0">Beer</div> <div class="mwu-table2divrows-row1-col1 mwu-table2divrows-col1">Milk</div> </div> </div> </div> ''',
         },
        #================
        # same table wrapped in an explicit tbody
        {'in_str': '''<div><table><tbody> <tr> <td>Eggs</td> <td>Ham</td> </tr> <tr> <td>Beer</td> <td>Milk</td> </tr> </tbody></table></div> ''',
         'out_str': '''<div> <div class="mwu-table2divrows"> <div class="mwu-table2divrows-row0"> <div class="mwu-table2divrows-row0-col0 mwu-table2divrows-col0">Eggs</div> <div class="mwu-table2divrows-row0-col1 mwu-table2divrows-col1">Ham</div> </div> <div class="mwu-table2divrows-row1"> <div class="mwu-table2divrows-row1-col0 mwu-table2divrows-col0">Beer</div> <div class="mwu-table2divrows-row1-col1 mwu-table2divrows-col1">Milk</div> </div> </div> </div> ''',
         },
        # no table: markup is expected back unchanged
        {'in_str': '''<div><p>Nothing here.</p></div>''',
         'out_str': '''<div><p>Nothing here.</p></div>''',
         },
        # a table nested in a cell is expected to be left as a table
        {'in_str': '''<div><table> <tr> <td><table id="foobar"><tr><td>Whoa</td><td>dude</td></tr></table></td> <td>Key Lime Pie</td> </tr> </table></div>''',
         'out_str': '''<div><div class="mwu-table2divrows"> <div class="mwu-table2divrows-row0"> <div class="mwu-table2divrows-row0-col0 mwu-table2divrows-col0"> <table id="foobar"><tr><td>Whoa</td><td>dude</td></tr></table> </div> <div class="mwu-table2divrows-row0-col1 mwu-table2divrows-col1">Key Lime Pie</div> </div> </div> </div>''',
         },
        # cell mixing text, nested tables and a paragraph
        # (this input literal contains a line break)
        {'in_str': '''<div><table> <tr> <td> Does html like this exist somewhere in the wild? 
<table id="foobar"><tr><td>Whoa</td><td>dude</td></tr></table> <p>yeah, I bet somewhere it does</p> (probably on some website that gets 10K hits on a slow day) <table id="foobar"><tr><td>Game</td><td>Over Man</td></tr></table> here's some extra trailing text for you too </td> <td>Key Lime Pie</td> </tr> </table></div>''',
         'out_str': '''<div><div class="mwu-table2divrows"> <div class="mwu-table2divrows-row0"> <div class="mwu-table2divrows-row0-col0 mwu-table2divrows-col0"> Does html like this exist somewhere in the wild? <table id="foobar"><tr><td>Whoa</td><td>dude</td></tr></table> <p>yeah, I bet somewhere it does</p> (probably on some website that gets 10K hits on a slow day) <table id="foobar"><tr><td>Game</td><td>Over Man</td></tr></table> here's some extra trailing text for you too </div> <div class="mwu-table2divrows-row0-col1 mwu-table2divrows-col1">Key Lime Pie</div> </div> </div> </div>''',
         },
        # the table is itself the root element
        {'in_str': '''<table> <tr> <td>Eggs</td> <td>Ham</td> </tr> <tr> <td>Beer</td> <td>Milk</td> </tr> </table> ''',
         'out_str': '''<div class="mwu-table2divrows"> <div class="mwu-table2divrows-row0"> <div class="mwu-table2divrows-row0-col0 mwu-table2divrows-col0">Eggs</div> <div class="mwu-table2divrows-row0-col1 mwu-table2divrows-col1">Ham</div> </div> <div class="mwu-table2divrows-row1"> <div class="mwu-table2divrows-row1-col0 mwu-table2divrows-col0">Beer</div> <div class="mwu-table2divrows-row1-col1 mwu-table2divrows-col1">Milk</div> </div> </div> ''',
         },
    ]
    for case in cases:
        root = html.fragment_fromstring(case['in_str'], create_parent=False)
        table2divrows(root)
        self.assertSequenceEqual(normxml(case['out_str']),
                                 normxml(elem2str(root)))
def test_table2divgroups(self):
    '''Check table2divgroups for a range of Spec lists.

    Each case supplies source markup, a list of Spec objects and the
    expected markup; a case may also set 'omit_whitespace', which defaults
    to True when absent.
    '''
    from mobilize.filters.tables import Spec
    from mobilize.filters import table2divgroups
    contact_table = '''<div id="some-container"> <table> <tbody> <tr> <td>CONTACT US</td> <td> </td> <td> </td> <td> </td> <tr> <td>123 Main Str</td> <td> </td> <td>OUR TEAM</td> <td> </td> <tr> <td>Springfield, IL</td> <td> </td> <td>Mike Smith</td> <td><img src="/mike-smith.jpg"/></td> <tr> <td>1-800-BUY-DUFF</td> <td> </td> <td>Jen Jones</td> <td><img src="/jen-jones.jpg"/></td> <tr> <td> </td> <td> </td> <td>Scruffy</td> <td><img src="/scruffy-the-dog.jpg"/></td> <tr> </tbody> </table> </div> '''
    cases = [
        # empty spec list: only the empty wrapper div is expected
        {'elem_str': contact_table,
         'specmap': [],
         'out_str': ''' <div id="some-container"> <div class="mwu-elem-table2divgroups"> </div> </div> ''',
         },
        {'elem_str': contact_table,
         'specmap': [
             Spec('idname1', 0, 0, 0, 0),
         ],
         'out_str': ''' <div id="some-container"> <div class="mwu-elem-table2divgroups"> <div class="mwu-elem-table2divgroups-group" id="idname1"> <div>CONTACT US</div> </div> </div> </div> ''',
         },
        {'elem_str': contact_table,
         'specmap': [
             Spec('idname1', 0, 0, 3, 0),
         ],
         'out_str': ''' <div id="some-container"> <div class="mwu-elem-table2divgroups"> <div class="mwu-elem-table2divgroups-group" id="idname1"> <div>CONTACT US</div> <div>123 Main Str</div> <div>Springfield, IL</div> <div>1-800-BUY-DUFF</div> </div> </div> </div> ''',
         },
        # two groups: expected output follows the order of the spec list
        {'elem_str': contact_table,
         'specmap': [
             Spec('idname1', 0, 0, 0, 0),
             Spec('idname2', 0, 0, 3, 0),
         ],
         'out_str': ''' <div id="some-container"> <div class="mwu-elem-table2divgroups"> <div class="mwu-elem-table2divgroups-group" id="idname1"> <div>CONTACT US</div> </div> <div class="mwu-elem-table2divgroups-group" id="idname2"> <div>CONTACT US</div> <div>123 Main Str</div> <div>Springfield, IL</div> <div>1-800-BUY-DUFF</div> </div> </div> </div> ''',
         },
        # (this expected literal contains a line break)
        {'elem_str': contact_table,
         'specmap': [
             Spec('idname2', 0, 0, 3, 0),
             Spec('idname1', 0, 0, 0, 0),
         ],
         'out_str': ''' <div id="some-container"> <div class="mwu-elem-table2divgroups"> <div 
class="mwu-elem-table2divgroups-group" id="idname2"> <div>CONTACT US</div> <div>123 Main Str</div> <div>Springfield, IL</div> <div>1-800-BUY-DUFF</div> </div> <div class="mwu-elem-table2divgroups-group" id="idname1"> <div>CONTACT US</div> </div> </div> </div> ''',
         },
        {'elem_str': contact_table,
         'specmap': [
             Spec('idname2', 0, 0, 3, 0),
             Spec('idname1', 0, 0, 0, 0),
         ],
         'out_str': ''' <div id="some-container"> <div class="mwu-elem-table2divgroups"> <div class="mwu-elem-table2divgroups-group" id="idname2"> <div>CONTACT US</div> <div>123 Main Str</div> <div>Springfield, IL</div> <div>1-800-BUY-DUFF</div> </div> <div class="mwu-elem-table2divgroups-group" id="idname1"> <div>CONTACT US</div> </div> </div> </div> ''',
         },
        # the spec reaches the fifth row, whose first cell is whitespace;
        # with the default omit_whitespace no div is expected for it
        {'elem_str': contact_table,
         'specmap': [
             Spec('idname1', 0, 0, 4, 0),
         ],
         'out_str': ''' <div id="some-container"> <div class="mwu-elem-table2divgroups"> <div class="mwu-elem-table2divgroups-group" id="idname1"> <div>CONTACT US</div> <div>123 Main Str</div> <div>Springfield, IL</div> <div>1-800-BUY-DUFF</div> </div> </div> </div> ''',
         },
        # same spec with omit_whitespace=False: the whitespace cell is kept
        {'elem_str': contact_table,
         'omit_whitespace': False,
         'specmap': [
             Spec('idname1', 0, 0, 4, 0),
         ],
         'out_str': ''' <div id="some-container"> <div class="mwu-elem-table2divgroups"> <div class="mwu-elem-table2divgroups-group" id="idname1"> <div>CONTACT US</div> <div>123 Main Str</div> <div>Springfield, IL</div> <div>1-800-BUY-DUFF</div> <div> </div> </div> </div> </div> ''',
         },
        # a multi-column range: each row is expected in its own div
        {'elem_str': contact_table,
         'specmap': [
             Spec('idname1', 1, 2, 4, 3),
         ],
         'out_str': ''' <div id="some-container"> <div class="mwu-elem-table2divgroups"> <div class="mwu-elem-table2divgroups-group" id="idname1"> <div> <div>OUR TEAM</div> </div> <div> <div>Mike Smith</div> <div><img src="/mike-smith.jpg"></div> </div> <div> <div>Jen Jones</div> <div><img src="/jen-jones.jpg"></div> </div> <div> <div>Scruffy</div> <div><img src="/scruffy-the-dog.jpg"></div> </div> </div> </div> </div> ''',
         },
        # table with a colspan cell (this input literal contains a line break)
        {'elem_str': '''<div> <table> <tr><td 
colspan="3">a</td></tr> <tr> <td>b</td> <td>c</td> <td>d</td> </tr> </table> ''',
         'specmap': [
             Spec('idname1', 0, 0, 1, 1),
         ],
         'out_str': ''' <div> <div class="mwu-elem-table2divgroups"> <div class="mwu-elem-table2divgroups-group" id="idname1"> <div><div>a</div></div> <div> <div>b</div> <div>c</div> </div> </div> </div> </div> ''',
         },
    ]
    for case in cases:
        skip_blank = case.get('omit_whitespace', True)
        root = html.fromstring(case['elem_str'])
        table2divgroups(root, case['specmap'], omit_whitespace=skip_blank)
        self.assertSequenceEqual(normxml(case['out_str']),
                                 normxml(elem2str(root)))