示例#1
0
 def test_nobr(self):
     '''Run nobr with space=True and space=False and compare the serialized output.'''
     from mobilize.filters import nobr
     markup = "<p>Hello.<br> This is a <br> broken<br>up paragraph.</p>"
     # Each entry: (value passed as ``space``, serialization the test expects).
     cases = (
         (True, "<p>Hello. This is a  broken up paragraph.</p>"),
         (False, "<p>Hello. This is a  brokenup paragraph.</p>"),
     )
     for use_space, expected in cases:
         # Parse a fresh tree per case; nobr is applied to the element
         # itself and its return value is not used.
         root = html.fromstring(markup)
         nobr(root, space=use_space)
         serialized = elem2str(root)
         self.assertSequenceEqual(expected, serialized)
示例#2
0
    def test_absimgsrc(self):
        html1_in = '''<div>
<p>Hi there.</p>
<img src="http://foo.example.com/path/to/bananas.jpg" alt="yellow fruit" width="23" height="480">
<img src="/_mwu/bananatree.jpg" alt="where they come from">
<p>Here's some more.</p>
<img src="/fruitpics/strawberry.jpg" alt="berry good">
<p>and then:
<img src="standard/nrolling-kiwi.gif" alt="delicious but takes time to peel">
<img src=" alt="GIF data URL"/>
<img src="DATA:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAB4AAAAkCAMAAACpD3pbAAAAYFBMVEVZbYftuWNoP0iYbFJDHkBcNEWAVk3JmFzVo17hrmG8jVmQk5+8sJOwgldse4xPKUN0S0r3yXdLJULcwpI4FD75x2f5yniSmJ07Fz/WpF6MYU/wyoShn5XTvZT84bL5xGZPqaqJAAAAdUlEQVQ4y+XTRw7EQAgEQJg8jmtvdOz//9IvaE57c19LQoBAvjuNiMgJmkdrMl69yXjbfNyFtfMN5zo4P0RlPCYAzjN+KoDsGIcKoKGcRkAjLb78JhcS7TyuJRc6WIrWWrqgFodsLrXc95j+yq3Jm/X+n7mXC9defIzz7p9PAAAAAElFTkSuQmCC" alt="PNG data URL"/>
<img src="" alt="Pathological HTML!">
</p>
</div>'''
        html1_out = '''<div>
<p>Hi there.</p>
<img src="http://foo.example.com/path/to/bananas.jpg" alt="yellow fruit" width="23" height="480">
<img src="/_mwu/bananatree.jpg" alt="where they come from">
<p>Here's some more.</p>
<img src="http://desktop.example.com/fruitpics/strawberry.jpg" alt="berry good">
<p>and then:
<img src="http://desktop.example.com/articles/standard/nrolling-kiwi.gif" alt="delicious but takes time to peel">
<img src=" alt="GIF data URL"/>
<img src="DATA:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAB4AAAAkCAMAAACpD3pbAAAAYFBMVEVZbYftuWNoP0iYbFJDHkBcNEWAVk3JmFzVo17hrmG8jVmQk5+8sJOwgldse4xPKUN0S0r3yXdLJULcwpI4FD75x2f5yniSmJ07Fz/WpF6MYU/wyoShn5XTvZT84bL5xGZPqaqJAAAAdUlEQVQ4y+XTRw7EQAgEQJg8jmtvdOz//9IvaE57c19LQoBAvjuNiMgJmkdrMl69yXjbfNyFtfMN5zo4P0RlPCYAzjN+KoDsGIcKoKGcRkAjLb78JhcS7TyuJRc6WIrWWrqgFodsLrXc95j+yq3Jm/X+n7mXC9defIzz7p9PAAAAAElFTkSuQmCC" alt="PNG data URL"/>
<img src="" alt="Pathological HTML!">
</p>
</div>'''
        desktop_url = 'http://desktop.example.com/articles/delicious.html'
        from mobilize.filters.misc import absimgsrc
        elem = html.fromstring(html1_in)
        absimgsrc(elem, desktop_url)
        result = elem2str(elem)
        self.assertSequenceEqual(normxml(html1_out), normxml(result))
示例#3
0
 def test_noimgsize(self):
     '''Apply the noimgsize filter and compare with the expected markup.

     The expected output is the input <img> without its width and
     height attributes.
     '''
     from mobilize.filters import noimgsize
     cases = [
         {'in'  : '''<div><img src="http://example.com/booger.png" width="1920" height="1280" alt=""/></div>''',
          'out' :  '''<div><img src="http://example.com/booger.png" alt=""></div>''',
          },
         ]
     for case in cases:
         filtered = apply_filters(case['in'], [noimgsize])
         serialized = elem2str(filtered)
         self.assertSequenceEqual(case['out'], serialized)
示例#4
0
    def test_noattribs(self):
        ELEMSTR1 = '''<table width="600" style="color: fuscia;">
<tr><td width="200">one</td><td>two</td></tr>
<tr><td>three</td><td>four</td></tr>
</table>
'''
        ELEMSTR2 = '''<div>
<table width="600" style="color: fuscia;">
<tr><td width="200">one</td><td>two</td></tr>
<tr><td>three</td><td>four</td></tr>
</table>
</div>
'''
        testdata = [
            {'in_str' : ELEMSTR1,
             'tags' : ['table'],
             'attribs' : ['width', 'style'],
             'out_str' : '''<table>
<tr><td width="200">one</td><td>two</td></tr>
<tr><td>three</td><td>four</td></tr>
</table>
'''
             },
            {'in_str' : ELEMSTR2,
             'tags' : ['table'],
             'attribs' : ['width', 'style'],
             'out_str' : '''<div>
<table>
<tr><td width="200">one</td><td>two</td></tr>
<tr><td>three</td><td>four</td></tr>
</table>
</div>
'''
             },
            {'in_str' : ELEMSTR2,
             'tags' : ['table', 'td'],
             'attribs' : ['width', 'style'],
             'out_str' : '''<div>
<table>
<tr><td>one</td><td>two</td></tr>
<tr><td>three</td><td>four</td></tr>
</table>
</div>
'''
             },
            ]
        from mobilize.filters import noattribs
        for ii, td in enumerate(testdata):
            elem = html.fragment_fromstring(td['in_str'], create_parent=False)
            noattribs(elem, td['tags'], td['attribs'])
            expected = normxml(td['out_str'])
            actual = normxml(elem2str(elem))
            self.assertSequenceEqual(expected, actual)
示例#5
0
 def test_process(self):
     '''Check that process() returns component.elem and that it serializes as expected.'''
     cases = [
         {'elem_str'    : '<p>Hello</p>',
          'idname'      : 'beta',
          'newelem_str' : '<div class="alpha" id="beta"><p>Hello</p></div>',
          },
         ]
     for case in cases:
         extracted = DummyExtracted('', filters=[], classvalue='alpha')
         # Set the elems attribute directly with a single parsed element.
         extracted.elems = [html.fromstring(case['elem_str'])]
         returned = extracted.process(case['idname'])
         # The return value must equal what was stored on the component.
         self.assertEqual(returned, extracted.elem)
         self.assertEqual(html.HtmlElement, type(extracted.elem))
         self.assertSequenceEqual(case['newelem_str'], elem2str(extracted.elem))
示例#6
0
    def test_chain_filters(self):
        '''test that filters can be chained'''
        htmlin = '''<div class="foo" style="color: blue">
<h1 style="font-size: large;">The Headline</h1>
<a href="#" onclick="alert('Good Job!');">Click Here</a>
</div>'''
        htmlout = '''<div class="foo">
<h1>The Headline</h1>
<a href="#">Click Here</a>
</div>'''
        from mobilize.filters import nomiscattrib
        from mobilize.filters.remove import noevents_one
        my_filters = [
            nomiscattrib,
            noevents_one,
            ]
        self.assertEquals(elem2str(apply_filters(htmlin, my_filters)), htmlout)
示例#7
0
 def test_noevents(self):
     '''Check that noevents_one output has no on* event-handler attributes.

     The fixtures cover onclick spelled in lower, upper and mixed case,
     plus an onmouseover on an <img>.
     '''
     from mobilize.filters.remove import noevents_one
     testdata = [
         {'in'  : '''<a href="#" id="makeHPLink" onclick="cnnMakeHP('homepage_set_overlay')" class="realmLink">Make CNN Your Homepage</a>''',
          'out' : '''<a href="#" id="makeHPLink" class="realmLink">Make CNN Your Homepage</a>''',
          },
         {'in'  : '''<a href="#" id="makeHPLink" ONCLICK="cnnMakeHP('homepage_set_overlay')" class="realmLink">Make CNN Your Homepage</a>''',
          'out' : '''<a href="#" id="makeHPLink" class="realmLink">Make CNN Your Homepage</a>''',
          },
         {'in'  : '''<a href="#" id="makeHPLink" onClick="cnnMakeHP('homepage_set_overlay')" class="realmLink">Make CNN Your Homepage</a>''',
          'out' : '''<a href="#" id="makeHPLink" class="realmLink">Make CNN Your Homepage</a>''',
          },
         {'in'  : '''<img src="http://example.com/boo.gif" alt="boo!" onmouseover="alert('boo!');">''',
          'out' : '''<img src="http://example.com/boo.gif" alt="boo!">''',
          },
         ]
     for td in testdata:
         # assertEqual, not the deprecated assertEquals alias, which was
         # removed from unittest in Python 3.12.
         self.assertEqual(td['out'], elem2str(apply_filters(td['in'], [noevents_one])))
示例#8
0
 def test_nomiscattrib(self):
     '''Check that nomiscattrib output has no style or target attributes.

     The fixtures cover the style attribute spelled in lower, upper and
     mixed case, plus a target attribute on a link.
     '''
     from mobilize.filters import nomiscattrib
     testdata = [
         {'in'  : '''<div class="foo" style="background-color: red;">Hello.</div>''',
          'out' :  '''<div class="foo">Hello.</div>''',
          },
         {'in'  : '''<div class="foo" STYLE="background-color: red;">Hello.</div>''',
          'out' :  '''<div class="foo">Hello.</div>''',
          },
         {'in'  : '''<div class="foo" Style="background-color: red;">Hello.</div>''',
          'out' :  '''<div class="foo">Hello.</div>''',
          },
         {'in'  : '''<div><a href="/a">Hello.</a> <a href="http://example.com" target="_blank">New Tab</a></div>''',
          'out' : '''<div><a href="/a">Hello.</a> <a href="http://example.com">New Tab</a></div>''',
          },
         ]
     for td in testdata:
         # assertEqual, not the deprecated assertEquals alias, which was
         # removed from unittest in Python 3.12.
         self.assertEqual(td['out'], elem2str(apply_filters(td['in'], [nomiscattrib])))
示例#9
0
    def test_squeezebr(self):
        from mobilize.filters import squeezebr
        testdata = [
            {'in_str' : '''<p>Hi.</p>''',
             'out_str' : '''<p>Hi.</p>''',
             },
            {'in_str' : '''<p>Hi.<br>Hey.</p>''',
             'out_str' : '''<p>Hi.<br>Hey.</p>''',
             },
            {'in_str' : '''<p>Hi.<br><br>Hey.</p>''',
             'out_str' : '''<p>Hi.<br>Hey.</p>''',
             },
            {'in_str' : '''<p>Hi.<br/><br/><br/><br/><br/><br/><br/><br/><br/>Hey.</p>''',
             'out_str' : '''<p>Hi.<br>Hey.</p>''',
             },
            {'in_str' : '''<div>
<p>Hi.<br><br>Hey.</p>
<p>This is some more text
<br><br><br><br><br><img src="foo.png" alt="foo"/>
</p>
</div>''',
             'out_str' : '''<div>
<p>Hi.<br>Hey.</p>
<p>This is some more text
<br><img src="foo.png" alt="foo">
</p>
</div>
''',
             },
            {'in_str' : '''<p>Hi.<br>    <br>Hey.</p>''',
             'out_str' : '''<p>Hi.<br>Hey.</p>''',
             },
            {'in_str' : '''<p>Hi.<br>How.<br>Hey.</p>''',
             'out_str' : '''<p>Hi.<br>How.<br>Hey.</p>''',
             },
            ]
        for ii, td in enumerate(testdata):
            elem = html.fragment_fromstring(td['in_str'], create_parent=False)
            squeezebr(elem)
            expected = normxml(td['out_str'])
            actual = normxml(elem2str(elem))
            self.assertSequenceEqual(expected, actual)
示例#10
0
    def test_resizeiframe(self):
        from mobilize.filters import resizeiframe
        testdata = [
            {'iframe_str' : '''<p>
<iframe width="533" height="330" frameborder="0" allowfullscreen="" src="http://www.youtube.com/embed/HE6uqPPrVfo" title="YouTube video player"></iframe>
</p>''',
             'resized_str' : '''<p>
<iframe width="280" height="173" frameborder="0" allowfullscreen="" src="http://www.youtube.com/embed/HE6uqPPrVfo" title="YouTube video player"></iframe>
</p>''',
             },
            {'iframe_str' : '''<iframe width="533" height="330" frameborder="0" allowfullscreen="" src="http://www.youtube.com/embed/HE6uqPPrVfo" title="YouTube video player"></iframe>''',
             'resized_str' : '''<iframe width="280" height="173" frameborder="0" allowfullscreen="" src="http://www.youtube.com/embed/HE6uqPPrVfo" title="YouTube video player"></iframe>''',
             },
            {'iframe_str' : '''<p>Nothing to see here.</p>''',
             'resized_str' : '''<p>Nothing to see here.</p>''',
             },
            ]
        for ii, td in enumerate(testdata):
            iframe_elem = html.fragment_fromstring(td['iframe_str'], create_parent=False)
            resizeiframe(iframe_elem)
            self.assertSequenceEqual(normxml(td['resized_str']), normxml(elem2str(iframe_elem)))
示例#11
0
 def html(self):
     '''Return self.elem serialized with util.elem2str.

     Asserts that self.elem is not None first; the assertion message asks
     the caller to invoke extract() and process() beforehand.
     '''
     not_ready_msg = 'Must invoke self.extract() and self.process() before rendering to html'
     assert self.elem is not None, not_ready_msg
     rendered = util.elem2str(self.elem)
     return rendered
示例#12
0
    def test_resizeobject(self):
        from mobilize.filters import resizeobject
        testdata = [
            {'object_str' : '''<div class="foobar"><ul><li><object width="800" height="344">
<param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US"/>
<param name="allowFullScreen" value="true"/>
<param name="allowscriptaccess" value="always"/>
<embed src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="800" height="344"/>
</object></li></ul></div>''',
             'resized_str' : '''<div class="foobar"><ul><li><object width="280" height="120">
<param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US">
<param name="allowFullScreen" value="true">
<param name="allowscriptaccess" value="always">
<embed src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="280" height="120"></embed>
</object></li></ul></div>''',
             },
            {'object_str' : '''<object width="800" height="344">
<param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US">
<param name="allowFullScreen" value="true">
<param name="allowscriptaccess" value="always">
<embed src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="800" height="344"></embed>
</object>''',
             'resized_str' : '''<object width="280" height="120">
<param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US">
<param name="allowFullScreen" value="true">
<param name="allowscriptaccess" value="always">
<embed src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="280" height="120"></embed>
</object>''',
             },
            {'object_str' : '''<OBJECT width="800" height="344">
<param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US"/>
<param name="allowFullScreen" value="true"/>
<param name="allowscriptaccess" value="always"/>
<EMBED src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="800" height="344"/>
</OBJECT>''',
             'resized_str' : '''<object width="280" height="120">
<param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US">
<param name="allowFullScreen" value="true">
<param name="allowscriptaccess" value="always">
<embed src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="280" height="120"></embed>
</object>''',
             },
            # If not height defined, or otherwise can't calculate aspect ratio, just ignore that attribute
            {'object_str' : '''<OBJECT width="800">
<param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US"/>
<param name="allowFullScreen" value="true"/>
<param name="allowscriptaccess" value="always"/>
<EMBED src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="800"/>
</OBJECT>''',
             'resized_str' : '''<object width="280">
<param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US">
<param name="allowFullScreen" value="true">
<param name="allowscriptaccess" value="always">
<embed src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="280"></embed>
</object>''',
             },
            {'object_str' : '''<OBJECT>
<param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US"/>
<param name="allowFullScreen" value="true"/>
<param name="allowscriptaccess" value="always"/>
<EMBED src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true"/>
</OBJECT>''',
             'resized_str' : '''<object width="280">
<param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US">
<param name="allowFullScreen" value="true">
<param name="allowscriptaccess" value="always">
<embed src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="280"></embed>
</object>''',
             },
            {'object_str' : '''<OBJECT width="800" height="beer">
<param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US"/>
<param name="allowFullScreen" value="true"/>
<param name="allowscriptaccess" value="always"/>
<EMBED src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="800" height="beer"/>
</OBJECT>''',
             'resized_str' : '''<object width="280">
<param name="movie" value="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US">
<param name="allowFullScreen" value="true">
<param name="allowscriptaccess" value="always">
<embed src="http://www.youtube.com/v/fJ8FGIQG8gM?fs=1&amp;hl=en_US" type="application/x-shockwave-flash" allowscriptaccess="always" allowfullscreen="true" width="280"></embed>
</object>''',
             },
            {'object_str' : '''<p>Nothing to see here.</p>''',
             'resized_str' : '''<p>Nothing to see here.</p>''',
             },
            ]
        for ii, td in enumerate(testdata):
            object_elem = html.fragment_fromstring(td['object_str'], create_parent=False)
            resizeobject(object_elem)
            self.assertSequenceEqual(normxml(td['resized_str']), normxml(elem2str(object_elem)))
示例#13
0
    def test_table2divrows(self):
        testdata = [
            {'in_str' : '''<div><table>
      <tr>
        <td>Eggs</td>
        <td>Ham</td>
      </tr>
      <tr>
        <td>Beer</td>
        <td>Milk</td>
      </tr>
    </table></div>
''',
             'out_str' : '''<div>
  <div class="mwu-table2divrows">
    <div class="mwu-table2divrows-row0">
      <div class="mwu-table2divrows-row0-col0 mwu-table2divrows-col0">Eggs</div>
      <div class="mwu-table2divrows-row0-col1 mwu-table2divrows-col1">Ham</div>
    </div>
    <div class="mwu-table2divrows-row1">
      <div class="mwu-table2divrows-row1-col0 mwu-table2divrows-col0">Beer</div>
      <div class="mwu-table2divrows-row1-col1 mwu-table2divrows-col1">Milk</div>
    </div>
  </div>
</div>
''',
             },
            #================
            {'in_str' : '''<div><table><tbody>
      <tr>
        <td>Eggs</td>
        <td>Ham</td>
      </tr>
      <tr>
        <td>Beer</td>
        <td>Milk</td>
      </tr>
    </tbody></table></div>
''',
             'out_str' : '''<div>
  <div class="mwu-table2divrows">
    <div class="mwu-table2divrows-row0">
      <div class="mwu-table2divrows-row0-col0 mwu-table2divrows-col0">Eggs</div>
      <div class="mwu-table2divrows-row0-col1 mwu-table2divrows-col1">Ham</div>
    </div>
    <div class="mwu-table2divrows-row1">
      <div class="mwu-table2divrows-row1-col0 mwu-table2divrows-col0">Beer</div>
      <div class="mwu-table2divrows-row1-col1 mwu-table2divrows-col1">Milk</div>
    </div>
  </div>
</div>
''',
             },
            {'in_str' : '''<div><p>Nothing here.</p></div>''',
             'out_str' : '''<div><p>Nothing here.</p></div>''',
             },
            {'in_str' : '''<div><table>
      <tr>
        <td><table id="foobar"><tr><td>Whoa</td><td>dude</td></tr></table></td>
        <td>Key Lime Pie</td>
      </tr>
    </table></div>''',
             'out_str' : '''<div><div class="mwu-table2divrows">
<div class="mwu-table2divrows-row0">
    <div class="mwu-table2divrows-row0-col0 mwu-table2divrows-col0">
      <table id="foobar"><tr><td>Whoa</td><td>dude</td></tr></table>
    </div>
    <div class="mwu-table2divrows-row0-col1 mwu-table2divrows-col1">Key Lime Pie</div>
    </div>
</div>
</div>''',
             },
            {'in_str' : '''<div><table>
      <tr>
        <td>
Does html like this exist somewhere in the wild?
<table id="foobar"><tr><td>Whoa</td><td>dude</td></tr></table>
<p>yeah, I bet somewhere it does</p>
(probably on some website that gets 10K hits on a slow day)
<table id="foobar"><tr><td>Game</td><td>Over Man</td></tr></table>
here's some extra trailing text for you too
</td>
        <td>Key Lime Pie</td>
      </tr>
    </table></div>''',
             'out_str' : '''<div><div class="mwu-table2divrows">
  <div class="mwu-table2divrows-row0">
    <div class="mwu-table2divrows-row0-col0 mwu-table2divrows-col0">
Does html like this exist somewhere in the wild?
<table id="foobar"><tr><td>Whoa</td><td>dude</td></tr></table>
<p>yeah, I bet somewhere it does</p>
(probably on some website that gets 10K hits on a slow day)
<table id="foobar"><tr><td>Game</td><td>Over Man</td></tr></table>
here's some extra trailing text for you too
    </div>
    <div class="mwu-table2divrows-row0-col1 mwu-table2divrows-col1">Key Lime Pie</div>
    </div>
  </div>
</div>''',
             },
            {'in_str' : '''<table>
      <tr>
        <td>Eggs</td>
        <td>Ham</td>
      </tr>
      <tr>
        <td>Beer</td>
        <td>Milk</td>
      </tr>
    </table>
''',
             'out_str' : '''<div class="mwu-table2divrows">
  <div class="mwu-table2divrows-row0">
    <div class="mwu-table2divrows-row0-col0 mwu-table2divrows-col0">Eggs</div>
    <div class="mwu-table2divrows-row0-col1 mwu-table2divrows-col1">Ham</div>
  </div>
  <div class="mwu-table2divrows-row1">
    <div class="mwu-table2divrows-row1-col0 mwu-table2divrows-col0">Beer</div>
    <div class="mwu-table2divrows-row1-col1 mwu-table2divrows-col1">Milk</div>
  </div>
</div>
''',
             },
            ]
        from mobilize.filters import table2divrows
        for ii, td in enumerate(testdata):
            in_elem = html.fragment_fromstring(td['in_str'], create_parent=False)
            table2divrows(in_elem)
            self.assertSequenceEqual(normxml(td['out_str']), normxml(elem2str(in_elem)))
示例#14
0
    def test_table2divgroups(self):
        from mobilize.filters.tables import Spec
        ELEMSTR1 = '''<div id="some-container">
<table>
      <tbody>
        <tr>
          <td>CONTACT US</td>
          <td>&nbsp;</td>
          <td>&nbsp;</td>
          <td>&nbsp;</td>
        <tr>
          <td>123 Main Str</td>
          <td>&nbsp;</td>
          <td>OUR TEAM</td>
          <td>&nbsp;</td>
        <tr>
          <td>Springfield, IL</td>
          <td>&nbsp;</td>
          <td>Mike Smith</td>
          <td><img src="/mike-smith.jpg"/></td>
        <tr>
          <td>1-800-BUY-DUFF</td>
          <td>&nbsp;</td>
          <td>Jen Jones</td>
          <td><img src="/jen-jones.jpg"/></td>
        <tr>
          <td>&nbsp;</td>
          <td>&nbsp;</td>
          <td>Scruffy</td>
          <td><img src="/scruffy-the-dog.jpg"/></td>
        <tr>
      </tbody>
    </table>
</div>
'''
        testdata = [
            {'elem_str' : ELEMSTR1,
             'specmap' : [],
             'out_str' : '''
<div id="some-container">
  <div class="mwu-elem-table2divgroups">
  </div>
</div>
''',
             },
            {'elem_str' : ELEMSTR1,
             'specmap' : [
                    (Spec('idname1', 0, 0, 0, 0)),
                    ],
             'out_str' : '''
<div id="some-container">
  <div class="mwu-elem-table2divgroups">
    <div class="mwu-elem-table2divgroups-group" id="idname1">
      <div>CONTACT US</div>
    </div>
  </div>
</div>
''',
             },
            {'elem_str' : ELEMSTR1,
             'specmap' : [
                    (Spec('idname1', 0, 0, 3, 0)),
                    ],
             'out_str' : '''
<div id="some-container">
  <div class="mwu-elem-table2divgroups">
    <div class="mwu-elem-table2divgroups-group" id="idname1">
      <div>CONTACT US</div>
      <div>123 Main Str</div>
      <div>Springfield, IL</div>
      <div>1-800-BUY-DUFF</div>
    </div>
  </div>
</div>
''',
             },
            {'elem_str' : ELEMSTR1,
             'specmap' : [
                    (Spec('idname1', 0, 0, 0, 0)),
                    (Spec('idname2', 0, 0, 3, 0)),
                    ],
             'out_str' : '''
<div id="some-container">
  <div class="mwu-elem-table2divgroups">
    <div class="mwu-elem-table2divgroups-group" id="idname1">
      <div>CONTACT US</div>
    </div>
    <div class="mwu-elem-table2divgroups-group" id="idname2">
      <div>CONTACT US</div>
      <div>123 Main Str</div>
      <div>Springfield, IL</div>
      <div>1-800-BUY-DUFF</div>
    </div>
  </div>
</div>
''',
             },
            {'elem_str' : ELEMSTR1,
             'specmap' : [
                    (Spec('idname2', 0, 0, 3, 0)),
                    (Spec('idname1', 0, 0, 0, 0)),
                    ],
             'out_str' : '''
<div id="some-container">
  <div class="mwu-elem-table2divgroups">
    <div class="mwu-elem-table2divgroups-group" id="idname2">
      <div>CONTACT US</div>
      <div>123 Main Str</div>
      <div>Springfield, IL</div>
      <div>1-800-BUY-DUFF</div>
    </div>
    <div class="mwu-elem-table2divgroups-group" id="idname1">
      <div>CONTACT US</div>
    </div>
  </div>
</div>
''',
             },
            {'elem_str' : ELEMSTR1,
             'specmap' : [
                    (Spec('idname2', 0, 0, 3, 0)),
                    (Spec('idname1', 0, 0, 0, 0)),
                    ],
             'out_str' : '''
<div id="some-container">
  <div class="mwu-elem-table2divgroups">
    <div class="mwu-elem-table2divgroups-group" id="idname2">
      <div>CONTACT US</div>
      <div>123 Main Str</div>
      <div>Springfield, IL</div>
      <div>1-800-BUY-DUFF</div>
    </div>
    <div class="mwu-elem-table2divgroups-group" id="idname1">
      <div>CONTACT US</div>
    </div>
  </div>
</div>
''',
             },
            {'elem_str' : ELEMSTR1,
             'specmap' : [
                    (Spec('idname1', 0, 0, 4, 0)),
                    ],
             'out_str' : '''
<div id="some-container">
  <div class="mwu-elem-table2divgroups">
    <div class="mwu-elem-table2divgroups-group" id="idname1">
      <div>CONTACT US</div>
      <div>123 Main Str</div>
      <div>Springfield, IL</div>
      <div>1-800-BUY-DUFF</div>
    </div>
  </div>
</div>
''',
             },
            {'elem_str' : ELEMSTR1,
             'omit_whitespace' : False,
             'specmap' : [
                    (Spec('idname1', 0, 0, 4, 0)),
                    ],
             'out_str' : '''
<div id="some-container">
  <div class="mwu-elem-table2divgroups">
    <div class="mwu-elem-table2divgroups-group" id="idname1">
      <div>CONTACT US</div>
      <div>123 Main Str</div>
      <div>Springfield, IL</div>
      <div>1-800-BUY-DUFF</div>
      <div>&#160;</div>
    </div>
  </div>
</div>
''',
             },
            {'elem_str' : ELEMSTR1,
             'specmap' : [
                    (Spec('idname1', 1, 2, 4, 3)),
                    ],
             'out_str' : '''
<div id="some-container">
  <div class="mwu-elem-table2divgroups">
    <div class="mwu-elem-table2divgroups-group" id="idname1">
      <div>
        <div>OUR TEAM</div>
      </div>
      <div>
        <div>Mike Smith</div>
        <div><img src="/mike-smith.jpg"></div>
      </div>
      <div>
        <div>Jen Jones</div>
        <div><img src="/jen-jones.jpg"></div>
      </div>
      <div>
        <div>Scruffy</div>
        <div><img src="/scruffy-the-dog.jpg"></div>
      </div>
    </div>
  </div>
</div>
''',
             },
            
            {'elem_str' : '''<div>
<table>
<tr><td colspan="3">a</td></tr>
<tr>
  <td>b</td>
  <td>c</td>
  <td>d</td>
</tr>
</table>
''',
             'specmap' : [
                    (Spec('idname1', 0, 0, 1, 1)),
                    ],
             'out_str' : '''
<div>
  <div class="mwu-elem-table2divgroups">
    <div class="mwu-elem-table2divgroups-group" id="idname1">
      <div><div>a</div></div>
      <div>
        <div>b</div>
        <div>c</div>
      </div>
    </div>
  </div>
</div>
''',
             },
            ]
        from mobilize.filters import table2divgroups
        for ii, td in enumerate(testdata):
            omit_whitespace = td.get('omit_whitespace', True)
            elem = html.fromstring(td['elem_str'])
            table2divgroups(elem, td['specmap'], omit_whitespace=omit_whitespace)
            expected = normxml(td['out_str'])
            actual = normxml(elem2str(elem))
            self.assertSequenceEqual(expected, actual)