def test_html_cleaners(self):
    """HTML is correctly cleaned"""
    # NOTE(review): another test_html_cleaners appears later in this file; if
    # both live in the same class the later definition replaces this one and
    # these assertions never run -- confirm and rename one of them.

    # Paragraphs that are empty or whitespace-only are dropped.
    paragraphs = (
        "<p class='plod'></p><p>Hello</p><p class=''> </p>"
        "<p class='test'> Good bye </p>"
    )
    self.assertEqual(
        utils.remove_unnecessary_white_space(paragraphs),
        "<p>Hello</p><p class='test'> Good bye </p>",
    )

    # Shared fixture: an image paragraph followed by a caption paragraph.
    captioned_img = (
        '<p><img alt="" src="/media/uploads/church-SiyavuyaKhaya-20150128.jpg" '
        'style="width: 1382px; height: 1037px;" /></p>'
        '<p class="caption">This is the caption.</p>'
    )

    # The inline width/height style is removed from the <img>.
    # (The original passed "html.parser" as assertEqual's msg argument by
    # mistake; it belongs only to the bs() call.)
    soup = bs(captioned_img, "html.parser")
    self.assertEqual(
        str(utils.replaceImgHeightWidthWithClass(soup)),
        '<p><img alt="" src="/media/uploads/church-SiyavuyaKhaya-20150128.jpg"/></p>'
        '<p class="caption">This is the caption.</p>',
    )

    # <p><img></p> + <p class="caption"> becomes <figure> + <figcaption>.
    soup = bs(captioned_img, "html.parser")
    self.assertEqual(
        str(utils.replacePImgWithFigureImg(soup)),
        '<figure><img alt="" src="/media/uploads/church-SiyavuyaKhaya-20150128.jpg" '
        'style="width: 1382px; height: 1037px;"/>'
        '<figcaption>This is the caption.</figcaption></figure>',
    )

    # The string-level entry point yields a <figure> without the inline style.
    self.assertEqual(
        utils.replaceBadHtmlWithGood(captioned_img),
        '<figure><img alt="" src="/media/uploads/church-SiyavuyaKhaya-20150128.jpg"/>'
        '<figcaption>This is the caption.</figcaption></figure>',
    )

    # Space-delimited "--" / "---" become en / em dashes; dashes glued to
    # words are left untouched.
    dashes_in = (
        "<p>The dog ran away.</p>"
        "<p>The dog -- ran away.</p>"
        "<p>The dog --- ran away.</p>"
        "<p>The dog--ran away.</p>"
        "<p>The dog---ran away.</p>"
    )
    dashes_expected = (
        "<p>The dog ran away.</p>"
        "<p>The dog – ran away.</p>"
        "<p>The dog — ran away.</p>"
        "<p>The dog--ran away.</p>"
        "<p>The dog---ran away.</p>"
    )
    # Pin the parser so the result does not depend on which parsers are
    # installed (lxml/html5lib would wrap the fragment in <html><body>).
    dashes_out = str(utils.processDashes(bs(dashes_in, "html.parser")))
    self.assertEqual(dashes_expected, dashes_out)
def test_html_cleaners(self):
    """HTML is correctly cleaned"""
    # NOTE(review): an earlier method in this file has the same name; if both
    # live in the same class this definition shadows it -- confirm and rename
    # one of them.

    # Paragraphs that are empty or whitespace-only are dropped.
    paragraphs = (
        "<p class='plod'></p><p>Hello</p><p class=''> </p>"
        "<p class='test'> Good bye </p>"
    )
    self.assertEqual(
        utils.remove_blank_paras(paragraphs),
        "<p>Hello</p><p class='test'> Good bye </p>",
    )

    # Shared fixture: an image paragraph followed by a caption paragraph.
    captioned_img = (
        '<p><img alt="" src="/media/uploads/church-SiyavuyaKhaya-20150128.jpg" '
        'style="width: 1382px; height: 1037px;" /></p>'
        '<p class="caption">This is the caption.</p>'
    )

    # The inline width/height style is removed from the <img>.
    # (The original passed "html.parser" as assertEqual's msg argument by
    # mistake; it belongs only to the bs() call.)
    soup = bs(captioned_img, "html.parser")
    self.assertEqual(
        str(utils.replaceImgHeightWidthWithClass(soup)),
        '<p><img alt="" src="/media/uploads/church-SiyavuyaKhaya-20150128.jpg"/></p>'
        '<p class="caption">This is the caption.</p>',
    )

    # <p><img></p> + <p class="caption"> becomes <figure> + <figcaption>.
    soup = bs(captioned_img, "html.parser")
    self.assertEqual(
        str(utils.replacePImgWithFigureImg(soup)),
        '<figure><img alt="" src="/media/uploads/church-SiyavuyaKhaya-20150128.jpg" '
        'style="width: 1382px; height: 1037px;"/>'
        '<figcaption>This is the caption.</figcaption></figure>',
    )

    # The string-level entry point yields a <figure> without the inline style.
    self.assertEqual(
        utils.replaceBadHtmlWithGood(captioned_img),
        '<figure><img alt="" src="/media/uploads/church-SiyavuyaKhaya-20150128.jpg"/>'
        '<figcaption>This is the caption.</figcaption></figure>',
    )