Пример #1
0
 def testHTML2Text(self):
     """Check that html2text folds line breaks inside markup into one space.

     Each variant puts a different mix of carriage return, line feed and
     surrounding spaces between the two words; all of them are expected to
     come out as ``"line1 line2"``.
     """
     variants = (
         "<div>line1\r\nline2</div>",
         "<div>line1\r\n line2</div>",
         "<div>line1 \r\n line2</div>",
         "<div>line1 \r \n line2</div>",
     )
     for markup in variants:
         # assertEqual instead of the deprecated assertEquals alias, which
         # was removed from unittest in Python 3.12.
         self.assertEqual(html2text(markup), "line1 line2")
 def test_html2text(self):
     """Run html2text over plain, tagged and escaped snippets.

     The cases are checked in order; each pair is (input, expected output).
     """
     cases = [
         ('x', 'x'),
         ('<p class="shiny">text x</p>', 'text x'),
         # Accented character, issue #7.  This does not reproduce the
         # original error (that one lives in javascript), but it is a
         # cheap extra check.
         ('SOMEWORDWITH\xc3\xa8', 'SOMEWORDWITH\xc3\xa8'),
         # '<' and '>' that belong to a tag are stripped for safety.
         ('text <script>alert(42)</script>', 'text alert(42)'),
         # Real greater/smaller than signs are left alone.
         ('1 < 2 > 0', '1 < 2 > 0'),
         # Saving the text above in a definition yields escaped tags,
         # which still display fine.
         ('1 &amp;lt; 2', '1 &lt; 2'),
         # Dangerous escaped tags do not fool us.
         ('text &lt;script&gt;alert(42)&lt;/script&gt;',
          'text scriptalert(42)/script'),
         # Looks scary but is harmless: the browser shows it literally
         # and no alert pops up.
         ('text &amp;lt;script&amp;gt;alert(42)'
          '&amp;lt;/script&amp;gt;',
          'text &lt;script&gt;alert(42)&lt;/script&gt;'),
     ]
     for markup, expected in cases:
         self.assertEqual(html2text(markup), expected)
Пример #3
0
 def test_html2text(self):
     """Verify html2text output for plain, tagged and escaped input."""

     def expect(markup, plain):
         # Single place where the conversion is run and compared.
         self.assertEqual(html2text(markup), plain)

     expect('x', 'x')
     expect('<p class="shiny">text x</p>', 'text x')

     # Accented character (issue #7).  The real error was in javascript,
     # so this does not reproduce it, but it is still worth covering.
     expect('SOMEWORDWITH\xc3\xa8', 'SOMEWORDWITH\xc3\xa8')

     # Angle brackets that form a tag are dropped for safety.
     expect('text <script>alert(42)</script>', 'text alert(42)')

     # Genuine greater/smaller than signs survive.
     expect('1 < 2 > 0', '1 < 2 > 0')

     # What is actually stored when the text above is saved in a
     # definition has escaped tags; that displays fine.
     expect('1 &amp;lt; 2', '1 &lt; 2')

     # Escaped dangerous tags do not slip through.
     expect('text &lt;script&gt;alert(42)&lt;/script&gt;',
            'text scriptalert(42)/script')

     # Scary-looking but harmless: rendered literally by the browser,
     # no alert pops up.
     expect('text &amp;lt;script&amp;gt;alert(42)'
            '&amp;lt;/script&amp;gt;',
            'text &lt;script&gt;alert(42)&lt;/script&gt;')
    def Description(self, from_catalog=False):
        """Return the definition as cleaned text.

        With ``from_catalog`` false (the default) the definition HTML is
        run through ``html2text``.  With it true, the catalog is searched
        by this object's id and the ``Description`` attribute of the first
        result is returned; if the search finds nothing, the value is
        computed from the definition instead.
        """
        if not from_catalog:
            return html2text(self.getDefinition())

        catalog = self.getCatalog()
        results = catalog.searchResults(id=self.getId())
        if results:
            return results[0].Description

        # No catalog entry for this id: compute the text directly.
        return self.Description()
    def Description(self, from_catalog=False):
        """Give back the cleaned-up text of the definition.

        ``from_catalog=True`` looks the object up in the catalog by id and
        returns the first match's ``Description``; when there is no match
        it falls back to the non-catalog path.  Otherwise the definition
        HTML is converted with ``html2text``.
        """
        if from_catalog:
            matches = self.getCatalog().searchResults(id=self.getId())
            if matches:
                return matches[0].Description
            # Nothing in the catalog: fall back to converting the HTML.
            return self.Description()

        return html2text(self.getDefinition())
 def testHTML2Text(self):
     """Check html2text whitespace folding and non-ASCII list items.

     The first four cases expect any mix of CR, LF and spaces between two
     words to become a single space.  The last three expect a list item
     to be rendered with a leading ``"- "`` and the accented characters to
     come out as the same UTF-8 bytes whether they were written literally,
     as named entities or as numeric character references.
     """
     for markup in (
             "<div>line1\r\nline2</div>",
             "<div>line1\r\n line2</div>",
             "<div>line1 \r\n line2</div>",
             "<div>line1 \r \n line2</div>"):
         # assertEqual instead of the deprecated assertEquals alias, which
         # was removed from unittest in Python 3.12.
         self.assertEqual(html2text(markup), "line1 line2")

     # The u prefix matters on Python 2: calling .encode() on a non-ASCII
     # byte string first decodes it as ASCII and raises
     # UnicodeDecodeError.  On Python 3 the prefix is a no-op.
     expected = u"- Seleção campeã!".encode("utf-8")

     text = html2text("<div><ul><li>Seleção campeã!</li></ul></div>")
     self.assertEqual(text, expected)
     text = html2text(
         "<div><ul><li>Sele&ccedil;&atilde;o campe&atilde;!</li>"
         "</ul></div>")
     self.assertEqual(text, expected)
     text = html2text(
         "<div><ul><li>Sele&#231;&#227;o campe&#227;!</li></ul></div>")
     self.assertEqual(text, expected)
 def testHTML2Text(self):
     """Check html2text whitespace folding and accented list items.

     Line-break variants between two words must collapse to one space.
     A list item must come out prefixed with ``"- "``, and literal
     characters, named entities and numeric character references must all
     yield the same UTF-8 encoded text.
     """
     # assertEqual is used throughout: the assertEquals alias is deprecated
     # and was removed from unittest in Python 3.12.
     text = html2text("<div>line1\r\nline2</div>")
     self.assertEqual(text, "line1 line2")
     text = html2text("<div>line1\r\n line2</div>")
     self.assertEqual(text, "line1 line2")
     text = html2text("<div>line1 \r\n line2</div>")
     self.assertEqual(text, "line1 line2")
     text = html2text("<div>line1 \r \n line2</div>")
     self.assertEqual(text, "line1 line2")

     # All three spellings below are expected to produce these bytes.
     expected = u"- Seleção campeã!".encode("utf-8")
     text = html2text("<div><ul><li>Seleção campeã!</li></ul></div>")
     self.assertEqual(text, expected)
     text = html2text(
         "<div><ul><li>Sele&ccedil;&atilde;o campe&atilde;!</li>"
         "</ul></div>")
     self.assertEqual(text, expected)
     text = html2text(
         "<div><ul><li>Sele&#231;&#227;o campe&#227;!</li></ul></div>")
     self.assertEqual(text, expected)