Python strip_template примеры использования

Язык программирования: Python

Пространство имен/Пакет: ebdata.templatemaker.clean

Метод/Функция: strip_template

Примеров на hotexamples.com: 4

Python strip_template — найдено 4 примера. Это лучшие примеры кода на Python для ebdata.templatemaker.clean.strip_template, взятые из проектов с открытым исходным кодом. Вы можете оценить каждый пример, чтобы помочь нам улучшить качество примеров.

Пример #1

Показать файл

 def auto_excerpt(self):
     """
     Tries to extract the meaningful text of this page, leaving out
     navigation and other clutter.

     Returns a list of strings, one string per paragraph. Which extraction
     routine is used depends on the flags of self.seed:

     * rss_full_entry: the page is converted straight to a paragraph list.
     * strip_noise: the page tree is first passed through strip_template()
       together with the tree of the companion page.
     * guess_article_text: article_text() is used instead of the plain
       paragraph-list conversion.
     """
     from ebdata.textmining.treeutils import make_tree
     page_tree = make_tree(self.html)

     # Full RSS entries skip template stripping and article guessing.
     if self.seed.rss_full_entry:
         from ebdata.templatemaker.textlist import html_to_paragraph_list
         return html_to_paragraph_list(page_tree)

     if self.seed.strip_noise:
         from ebdata.templatemaker.clean import strip_template
         try:
             companion_html = self.companion_page().html
         except IndexError:
             # Presumably there is no companion page to compare against
             # (confirm in companion_page()); the tree is used unstripped.
             pass
         else:
             strip_template(page_tree, make_tree(companion_html))

     # Choose the paragraph extractor; only the chosen module is imported.
     if self.seed.guess_article_text:
         from ebdata.templatemaker.articletext import article_text as extract_paragraphs
     else:
         from ebdata.templatemaker.textlist import html_to_paragraph_list as extract_paragraphs
     return extract_paragraphs(page_tree)

Пример #2

Показать файл

Файл: models.py Проект: AndrewJHart/everyblock_code

 def auto_excerpt(self):
     """
     Detects the text content of this page while ignoring navigation and
     similar clutter, and returns it as a list of strings (one per
     paragraph).

     The flags on self.seed select the strategy: rss_full_entry converts the
     page directly into a paragraph list; otherwise strip_noise optionally
     runs strip_template() against the companion page's tree, and
     guess_article_text decides between article_text() and the plain
     paragraph-list conversion.
     """
     from ebdata.textmining.treeutils import make_tree
     dom = make_tree(self.html)

     if self.seed.rss_full_entry:
         from ebdata.templatemaker.textlist import html_to_paragraph_list
         return html_to_paragraph_list(dom)

     if self.seed.strip_noise:
         from ebdata.templatemaker.clean import strip_template
         try:
             other_html = self.companion_page().html
         except IndexError:
             # An IndexError from the companion lookup is ignored on purpose:
             # the page is then processed without template stripping.
             pass
         else:
             other_dom = make_tree(other_html)
             strip_template(dom, other_dom)

     if self.seed.guess_article_text:
         from ebdata.templatemaker.articletext import article_text
         return article_text(dom)

     from ebdata.templatemaker.textlist import html_to_paragraph_list
     return html_to_paragraph_list(dom)

Пример #3

Показать файл

Файл: clean.py Проект: DotNetWebs/openblock

    def assertStrips(self, html1, html2, expected, num_removals, check_ids=False):
        """
        Runs strip_template() on the trees parsed from html1 and html2, then
        checks that the first tree serializes to the expected HTML and that
        the call returned num_removals.

        html1, html2 and expected are given without <html> and <body> tags to
        keep test cases short; the wrapper is added here. check_ids is passed
        through to strip_template() unchanged.
        """
        wrapper = '<html><body>%s</body></html>'
        doc, companion = [document_fromstring(wrapper % fragment)
                          for fragment in (html1, html2)]
        wanted_markup = wrapper % expected

        removed = strip_template(doc, companion, check_ids=check_ids)
        self.assertEqual(etree.tostring(doc, method='html'), wanted_markup)
        self.assertEqual(removed, num_removals)

Пример #4

Показать файл

    def assertStrips(self, html1, html2, expected, num_removals, check_ids=False):
        """
        Checks that strip_template(html1, html2) leaves the first document
        equal to the expected HTML string and returns num_removals.

        The three HTML arguments are fragments without <html>/<body> tags;
        they are wrapped before parsing and comparison. check_ids is handed
        to strip_template() as a keyword argument.
        """
        def as_document(fragment):
            # Test fragments omit <html> and <body> for brevity; add them back.
            return '<html><body>%s</body></html>' % fragment

        first_tree = document_fromstring(as_document(html1))
        second_tree = document_fromstring(as_document(html2))
        expected_html = as_document(expected)

        removal_count = strip_template(first_tree, second_tree, check_ids=check_ids)
        serialized = etree.tostring(first_tree, method='html')
        self.assertEqual(serialized, expected_html)
        self.assertEqual(removal_count, num_removals)