Python Document.summary_with_metadata示例

编程语言: Python

命名空间/包名称: readability_lxml.readability

类/类型: Document

方法/功能: summary_with_metadata

hotexamples.com的示例: 2

Python Document.summary_with_metadata - 已找到2个示例。以下是从开源项目中提取的、评分最高的 readability_lxml.readability.Document.summary_with_metadata 真实 Python 示例。您可以对示例进行评分，以帮助我们提高示例质量。

常用方法

显示隐藏

summary(3)

summary_with_metadata(2)

示例#1

显示文件

文件： test_readability.py 项目： ZoeyYoung/python-readability

    def test_basic(self):
        """Summarize a multi-page article and diff it against the expected HTML.

        The document is built with a mock URL fetcher, so the follow-up pages
        are served from the dict returned by ``_make_basic_urldict``. The
        test fails if the summary lacks the page 2 / page 3 headings, or if
        the HTML diff against the stored expectation contains any ``<ins>``
        or ``<del>`` element.
        """
        page_html = load_regression_data('basic-multi-page.html')
        mock_fetcher = urlfetch.MockUrlFetch(self._make_basic_urldict())
        doc = Document(
            page_html,
            url='http://basic.com/article.html',
            multipage=True,
            urlfetch=mock_fetcher,
        )
        result = doc.summary_with_metadata()

        # Headings from the later pages must appear in the summary.
        heading_checks = (
            ('Page 2', 'Should find the page 2 heading'),
            ('Page 3', 'Should find the page 3 heading'),
        )
        for heading, message in heading_checks:
            self.assertIn(heading, result.html, message)

        expected_html = load_regression_data('basic-multi-page-expected.html')
        diff_tree = document_fromstring(htmldiff(expected_html, result.html))

        # Both node lists are collected up front; insertions are reported
        # (and fail the test) before deletions are looked at.
        diff_reports = (
            ('unexpected insertion: %s', diff_tree.xpath('//ins')),
            ('unexpected deletion: %s', diff_tree.xpath('//del')),
        )
        for template, nodes in diff_reports:
            if not nodes:
                continue
            for node in nodes:
                print(template % node.xpath('string()'))
            self.fail('readability result does not match expected')

示例#2

显示文件

文件： test_article_only.py 项目： mitechie/python-readability

 def test_si_sample_full_summary(self):
     """We should parse the doc and get a full summary with confidence.

     Checks that the object returned by ``summary_with_metadata`` exposes
     ``html``, ``confidence``, ``title`` and ``short_title`` attributes, that
     the summary HTML begins with ``<div><div class="``, and that the
     confidence score is greater than 50.
     """
     sample = load_sample('si-game.sample.html')
     doc = Document(sample, url='http://sportsillustrated.cnn.com/baseball/mlb/gameflash/2012/04/16/40630_preview.html')
     # NOTE(review): enclose_with_html_tag=False presumably omits the outer
     # <html> wrapper, which matches the prefix asserted below - confirm
     # against the Document implementation.
     res = doc.summary_with_metadata(enclose_with_html_tag=False)

     # Each failure message names the attribute that is actually missing.
     for attr in ('html', 'confidence', 'title', 'short_title'):
         self.assertTrue(hasattr(res, attr),
             'res should have a %s attrib' % attr)

     # The 17-character prefix is the opening of the nested content divs.
     self.assertEqual('<div><div class="', res.html[0:17])
     self.assertGreater(res.confidence, 50,
         'The confidence score should be larger than 50: ' + str(res.confidence))