示例#1
0
文件: url.py 项目: kuc2477/news
    def parse(self, content):
        """Parse the html body of an http response into a single
        :class:`~news.models.abstract.Readable`.

        Internally uses an :class:`~extraction.Extractor` to extract semantic
        tags (title, description and image) from the plain html content.
        The page url is handed to the extractor as ``source_url`` so that
        relative urls found in the document (e.g. the image) can be resolved
        against the page instead of being returned as-is.

        :param content: Http response body
        :type content: :class:`str`
        :returns: A parsed readable; its ``content`` is the unmodified
            response body, while title, summary and image come from the
            extractor
        :rtype: :class:`~news.models.abstract.Readable`

        """
        # `source_url` is optional for the extractor; without it a relative
        # image url would stay relative and be unusable outside of the page.
        extracted = Extractor().extract(content, source_url=self.url)
        return Readable(
            url=self.url,
            title=extracted.title,
            content=content,
            summary=extracted.description,
            image=extracted.image,
        )