Exemplo n.º 1
0
    def parse_item(self, response):
        """Yield one ``Article`` built from a Goose extraction of the response.

        The raw response body is handed to a freshly created ``Goose``
        instance. The extracted title and cleaned text are combined with the
        response URL and ``self.name`` (passed as ``field``).
        """
        extracted = Goose().extract(raw_html=response.body)

        # Keys are listed in the same order the original keyword arguments
        # were evaluated.
        item_fields = {
            'title': extracted.title,
            'text': extracted.cleaned_text,
            'url': response.url,
            'field': self.name,
        }
        yield Article(**item_fields)
Exemplo n.º 2
0
    def parse_item(self, response):
        """Yield one ``Article`` whose text comes from ``Extractor.getText()``.

        The title is the first match of the ``title::text`` CSS selector on
        the response (whatever ``extract_first()`` returns). The text is
        produced by an ``Extractor`` configured with ``'ArticleExtractor'``
        and fed the raw response body. The response URL and ``self.name``
        (passed as ``field``) complete the item.
        """
        page_title = response.css('title::text').extract_first()

        text_extractor = Extractor(extractor='ArticleExtractor',
                                   html=response.body)
        # Pull the text before touching response.url / self.name, matching
        # the order in which the item arguments were originally evaluated.
        main_text = text_extractor.getText()

        yield Article(
            title=page_title,
            text=main_text,
            url=response.url,
            field=self.name,
        )
Exemplo n.º 3
0
    def parse_item(self, response):
        """Yield one ``Article`` whose text is produced by ``parse_body``.

        The title is the first match of the ``title::text`` CSS selector on
        the response (whatever ``extract_first()`` returns). The text is the
        result of calling ``parse_body(response)``. The response URL and
        ``self.name`` (passed as ``field``) complete the item.
        """
        page_title = response.css('title::text').extract_first()
        article_text = parse_body(response)

        item_fields = {
            'title': page_title,
            'text': article_text,
            'url': response.url,
            'field': self.name,
        }
        yield Article(**item_fields)