def parse_item(self, response):
    """Yield one ``NewsArticleItem`` per article node found in *response*.

    The XPath expressions are read from ``self.xpath_dict``: the
    ``"articles"`` entry selects the article nodes on the page, and the
    ``"title"`` and ``"url"`` entries are evaluated relative to each node.
    Each field receives the result of ``extract_first()`` on its query.

    Args:
        response: The response object to scrape; it must expose ``url``
            and ``xpath``.

    Yields:
        A ``NewsArticleItem`` with its ``"title"`` and ``"url"`` fields set.
    """
    self.log("Scraping: " + response.url)
    xpaths = self.xpath_dict
    for node in response.xpath(xpaths["articles"]):
        entry = NewsArticleItem()
        # Fill the fields in the same order as before: title first, then url.
        for field in ("title", "url"):
            entry[field] = node.xpath(xpaths[field]).extract_first()
        yield entry
def parse_item(self, response):
    """Yield one ``NewsArticleItem`` per article node found in *response*.

    Article nodes are selected with the ``"articles"`` XPath from
    ``self.xpath_dict``; each node's link is read with the ``"url"`` XPath.
    The title is derived from the link itself: the text following the last
    ``".com"`` with its final 10 characters removed.

    An item is yielded for every article node. When no link could be
    extracted, the item carries ``url=None`` and no ``"title"`` field, and
    a warning is logged.

    Args:
        response: The response object to scrape; it must expose ``url``
            and ``xpath``.

    Yields:
        A ``NewsArticleItem`` with ``"url"`` set and, when a link was
        found, ``"title"`` set as described above.
    """
    self.log("Scraping: " + response.url)
    articles = response.xpath(self.xpath_dict["articles"])
    logging.info("Article:: \n%s", articles)
    for article in articles:
        item = NewsArticleItem()
        url = article.xpath(self.xpath_dict["url"]).extract_first()
        item["url"] = url
        logging.info("Linked scraped: \n%s", url)
        if url is None:
            # Check for the missing link explicitly instead of relying on an
            # AttributeError from ``None.split``; report through logging so
            # the failure shows up alongside the rest of the spider's output.
            logging.warning("Root Spider failed to scrape item")
        else:
            path = url.split(".com")[-1]
            # ``[:-10]`` drops the trailing 10 characters and yields "" for
            # shorter paths; ``[:len(path) - 10]`` would wrap around to a
            # negative index and keep an arbitrary prefix in that case.
            item["title"] = path[:-10]
        yield item