Exemplo n.º 1
0
    def parse(self, response):
        """Yield one loaded ``QuoteItem`` per quote node, then follow pagination.

        Each ``<div class="quote">`` node found in ``response`` gets its own
        ``ItemLoader``, which collects the ``text``, ``author`` and ``tags``
        fields through XPath expressions evaluated relative to that node.
        Once all items have been yielded, the ``href`` of the ``next_page``
        anchor (if there is one) is resolved against the response URL and
        requested with this same method as the callback.

        Yields:
            The result of ``load_item()`` for every quote node, followed by
            at most one ``scrapy.Request`` for the next page.
        """
        # (field name, XPath relative to a single quote node)
        field_queries = (
            ('text', ".//div[@class='quoteText']/text()[1]"),
            ('author', ".//span[@class='authorOrTitle']/text()"),
            ('tags', ".//div[@class='greyText smallText left']/a/text()"),
        )

        for quote_node in response.xpath("//div[@class='quote']"):
            item_loader = ItemLoader(
                item=QuoteItem(), selector=quote_node, response=response)
            for field_name, query in field_queries:
                item_loader.add_xpath(field_name, query)
            yield item_loader.load_item()

        # The page's "next" link drives the crawl; pagination stops as soon
        # as a page has no such link.
        next_href = response.xpath(
            "//a[@class='next_page']/@href").extract_first()
        if next_href is None:
            return

        yield scrapy.Request(
            url=response.urljoin(next_href), callback=self.parse)
 def parse(self, respones):
     """Request the next page first, then yield an item per quote node.

     The ``href`` of the ``next_page`` anchor is looked up before any item
     is built; when it exists, a ``scrapy.Request`` for the absolute URL is
     yielded with this method as its callback.  Afterwards every
     ``<div class="quote">`` node is loaded into a ``QuoteItem`` through an
     ``ItemLoader``.

     The parameter keeps its original spelling (``respones``) so the
     signature is unchanged.
     """
     # ``respones.xpath(...)`` also works in place of
     # ``respones.selector.xpath(...)`` for both queries below.
     next_href = respones.selector.xpath(
         "//a[@class='next_page']/@href").extract_first()
     if next_href is not None:
         yield scrapy.Request(
             url=respones.urljoin(next_href), callback=self.parse)

     # (field name, XPath relative to a single quote node).  The author and
     # tags queries select the element nodes themselves (no ``/text()``);
     # presumably the item's processors strip the markup -- TODO confirm.
     field_queries = (
         ('text', ".//div[@class='quoteText']/text()[1]"),
         ('author', ".//span[@class='authorOrTitle']"),
         ('tags', ".//div[@class='greyText smallText left']/a"),
     )

     for quote_node in respones.selector.xpath("//div[@class='quote']"):
         loader = ItemLoader(
             item=QuoteItem(), selector=quote_node, response=respones)
         for field_name, query in field_queries:
             loader.add_xpath(field_name, query)
         yield loader.load_item()
Exemplo n.º 3
0
    def parse(self, response):
        """Load a ``QuoteItem`` from every quote node and follow the next page.

        For each ``<div class="quote">`` node an ``ItemLoader`` gathers the
        ``text``, ``author`` and ``tags`` fields via XPaths relative to that
        node, and the loaded item is yielded.  When the page carries a
        ``next_page`` anchor, its ``href`` (for example ``/quotes?page=2``)
        is joined with the response URL and requested with this method as
        the callback.
        """
        # (field name, XPath relative to a single quote node).  The author
        # and tags queries select element nodes rather than their text;
        # presumably the item's processors strip the markup -- TODO confirm.
        field_queries = (
            ('text', ".//div[@class='quoteText']/text()[1]"),
            ('author', ".//div[@class='quoteText']/child::span"),
            ('tags', ".//div[@class='greyText smallText left']/a"),
        )

        for quote_node in response.xpath("//div[@class='quote']"):
            item_loader = ItemLoader(
                item=QuoteItem(), selector=quote_node, response=response)
            for field_name, query in field_queries:
                item_loader.add_xpath(field_name, query)
            yield item_loader.load_item()

        # Relative link such as /quotes?page=2; None when the anchor is absent.
        next_href = response.xpath(
            "//a[@class='next_page']/@href").extract_first()
        if next_href is None:
            return

        yield scrapy.Request(
            url=response.urljoin(next_href), callback=self.parse)
Exemplo n.º 4
0
    def parse(self, response):
        """Yield a loaded ``QuoteItem`` per quote node, then the next-page request.

        Quote nodes (``<div class="quote">``) are located through
        ``response.selector``.  Each one is passed as the ``selector`` of a
        fresh ``ItemLoader`` (no ``response`` argument is given to the
        loader), which fills ``text``, ``author`` and ``tags`` from XPaths
        relative to the node.  Tags are taken from the anchors under the
        ``quoteFooter`` div whose ``href`` contains ``/quotes/tag/``.

        After the items, the ``next_page`` anchor's ``href`` (for example
        ``/quotes?page=2``) is made absolute and requested with this method
        as the callback, if such an anchor exists.
        """
        # (field name, XPath relative to a single quote node)
        field_queries = (
            ('text', ".//div[@class='quoteText']/text()[1]"),
            ('author', ".//span[@class='authorOrTitle']/text()"),
            ('tags',
             ".//div[@class='quoteFooter']/div/a[contains(@href, '/quotes/tag/')]"),
        )

        for quote_node in response.selector.xpath("//div[@class='quote']"):
            item_loader = ItemLoader(item=QuoteItem(), selector=quote_node)
            for field_name, query in field_queries:
                item_loader.add_xpath(field_name, query)
            yield item_loader.load_item()

        # e.g. /quotes?page=2; None when the page has no "next" anchor.
        next_href = response.selector.xpath(
            "//a[@class='next_page']/@href").extract_first()
        if next_href is None:
            return

        yield scrapy.Request(
            url=response.urljoin(next_href), callback=self.parse)
Exemplo n.º 5
0
    def parse(self, response, **kwargs):
        """Yield a loaded ``QuoteItem`` per quote node and follow pagination.

        Every ``<div class="quote">`` node in ``response`` is fed to an
        ``ItemLoader`` that reads the ``text``, ``author`` and ``tags``
        fields from XPaths relative to the node; the loaded item is then
        yielded.  If a ``next_page`` anchor is present, its ``href`` is
        joined with the response URL and a ``scrapy.Request`` for it is
        yielded with this method as the callback.

        Extra keyword arguments are accepted but not used.
        """
        # (field name, XPath relative to a single quote node)
        field_queries = (
            ("text", ".//div[@class='quoteText']/text()[1]"),
            ("author", ".//span[@class='authorOrTitle']/text()"),
            ("tags", ".//div[@class='greyText smallText left']/a/text()"),
        )

        for quote_node in response.xpath("//div[@class='quote']"):
            item_loader = ItemLoader(
                item=QuoteItem(), selector=quote_node, response=response)
            for field_name, query in field_queries:
                item_loader.add_xpath(field_name, query)
            yield item_loader.load_item()

        # None when the page has no "next" anchor, which ends the pagination.
        next_href = response.xpath(
            "//a[@class='next_page']/@href").extract_first()
        if next_href is None:
            return

        yield scrapy.Request(
            url=response.urljoin(next_href), callback=self.parse)