def parse(self, response):
    """Process one result page and follow its "next" link when present.

    Collects the ``href`` of every ``a.title`` anchor, passes the list
    through ``util.link_filter`` and hands the result to
    ``Sparql.is_endpoint`` together with ``Yippy.is_first_crawl``.
    Afterwards a new request for the ``a.listnext`` target is yielded,
    using this method as the callback.

    Args:
        response: The downloaded page; must offer ``css`` and ``urljoin``.

    Yields:
        At most one ``Request`` pointing at the following result page.
    """
    result_hrefs = response.css("a.title::attr(href)").getall()
    filtered_hrefs = util.link_filter(result_hrefs)
    Sparql.is_endpoint(filtered_hrefs, first_crawl=Yippy.is_first_crawl)

    following_href = response.css("a.listnext::attr(href)").get()
    if following_href is None:
        # No "next" anchor on this page: nothing more to schedule.
        return

    following_url = response.urljoin(following_href)
    yield Request(following_url, callback=self.parse)
def parse(self, response):
    """Process one result page and follow its last pagination link.

    Collects the ``href`` of every ``a.ob`` anchor, passes the list
    through ``util.link_filter`` and hands the result to
    ``Sparql.is_endpoint`` together with ``Mojeek.is_first_crawl``.
    The last anchor inside ``div.pagination`` is then used as the
    address of the next page.

    Args:
        response: The downloaded page; must offer ``css`` and ``urljoin``.

    Yields:
        At most one ``Request`` for the next page, with this method as
        the callback. Nothing is yielded when the page has no pagination
        anchors.
    """
    links = response.css("a.ob::attr(href)").getall()
    Sparql.is_endpoint(
        util.link_filter(links), first_crawl=Mojeek.is_first_crawl
    )

    # getall() returns an empty list when nothing matches, so indexing it
    # with [-1] unconditionally would raise IndexError on a page without
    # pagination. Check for emptiness before taking the last entry.
    pagination_links = response.css("div.pagination a::attr(href)").getall()
    if pagination_links:
        next_page = pagination_links[-1]
        yield Request(response.urljoin(next_page), callback=self.parse)
def parse(self, response):
    """Process one result page and follow its "next" link when present.

    Collects the ``href`` of every anchor matching
    ``a.ac-algo.fz-l.ac-21th.lh-24``, passes the list through
    ``util.link_filter`` and hands the result to ``Sparql.is_endpoint``
    together with ``Aol.is_first_crawl``. Afterwards a new request for
    the ``a.next`` target is yielded, using this method as the callback.

    Args:
        response: The downloaded page; must offer ``css`` and ``urljoin``.

    Yields:
        At most one ``Request`` pointing at the following result page.
    """
    result_selector = "a.ac-algo.fz-l.ac-21th.lh-24::attr(href)"
    result_hrefs = response.css(result_selector).getall()
    filtered_hrefs = util.link_filter(result_hrefs)
    Sparql.is_endpoint(filtered_hrefs, first_crawl=Aol.is_first_crawl)

    following_href = response.css("a.next::attr(href)").get()
    if following_href is None:
        # No "next" anchor on this page: nothing more to schedule.
        return

    following_url = response.urljoin(following_href)
    yield Request(following_url, callback=self.parse)
def parse(self, response):
    """Process one result page and follow its "next" link when present.

    Collects the ``href`` of every ``div.b_title a.sh_favicon`` anchor,
    passes the list through ``util.link_filter`` and hands the result to
    ``Sparql.is_endpoint`` together with ``Bing.is_first_crawl``.
    Afterwards a new request for the target of the
    ``a.sb_pagN.sb_pagN_bp.b_widePag.sb_bp`` anchor is yielded, using
    this method as the callback.

    Args:
        response: The downloaded page; must offer ``css`` and ``urljoin``.

    Yields:
        At most one ``Request`` pointing at the following result page.
    """
    result_hrefs = response.css(
        "div.b_title a.sh_favicon::attr(href)"
    ).getall()
    filtered_hrefs = util.link_filter(result_hrefs)
    Sparql.is_endpoint(filtered_hrefs, first_crawl=Bing.is_first_crawl)

    pager_selector = "a.sb_pagN.sb_pagN_bp.b_widePag.sb_bp::attr(href)"
    following_href = response.css(pager_selector).get()
    if following_href is None:
        # No "next" anchor on this page: nothing more to schedule.
        return

    following_url = response.urljoin(following_href)
    yield Request(following_url, callback=self.parse)
def parse(self, response):
    """Process one result page and follow its "next" link when present.

    Collects the ``href`` of every anchor matching
    ``a.PartialSearchResults-item-title-link.result-link``, passes the
    list through ``util.link_filter`` and hands the result to
    ``Sparql.is_endpoint`` together with ``Ask.is_first_crawl``.
    Afterwards a new request for the anchor inside
    ``li.PartialWebPagination-next`` is yielded, using this method as
    the callback.

    Args:
        response: The downloaded page; must offer ``css`` and ``urljoin``.

    Yields:
        At most one ``Request`` pointing at the following result page.
    """
    result_selector = (
        "a.PartialSearchResults-item-title-link.result-link::attr(href)"
    )
    result_hrefs = response.css(result_selector).getall()
    filtered_hrefs = util.link_filter(result_hrefs)
    Sparql.is_endpoint(filtered_hrefs, first_crawl=Ask.is_first_crawl)

    pager_selector = "li.PartialWebPagination-next a::attr(href)"
    following_href = response.css(pager_selector).get()
    if following_href is None:
        # No "next" anchor on this page: nothing more to schedule.
        return

    following_url = response.urljoin(following_href)
    yield Request(following_url, callback=self.parse)
def parse(self, response):
    """Process one result page and follow its navigation link.

    Collects the ``href`` of every ``div.kCrYT a`` anchor, runs the list
    through ``util.link_regulator_for_google`` and then
    ``util.link_filter``, and hands the result to ``Sparql.is_endpoint``
    together with ``Google.is_first_crawl``. The next page is then taken
    from the anchors matching ``a.nBDE1b.G5eFlf``.

    Args:
        response: The downloaded page; must offer ``css``, ``urljoin``
            and ``url``.

    Yields:
        At most one ``Request`` for the next page, with this method as
        the callback. Nothing is yielded when the expected navigation
        anchor is missing.
    """
    links = response.css("div.kCrYT a::attr(href)").getall()
    Sparql.is_endpoint(
        util.link_filter(util.link_regulator_for_google(links)),
        first_crawl=Google.is_first_crawl,
    )

    # Query the navigation anchors once and pick by position.
    nav_links = response.css("a.nBDE1b.G5eFlf::attr(href)").getall()

    # When the URL contains "start=10&" the second matching anchor is used
    # instead of the first.
    # NOTE(review): presumably the first anchor on that page points
    # backwards -- confirm against the live markup.
    wanted_index = 1 if "start=10&" in response.url else 0

    # Bounds-check instead of indexing blindly: a page with fewer anchors
    # than expected would otherwise raise IndexError.
    if len(nav_links) > wanted_index:
        next_page = nav_links[wanted_index]
        yield Request(response.urljoin(next_page), callback=self.parse)