示例#1
0
    def parse(self, response):
        """Process one result page and schedule the following one.

        The hrefs of all ``a.title`` anchors are run through
        ``util.link_filter`` and the result is passed to
        ``Sparql.is_endpoint`` together with ``Yippy.is_first_crawl``
        (its return value is not used here).

        Args:
            response: The downloaded page; must provide ``css`` and
                ``urljoin`` (presumably a Scrapy response -- confirm).

        Yields:
            A single ``Request`` for the ``a.listnext`` href, with this
            method as its callback, when such a link is present.
        """
        result_hrefs = response.css("a.title::attr(href)").getall()
        filtered_hrefs = util.link_filter(result_hrefs)
        Sparql.is_endpoint(filtered_hrefs, first_crawl=Yippy.is_first_crawl)

        following_href = response.css("a.listnext::attr(href)").get()
        if following_href is None:
            # No "next" anchor on this page: nothing more to schedule.
            return

        yield Request(response.urljoin(following_href), callback=self.parse)
示例#2
0
    def parse(self, response):
        """Process one result page and schedule the following one.

        The hrefs of all ``a.ob`` anchors are run through
        ``util.link_filter`` and the result is passed to
        ``Sparql.is_endpoint`` together with ``Mojeek.is_first_crawl``
        (its return value is not used here).

        Args:
            response: The downloaded page; must provide ``css`` and
                ``urljoin`` (presumably a Scrapy response -- confirm).

        Yields:
            A single ``Request`` for the last href found under
            ``div.pagination``, with this method as its callback. Nothing
            is yielded when the page has no pagination links.
        """
        links = response.css("a.ob::attr(href)").getall()

        Sparql.is_endpoint(util.link_filter(links),
                           first_crawl=Mojeek.is_first_crawl)

        pagination_links = response.css(
            "div.pagination a::attr(href)").getall()

        # Indexing an empty list with [-1] would raise IndexError on pages
        # without a pagination bar, so fall back to None in that case.
        next_page = pagination_links[-1] if pagination_links else None

        if next_page is not None:
            yield Request(response.urljoin(next_page), callback=self.parse)
示例#3
0
    def parse(self, response):
        """Process one result page and schedule the following one.

        The hrefs of the result-title anchors are run through
        ``util.link_filter`` and the result is passed to
        ``Sparql.is_endpoint`` together with ``Aol.is_first_crawl``
        (its return value is not used here).

        Args:
            response: The downloaded page; must provide ``css`` and
                ``urljoin`` (presumably a Scrapy response -- confirm).

        Yields:
            A single ``Request`` for the ``a.next`` href, with this method
            as its callback, when such a link is present.
        """
        title_selector = "a.ac-algo.fz-l.ac-21th.lh-24::attr(href)"
        found_hrefs = response.css(title_selector).getall()

        Sparql.is_endpoint(
            util.link_filter(found_hrefs),
            first_crawl=Aol.is_first_crawl,
        )

        upcoming = response.css("a.next::attr(href)").get()
        if upcoming is None:
            # No "next" anchor: this was the last page we can reach.
            return

        absolute_url = response.urljoin(upcoming)
        yield Request(absolute_url, callback=self.parse)
示例#4
0
    def parse(self, response):
        """Process one result page and schedule the following one.

        The hrefs of all ``div.b_title a.sh_favicon`` anchors are run
        through ``util.link_filter`` and the result is passed to
        ``Sparql.is_endpoint`` together with ``Bing.is_first_crawl``
        (its return value is not used here).

        Args:
            response: The downloaded page; must provide ``css`` and
                ``urljoin`` (presumably a Scrapy response -- confirm).

        Yields:
            A single ``Request`` for the next-page anchor's href, with this
            method as its callback, when such a link is present.
        """
        page_hrefs = response.css(
            "div.b_title a.sh_favicon::attr(href)"
        ).getall()
        usable_hrefs = util.link_filter(page_hrefs)
        Sparql.is_endpoint(usable_hrefs, first_crawl=Bing.is_first_crawl)

        pager_selector = "a.sb_pagN.sb_pagN_bp.b_widePag.sb_bp::attr(href)"
        next_href = response.css(pager_selector).get()

        if next_href is None:
            # Selector matched nothing, so there is no page to follow.
            return

        yield Request(response.urljoin(next_href), callback=self.parse)
示例#5
0
    def parse(self, response):
        """Process one result page and schedule the following one.

        The hrefs of the result-title anchors are run through
        ``util.link_filter`` and the result is passed to
        ``Sparql.is_endpoint`` together with ``Ask.is_first_crawl``
        (its return value is not used here).

        Args:
            response: The downloaded page; must provide ``css`` and
                ``urljoin`` (presumably a Scrapy response -- confirm).

        Yields:
            A single ``Request`` for the href under
            ``li.PartialWebPagination-next``, with this method as its
            callback, when such a link is present.
        """
        result_selector = (
            "a.PartialSearchResults-item-title-link.result-link::attr(href)"
        )
        hrefs = response.css(result_selector).getall()

        Sparql.is_endpoint(
            util.link_filter(hrefs),
            first_crawl=Ask.is_first_crawl,
        )

        next_selector = "li.PartialWebPagination-next a::attr(href)"
        successor = response.css(next_selector).get()
        if successor is None:
            # No pagination entry for a further page.
            return

        target_url = response.urljoin(successor)
        yield Request(target_url, callback=self.parse)
示例#6
0
    def parse(self, response):
        """Process one result page and schedule the following one.

        The hrefs of all ``div.kCrYT a`` anchors are passed through
        ``util.link_regulator_for_google`` and then ``util.link_filter``;
        the result is handed to ``Sparql.is_endpoint`` together with
        ``Google.is_first_crawl`` (its return value is not used here).

        Args:
            response: The downloaded page; must provide ``css``, ``url``
                and ``urljoin`` (presumably a Scrapy response -- confirm).

        Yields:
            A single ``Request`` for the chosen pagination href, with this
            method as its callback. Nothing is yielded when no suitable
            pagination link exists.
        """
        links = response.css("div.kCrYT a::attr(href)").getall()

        Sparql.is_endpoint(
            util.link_filter(util.link_regulator_for_google(links)),
            first_crawl=Google.is_first_crawl)

        # Run the pagination selector once and choose by position below.
        pagination_links = response.css(
            "a.nBDE1b.G5eFlf::attr(href)").getall()

        if "start=10&" in response.url:
            # On this URL the second match is the one to follow (presumably
            # the first one points back to the previous page -- confirm).
            # Guard the index so a page with fewer than two matches ends
            # pagination instead of raising IndexError.
            next_page = (pagination_links[1]
                         if len(pagination_links) > 1 else None)
        else:
            # Same result as ``.get()``: first match, or None when empty.
            next_page = pagination_links[0] if pagination_links else None

        if next_page is not None:
            yield Request(response.urljoin(next_page), callback=self.parse)