def parse(self, response):
    """Handle one airport listing page.

    Extracts the links matched by ``AirportSelectors.LIST_AIRPORTS`` and
    yields a ``parse_airport`` request for each one whose canonical URL
    (``self.base_url`` + the link with its query string removed) makes
    ``LocationAirportEs.check_by_url`` return a falsy value. When the page
    holds 100 or more links, a request for the following listing page
    (``?pageid=<current_page + 1>``) is yielded as well.

    The page counter is read from ``response.meta['current_page']`` and
    falls back to 1 when it is missing or falsy. Every request uses
    ``self.parse_err`` as its errback and carries a ``page_kind`` meta
    entry (``'airport'`` or ``'list'``).
    """
    selector = Selector(response)
    hrefs = selector.xpath(AirportSelectors.LIST_AIRPORTS).extract()
    page_no = long(response.meta.get('current_page') or 1)
    # Single parenthesised expression: same output as the print statement.
    print(u'links: %s, %s' % (len(hrefs), response.url))

    if not hrefs:
        return

    for href in hrefs:
        airport_url = u'%s%s' % (self.base_url, href.partition('?')[0])
        # A truthy result from check_by_url means this URL is skipped.
        if not LocationAirportEs.check_by_url(airport_url):
            yield Request(
                airport_url,
                callback=self.parse_airport,
                errback=self.parse_err,
                meta={'page_kind': 'airport'},
            )

    # NOTE(review): 100 looks like the size of a full listing page, so a
    # shorter page would be the last one -- confirm against the site.
    if len(hrefs) < 100:
        return

    following_page = page_no + 1
    listing_url = u'%s?pageid=%s' % (
        response.url.partition('?')[0],
        following_page,
    )
    yield Request(
        listing_url,
        callback=self.parse,
        errback=self.parse_err,
        meta={'page_kind': 'list', 'current_page': following_page},
    )