def parse_list(self, response):
    """
    Yield whatever ``self.parse`` yields for this response, then one request per remaining page.

    When ``self.sample`` is truthy, nothing more is yielded after ``self.parse``. Otherwise, the response body is
    parsed as JSON, and a request is built for each page number from 2 up to and including ``data['maxPage']``, by
    replacing the ``page`` parameter of the current request's URL.
    """
    yield from self.parse(response)

    if self.sample:
        return

    last_page = json.loads(response.text)['maxPage']
    # The generator expression is lazy, so each request is built only when the consumer asks for it.
    yield from (
        self.build_request(
            replace_parameter(response.request.url, 'page', page_number),
            formatter=parameters('page'),
        )
        for page_number in range(2, last_page + 1)
    )
def parse_list(self, response):
    """
    Yield one request per listed item, followed by a request for the next page of the list.

    The response body is parsed as JSON. If it is empty, nothing is yielded. Otherwise, each entry of
    ``data['data']`` produces a request whose URL is the current request's URL, with its ``offset`` parameter
    replaced by ``None``, followed by the entry's ``ocid``.

    Unless ``self.sample`` is truthy, a final request is yielded for the current request's URL with ``offset`` set
    to ``data['offset']``; that request is handled by this same method.
    """
    payload = json.loads(response.text)

    # The last page returns an empty JSON object.
    if not payload:
        return

    current_url = response.request.url

    for entry in payload['data']:
        item_url = replace_parameter(current_url, 'offset', None) + entry['ocid']
        yield self.build_request(item_url, formatter=components(-2))

    if not self.sample:
        next_url = replace_parameter(current_url, 'offset', payload['offset'])
        yield self.build_request(
            next_url,
            formatter=join(components(-1), parameters('offset')),
            callback=self.parse_list,
        )
def parse_list(self, response):
    """
    Yield whatever ``self.parse`` yields for this response, then one request per remaining offset.

    When ``self.sample`` is truthy, nothing more is yielded after ``self.parse``. Otherwise, the response body is
    parsed as JSON, and a request is built for each offset in
    ``range(data['meta']['pagination']['limit'], data['meta']['count'], self.step)``, by replacing the ``offset``
    parameter of the current request's URL.
    """
    yield from self.parse(response)

    if self.sample:
        return

    meta = json.loads(response.text)['meta']
    first_offset = meta['pagination']['limit']
    record_count = meta['count']

    # NOTE(review): the range starts at the API-reported limit but advances by self.step; presumably the two are
    # equal - confirm against the spider's configuration.
    yield from (
        self.build_request(
            replace_parameter(response.request.url, 'offset', next_offset),
            formatter=parameters('offset'),
        )
        for next_offset in range(first_offset, record_count, self.step)
    )
def parse_list(self, response):
    """
    Yield whatever ``self.parse`` yields for this response, then one request per remaining page.

    When ``self.sample`` is truthy, nothing more is yielded after ``self.parse``. Otherwise, the response body is
    parsed as JSON, and the number of pages is computed as ``ceil(total / pageSize)`` from its ``pagination``
    object. A request is built for every page after the current one, up to and including the last page, by
    replacing the ``page`` parameter of the current request's URL.
    """
    yield from self.parse(response)

    if self.sample:
        return

    pagination = json.loads(response.text)['pagination']
    current_page = pagination['page']
    total = pagination['total']
    limit = pagination['pageSize']

    # range() excludes its stop value, so add 1 to include the final page. Without it, the last page of results
    # is never requested.
    # NOTE(review): this assumes pages are numbered from 1, so that the last page number equals the page count -
    # confirm against the API.
    last_page = ceil(total / limit)
    for page_number in range(current_page + 1, last_page + 1):
        url = replace_parameter(response.request.url, 'page', page_number)
        yield self.build_request(url, formatter=parameters('page'))
def parse_pages(self, response):
    """
    Yield a request for each file listed on this page, followed by a request for the next page, if any.

    The response body is parsed as JSON and passed to ``self.get_files_to_download``; each URL it returns becomes a
    request. If ``pagination['current_page']`` is less than ``pagination['total_pages']`` and ``self.sample`` is
    falsy, a request for the following page is yielded, handled by this same method.

    All requests are built with ``dont_filter=True``.
    """
    payload = json.loads(response.text)

    for file_url in self.get_files_to_download(payload):
        yield self.build_request(file_url, formatter=components(-1), dont_filter=True)

    paging = payload['pagination']
    current_page = paging['current_page']
    if current_page >= paging['total_pages'] or self.sample:
        return

    next_url = replace_parameter(response.request.url, 'page', current_page + 1)
    yield self.build_request(
        next_url,
        formatter=parameters('fecha_desde', 'page'),
        dont_filter=True,
        callback=self.parse_pages,
    )
def parse_list(self, response):
    """
    Yield one request per listed tender, followed by a request for the next page of the list.

    The response body is parsed as JSON. If it is empty, nothing is yielded. Otherwise, each entry of
    ``data['data']`` produces a request for the fixed tenders URL followed by the entry's ``ocid``.

    Unless ``self.sample`` is truthy, a final request is yielded for the current request's URL with ``offset`` set
    to ``data['offset']``; that request is handled by this same method.
    """
    tenders_url = 'http://public.eprocurement.systems/ocds/tenders/'
    payload = json.loads(response.text)

    # The last page returns an empty JSON object.
    if not payload:
        return

    for entry in payload['data']:
        yield self.build_request(tenders_url + entry['ocid'], formatter=components(-1))

    if not self.sample:
        next_url = replace_parameter(response.request.url, 'offset', payload['offset'])
        yield self.build_request(next_url, formatter=parameters('offset'), callback=self.parse_list)
def test_replace_parameter(url, value, expected):
    """Check that replacing the ``page`` parameter of ``url`` with ``value`` produces ``expected``."""
    actual = replace_parameter(url, 'page', value)

    assert actual == expected