def save_url(self, filename, data, file_path):
    save_content_response = save_content(data['url'], file_path)
    if save_content_response.errors:
        return self.SaveUrlResult(errors=save_content_response.errors,
                                  warnings=save_content_response.warnings)

    additional = []
    if not self.sample:
        with open(file_path) as f:
            json_data = json.load(f)
        if 'links' in json_data and 'next' in json_data['links'] and json_data['links']['next']:
            additional.append({
                'url': json_data['links']['next'],
                'filename': 'page-%s.json' % hashlib.md5(
                    json_data['links']['next'].encode('utf-8')).hexdigest(),
                'data_type': 'release_package',
                # We set the same priority so that all the requests for one year are done at the same time.
                # Because this paginates using cursors, it's probably best to get the pages as fast as possible.
                'priority': data['priority'],
            })

    return self.SaveUrlResult(additional_files=additional,
                              warnings=save_content_response.warnings)
def save_url(self, filename, data, file_path):
    save_content_response = save_content(data['url'], file_path)
    if save_content_response.errors:
        return self.SaveUrlResult(errors=save_content_response.errors,
                                  warnings=save_content_response.warnings)

    additional = []
    if data['data_type'] == 'record_package':
        with open(file_path) as f:
            json_data = json.load(f)
        if 'packages' in json_data:
            for url in json_data['packages']:
                additional.append({
                    'url': url,
                    'filename': 'packages-%s.json' % hashlib.md5(url.encode('utf-8')).hexdigest(),
                    'data_type': 'release_package',
                })

    return self.SaveUrlResult(additional_files=additional,
                              warnings=save_content_response.warnings)
def save_url(self, filename, data, file_path):
    if data['data_type'] == 'meta':
        response, errors = util.get_url_request(data['url'], verify_ssl=False)
        if errors:
            return self.SaveUrlResult(errors=errors)

        data = json.loads(response.text)
        additional = []
        if 'data' in data:
            # Sometimes it's a dict, sometimes it's a list.
            if isinstance(data['data'], dict):
                data['data'] = data['data'].values()
            for data_item in data['data']:
                if not self.sample or len(additional) < 10:
                    additional.append({
                        'url': data_item['uri'],
                        'filename': '{}.json'.format(data_item['ocid']),
                        'data_type': 'release',
                        'priority': 1,
                    })
        return self.SaveUrlResult(additional_files=additional)
    else:
        save_content_response = util.save_content(data['url'], file_path, verify_ssl=False)
        return self.SaveUrlResult(errors=save_content_response.errors,
                                  warnings=save_content_response.warnings)
def save_url(self, filename, data, file_path):
    if data['data_type'] == 'release_package':
        save_content_response = save_content(data['url'], file_path)
        if save_content_response.errors:
            return self.SaveUrlResult(errors=save_content_response.errors,
                                      warnings=save_content_response.warnings)

        additional = []
        with open(file_path) as f:
            json_data = json.load(f)
        page = int(filename.split('-')[1])
        if 'next_page' in json_data and 'uri' in json_data['next_page'] and (not self.sample or page < 3):
            page += 1
            additional.append({
                'url': json_data['next_page']['uri'],
                'filename': 'page-%d-.json' % page,
                'data_type': 'release_package',
            })

        return self.SaveUrlResult(additional_files=additional,
                                  warnings=save_content_response.warnings)
def save_url(self, filename, data, file_path):
    save_content_response = save_content(data['url'], file_path + '-temp.tar.gz',
                                         replace_control_codes=False)
    if save_content_response.errors:
        return self.SaveUrlResult(errors=save_content_response.errors,
                                  warnings=save_content_response.warnings)

    # Extract the first member of the downloaded archive to file_path, copying in 1 MB chunks.
    with tarfile.open(file_path + '-temp.tar.gz', "r:gz") as tar:
        with tar.extractfile(tar.getnames()[0]) as readfp:
            with open(file_path, "wb") as writefp:
                while True:
                    buf = readfp.read(1024 * 1024)
                    if buf:
                        writefp.write(buf)
                    else:
                        break

    return self.SaveUrlResult()
def save_url(self, filename, data, file_path):
    save_content_response = save_content(data['url'], file_path)
    if save_content_response.errors:
        return self.SaveUrlResult(errors=save_content_response.errors,
                                  warnings=save_content_response.warnings)

    additional = []
    if data['data_type'] == 'meta':
        with open(file_path) as f:
            json_data = json.load(f)
        for item in json_data:
            if not self.sample or len(additional) < 10:
                additional.append({
                    'url': item,
                    'filename': 'release-%s.json' % item.split('/')[-1],
                    'data_type': 'release',
                })

    return self.SaveUrlResult(additional_files=additional,
                              warnings=save_content_response.warnings)
def save_url(self, file_name, data, file_path):
    save_content_response = save_content(data['url'], file_path)
    return self.SaveUrlResult(errors=save_content_response.errors,
                              warnings=save_content_response.warnings)
def save_url(self, filename, data, file_path):
    save_content_response = save_content(data['url'], file_path)
    if save_content_response.errors:
        return self.SaveUrlResult(errors=save_content_response.errors,
                                  warnings=save_content_response.warnings)

    additional = []
    if data['data_type'] == 'meta':
        with open(file_path) as f:
            json_data = json.load(f)

        page = int(filename.split('-')[3])
        type = filename.split('-')[1]
        if 'links' in json_data and 'next' in json_data['links'] and (not self.sample or page < 3):
            page += 1
            additional.append({
                'url': json_data['links']['next'],
                'filename': 'type-%s-page-%d-.json' % (type, page),
                'data_type': 'meta',
                'priority': 10,
            })

        count = 0
        for release in json_data['releases']:
            if not self.sample or count < 3:
                stage_urls = []
                if type == 'planning':
                    uuid = release['tender']['plannedProcurementUUID']
                    stage_urls.append(
                        'https://tenders.nsw.gov.au/?event=public.api.planning.view'
                        '&PlannedProcurementUUID=%s' % uuid)
                if type == 'tender':
                    uuid = release['tender']['RFTUUID']
                    stage_urls.append(
                        'https://tenders.nsw.gov.au/?event=public.api.tender.view&RFTUUID=%s' % uuid)
                if type == 'contract':
                    for award in release['awards']:
                        uuid = award['CNUUID']
                        stage_urls.append(
                            'https://tenders.nsw.gov.au/?event=public.api.contract.view&CNUUID=%s' % uuid)
                count += 1
                for url in stage_urls:
                    additional.append({
                        'url': url,
                        'filename': 'packages-%s.json' % hashlib.md5(url.encode('utf-8')).hexdigest(),
                        'data_type': 'release_package',
                        'priority': 1,
                    })

    return self.SaveUrlResult(additional_files=additional,
                              warnings=save_content_response.warnings)