def iterate_results(self, include_headers=False):
    """Stream the rows of this job's result file as parsed CSV fields.

    Waits for the job to complete, issues a streaming GET for ``self.url``
    through the API session and parses every line with ``parse_csv_line``.

    Args:
        include_headers: when true, the parsed header line is yielded
            first; otherwise the header line is skipped.

    Yields:
        The value returned by ``parse_csv_line`` for each line of the
        response. Nothing is yielded when the response has no lines.
    """
    api = self.api
    job = self  # just to be clear
    job.wait_for_completion()
    response = api.session.get(
        job.url,
        headers=api.headers,
        stream=True,
        **api.common_request_params
    )
    # NOTE(review): the HTTP status is not checked here, so an error body
    # would be parsed as if it were CSV -- confirm whether that is intended.
    try:
        iterator = response.iter_lines(chunk_size=self._CHUNK_SIZE)
        # A bare next() would leak StopIteration out of this generator on an
        # empty body, which Python 3.7+ turns into RuntimeError (PEP 479).
        headers = next(iterator, None)
        if headers is None:
            return
        if include_headers:
            yield parse_csv_line(headers)
        for line in iterator:
            yield parse_csv_line(line)
    finally:
        # Release the streamed connection even if the consumer stops early.
        response.close()
def iterate_results(self):
    """Stream the data rows of this job's result file as parsed CSV fields.

    Waits for the job to complete, then downloads ``self.url`` with a
    short-lived ``requests.Session`` in streaming mode. The first line
    (the header) is skipped; every following line is parsed with
    ``parse_csv_line``.

    Yields:
        The value returned by ``parse_csv_line`` for each non-header line.
        Nothing is yielded when the response has no lines.
    """
    api = self.api
    job = self  # just to be clear
    job.wait_for_completion()
    with requests.Session() as session:
        response = session.get(job.url, headers=api.headers, stream=True)
        try:
            iterator = response.iter_lines(chunk_size=self._CHUNK_SIZE)
            # Skip the header line. The default avoids leaking StopIteration
            # out of this generator on an empty body, which Python 3.7+
            # turns into RuntimeError (PEP 479).
            if next(iterator, None) is None:
                return
            for line in iterator:
                yield parse_csv_line(line)
        finally:
            # Release the streamed connection even if the consumer stops early.
            response.close()
def _parse_lines(self): iterator = self._iter_rows() headers = next(iterator) yield headers for line in iterator: parsed_line = parse_csv_line(line) rp_entity_id, entity_type, data_type, data_value, range_start, range_end = parsed_line if rp_entity_id not in self._entities: self._entities[rp_entity_id] = entity = RpEntityReference( rp_entity_id, {}, entity_type=entity_type) else: entity = self._entities[rp_entity_id] data_type = data_type.lower() if data_type not in entity._data: entity._data[data_type] = [] entity._data[data_type].append( dict(data_value=data_value, range_start=range_start, range_end=range_end)) yield line
# Download every flat file of the chosen type and merge the CSV members of
# each zip archive into a single '<file_id>.combined.csv' with one header row.
api_key = os.environ['RP_API_KEY']  # set your API KEY here
api = RPApi(api_key)
flat_type = 'companies'  # can be 'companies' or 'full'

flat_list = api.get_flatfile_list(flat_type)
for flat_file in flat_list:
    file_id = flat_file['id']
    combined_year_filename = '%s.combined.csv' % file_id
    if os.path.isfile(combined_year_filename):
        continue  # already combined on a previous run

    # Only hit the API when the archive is not already on disk.
    if not os.path.isfile(file_id):
        print("Downloading", file_id, flat_file['size'] / 1024 / 1024, "MB")
        # Download to a temporary name so an interrupted transfer is never
        # mistaken for a complete archive on the next run.
        partial_download = '%s.part' % file_id
        with api.get_flatfile(flat_type, file_id) as flatzip:
            with open(partial_download, 'wb') as f:
                for chunk in flatzip.iter_content(chunk_size=8192):
                    f.write(chunk)
        # The destination was just checked to be absent, so a plain rename
        # is enough on every platform.
        os.rename(partial_download, file_id)

    # Same idea for the output: it only gets its final name once every
    # archive member has been written, so a crash cannot leave a truncated
    # combined file that later runs would skip.
    partial_output = '%s.part' % combined_year_filename
    with open(partial_output, 'wb') as output:
        headers_written = False
        with zipfile.ZipFile(file_id) as zipped:
            for fileinfo in zipped.namelist():
                print(fileinfo)
                with zipped.open(fileinfo) as member:
                    header_line = next(member, None)
                    if header_line is None:
                        continue  # empty member: nothing to copy
                    # NOTE(review): the parsed header and rows are not used
                    # below; presumably a hook for per-row processing --
                    # confirm, or drop the parsing for speed.
                    headers = parse_csv_line(header_line)
                    if not headers_written:
                        # Keep only the first member's header line.
                        output.write(header_line)
                        headers_written = True
                    for line in member:
                        row = parse_csv_line(line)
                        output.write(line)
    os.rename(partial_output, combined_year_filename)