def fetch(self, url, tmp_file, date_string=None):
    """Send the source CSV to the address search service and save the reply.

    The lines read from ``self.source`` are posted in batches of 2000 data
    rows, each batch prefixed with the first line of the source (used as the
    header), to the hard-coded api-adresse.data.gouv.fr CSV endpoint. The
    responses are concatenated into ``tmp_file``.

    Args:
        url: Unused; the service URL is hard-coded in this method.
        tmp_file: Path of the UTF-8 text file that receives the result.
        date_string: Unused.

    Returns:
        Always ``True``.

    Raises:
        Whatever ``raise_for_status()`` raises on the HTTP response when the
        service answers with an error status.
    """
    service = u'https://api-adresse.data.gouv.fr/search/csv/'
    content = self.source.open().readlines()
    header = content[0:1]
    step = 2000

    # Ceiling division over the data rows (everything after the header).
    # At least one request is always sent, so a header-only or empty source
    # still produces an output file, but an exact multiple of `step` no
    # longer triggers a trailing header-only request.
    rows = max(len(content) - 1, 0)
    slices = max(1, -(-rows // step))

    # The context manager guarantees the output is flushed and closed even
    # when a request fails part-way through.
    with open(tmp_file, 'w', encoding='utf-8') as outfile:
        for i in range(slices):
            self.logger.log("Geocode slice {0}/{1}".format(i, slices))
            start = 1 + step * i
            chunk = ''.join(header + content[start:start + step])
            r = downloader.requests_retry_session().post(
                url=service,
                data={
                    'delimiter': self.delimiter,
                    'encoding': self.encoding,
                    'columns': self.columns,
                    'citycode': self.citycode,
                },
                files={
                    'data': chunk,
                },
            )
            r.raise_for_status()
            if i == 0:
                text = r.text
            else:
                # Drop the first line of every later response (presumably
                # the repeated CSV header) so it appears once in the output.
                text = r.text.partition('\n')[2]
            outfile.write(text)
    return True
def dl(url, local, logger=OsmoseLog.logger(), min_file_size=10 * 1024):
    """Download ``url`` to ``local``, optionally decompressing or converting.

    When the URL ends in ``.bz2`` (and ``local`` does not) the download is
    passed through ``bunzip2``; when it ends in ``.pbf`` (and ``local`` does
    not) it is converted with the configured osmconvert binary. A
    ``<local>.ts`` side file stores the server's ``Last-Modified`` value and
    is sent back as ``If-Modified-Since`` when both it and the download file
    exist.

    Args:
        url: Remote location to fetch.
        local: Destination path of the final file.
        logger: Object with a ``log(message)`` method.
        min_file_size: Minimum acceptable size in bytes (default 10 KiB).

    Returns:
        ``True`` when a new file was downloaded, ``False`` when the server
        answered 304 or the downloaded size does not match
        ``Content-Length``.

    Raises:
        SystemError: The file is smaller than ``min_file_size``.
        subprocess.CalledProcessError: ``bunzip2`` or osmconvert failed.
        Whatever ``answer.raise_for_status()`` raises on an HTTP error.
    """
    import shlex  # function-scope import: only needed to quote shell paths

    unzip = False
    convert_pbf = False

    # file names
    file_ts = local + ".ts"
    url_ext = os.path.splitext(url)[1]
    local_ext = os.path.splitext(local)[1]
    if url_ext == ".bz2" and local_ext != ".bz2":
        file_dl = local + url_ext
        unzip = True
    elif url_ext == ".pbf" and local_ext != ".pbf":
        file_dl = local + url_ext
        convert_pbf = True
    else:
        file_dl = local

    headers = {}

    # make the download conditional
    if os.path.exists(file_dl) and os.path.exists(file_ts):
        with open(file_ts) as ts_file:
            headers["If-Modified-Since"] = ts_file.read()

    # request fails with a 304 error when the file wasn't modified
    # Retry on 404, workaround Geofabrik update in progress
    answer = downloader.get(
        url,
        headers=headers,
        session=downloader.requests_retry_session(
            status_forcelist=downloader.DEFAULT_RETRY_ON + (404, )))
    if answer.status_code == 304:
        logger.log(u"not newer")
        return False
    if not answer.ok:
        logger.log(u"got error %d" % answer.status_code)
        logger.log(u"  URL=%s" % url)
        answer.raise_for_status()

    url_ts = answer.headers.get('Last-Modified')

    # Content-Length may be absent (e.g. chunked transfer); treat the size
    # as unknown instead of crashing on int(None).
    content_length = answer.headers.get('Content-Length')
    file_size = int(content_length) if content_length is not None else None
    if file_size is not None and file_size < min_file_size:
        # file must be at least min_file_size bytes
        logger.log("File is not big enough: %d B" % file_size)
        raise SystemError

    # write the file
    with open(file_dl, "wb") as outfile:
        for data in answer.iter_content(chunk_size=None):
            outfile.write(data)

    downloaded_size = os.path.getsize(file_dl)
    if file_size is None:
        # No announced size: apply the minimum-size rule to what was
        # actually received.
        if downloaded_size < min_file_size:
            logger.log("File is not big enough: %d B" % downloaded_size)
            os.remove(file_dl)
            raise SystemError
    elif not answer.headers.get('Content-Encoding') and file_size != downloaded_size:
        # With a Content-Encoding the announced length does not describe the
        # decoded bytes on disk, so the comparison is skipped in that case.
        logger.log(
            u"error: Download file (%d) not of the expected size (%d) for %s" %
            (downloaded_size, file_size, url))
        os.remove(file_dl)
        return False

    # uncompress
    if unzip:
        logger.log(u"bunzip2")
        subprocess.check_output(['bunzip2', '-f', file_dl])

    # convert pbf to osm
    if convert_pbf:
        logger.log(u"osmconvert")
        # The shell is still needed for the output redirection; quote the
        # paths so spaces or metacharacters cannot break the command.
        subprocess.check_output(
            "{} {} > {}".format(config.bin_osmconvert,
                                shlex.quote(file_dl),
                                shlex.quote(local)),
            shell=True)
        os.remove(file_dl)

    # set timestamp
    if url_ts is not None:
        with open(file_ts, "w") as ts_file:
            ts_file.write(url_ts)
    elif os.path.exists(file_ts):
        # No Last-Modified header: drop the stale timestamp so the next run
        # does not send an outdated If-Modified-Since value.
        os.remove(file_ts)

    return True