def fetch(self, url, tmp_file, date_string=None):
        """Geocode the source CSV through the api-adresse CSV service.

        The lines read from ``self.source`` are posted in slices of 2000 data
        lines, each slice prefixed with the first (header) line, and the
        responses are concatenated into ``tmp_file`` (UTF-8).  The header line
        of the response is kept for the first slice only.

        Args:
            url: Unused; the service endpoint is hard-coded below.
            tmp_file: Path of the file the geocoded CSV is written to.
            date_string: Unused.

        Returns:
            True once every slice has been written.

        Raises:
            Whatever ``r.raise_for_status()`` raises when the service answers
            with an error status.
        """
        service = u'https://api-adresse.data.gouv.fr/search/csv/'
        step = 2000

        # The context manager guarantees the output is flushed and closed,
        # including when a request fails half-way through.
        with open(tmp_file, 'w', encoding='utf-8') as outfile:
            # NOTE(review): assumes readlines() yields str lines that keep
            # their line terminator -- confirm against the source type.
            content = self.source.open().readlines()
            header = content[0:1]
            rows = max(len(content) - 1, 0)
            # Ceiling division; at least one request is still made so that a
            # source without data lines keeps producing the response header.
            slices = max(1, -(-rows // step))
            ends_with_newline = True
            for i in range(slices):
                self.logger.log("Geocode slice {0}/{1}".format(i, slices))
                first = 1 + step * i
                chunk = ''.join(header + content[first:first + step])
                r = downloader.requests_retry_session().post(
                    url=service,
                    data={
                        'delimiter': self.delimiter,
                        'encoding': self.encoding,
                        'columns': self.columns,
                        'citycode': self.citycode,
                    },
                    files={
                        'data': chunk,
                    })
                r.raise_for_status()
                if i == 0:
                    text = r.text
                else:
                    # Drop the header line repeated in every response.
                    text = r.text.partition('\n')[2]
                if text:
                    # Guard against a response lacking a trailing newline,
                    # which would glue two rows together.
                    if not ends_with_newline:
                        outfile.write('\n')
                    outfile.write(text)
                    ends_with_newline = text.endswith('\n')

        return True
Пример #2
0
def dl(url, local, logger=OsmoseLog.logger(), min_file_size=10 * 1024):
    """Download ``url`` to ``local`` unless the remote file is unchanged.

    A ``.bz2`` URL whose target is not ``.bz2`` is downloaded next to
    ``local`` and decompressed with ``bunzip2``; a ``.pbf`` URL whose target
    is not ``.pbf`` is downloaded next to ``local`` and converted with
    ``config.bin_osmconvert``.  The ``Last-Modified`` header of the answer is
    stored in ``local + ".ts"`` and sent back as ``If-Modified-Since`` on the
    next call.

    Args:
        url: Address of the file to download.
        local: Path of the final local file.
        logger: Object whose ``log(message)`` method receives progress text.
        min_file_size: Minimum acceptable download size, in bytes.

    Returns:
        True when a new file was downloaded (and post-processed), False when
        the server answered 304 or the downloaded size did not match the
        announced ``Content-Length``.

    Raises:
        SystemError: The download is smaller than ``min_file_size``.
        Whatever ``answer.raise_for_status()`` raises for an error status,
        and whatever ``subprocess.check_output`` raises when a helper fails.
    """
    import shlex  # local import: only needed to quote the shell command

    unzip = False
    convert_pbf = False

    # file names
    file_ts = local + ".ts"
    url_ext = os.path.splitext(url)[1]
    local_ext = os.path.splitext(local)[1]
    if url_ext == ".bz2" and local_ext != ".bz2":
        file_dl = local + url_ext
        unzip = True
    elif url_ext == ".pbf" and local_ext != ".pbf":
        file_dl = local + url_ext
        convert_pbf = True
    else:
        file_dl = local

    headers = {}

    # make the download conditional
    if os.path.exists(file_dl) and os.path.exists(file_ts):
        with open(file_ts) as ts_file:
            timestamp = ts_file.read().strip()
        # An empty timestamp file would produce a meaningless header.
        if timestamp:
            headers["If-Modified-Since"] = timestamp

    # request fails with a 304 error when the file wasn't modified
    # Retry on 404, workaround Geofabrik update in progress
    answer = downloader.get(
        url,
        headers=headers,
        session=downloader.requests_retry_session(
            status_forcelist=downloader.DEFAULT_RETRY_ON + (404, )))
    if answer.status_code == 304:
        logger.log(u"not newer")
        return False
    if not answer.ok:
        logger.log(u"got error %d" % answer.status_code)
        logger.log(u"  URL=%s" % url)
        answer.raise_for_status()

    url_ts = answer.headers.get('Last-Modified')

    # Content-Length may be absent; in that case the size checks are done on
    # the file actually written.
    content_length = answer.headers.get('Content-Length')
    file_size = int(content_length) if content_length is not None else None
    if file_size is not None and file_size < min_file_size:
        # file must be at least min_file_size bytes
        logger.log("File is not big enough: %d B" % file_size)
        raise SystemError("File is not big enough: %d B" % file_size)

    # write the file
    with open(file_dl, "wb") as outfile:
        for data in answer.iter_content(chunk_size=None):
            outfile.write(data)

    actual_size = os.path.getsize(file_dl)
    if file_size is None:
        if actual_size < min_file_size:
            logger.log("File is not big enough: %d B" % actual_size)
            os.remove(file_dl)
            raise SystemError("File is not big enough: %d B" % actual_size)
    elif not answer.headers.get('Content-Encoding') and file_size != actual_size:
        # With a Content-Encoding the announced length need not match the
        # size written, so the comparison is skipped in that case.
        logger.log(
            u"error: Download file (%d) not of the expected size (%d) for %s" %
            (actual_size, file_size, url))
        os.remove(file_dl)
        return False

    # uncompress
    if unzip:
        logger.log(u"bunzip2")
        subprocess.check_output(['bunzip2', '-f', file_dl])

    # convert pbf to osm
    if convert_pbf:
        logger.log(u"osmconvert")
        # The shell is kept for the output redirection; the paths are quoted
        # so that spaces or shell metacharacters cannot alter the command.
        subprocess.check_output("{} {} > {}".format(config.bin_osmconvert,
                                                    shlex.quote(file_dl),
                                                    shlex.quote(local)),
                                shell=True)
        os.remove(file_dl)

    # set timestamp
    if url_ts:
        with open(file_ts, "w") as ts_file:
            ts_file.write(url_ts)
    else:
        logger.log(u"no Last-Modified header, timestamp not saved")

    return True