Example #1
import os
import zipfile

import requests

# OUT_PATH (the output directory) and scwds (the module providing
# get_full_table_download) are defined elsewhere in the source module.

def download_zips(line):
    # Each input line pairs a human-readable title with a table id,
    # separated by the first colon
    title, table_id = (part.strip() for part in line.split(':', 1))
    url_path = scwds.get_full_table_download(table_id)
    _, ext = os.path.splitext(url_path)
    download_file = os.path.join(OUT_PATH, title + ext)
    if os.path.exists(download_file):
        print(f"Download skipped - {title} ")
    else:
        try:
            with requests.get(url_path, stream=True) as response, open(
                    download_file, 'wb') as out_file:
                response.raise_for_status()
                print(f"Downloading - {title}")
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        out_file.write(chunk)
                print(f"Download complete - {title}")
        except requests.exceptions.RequestException as err:
            return {table_id: err}
    try:
        with zipfile.ZipFile(download_file) as input_zip:
            # Full-table zips are expected to contain a single data file
            input_file = input_zip.namelist()[0]
            if os.path.exists(os.path.join(OUT_PATH, input_file)):
                print(f"Extraction skipped - {title}")
            else:
                input_zip.extract(input_file, OUT_PATH)
                print(f"Extracted {input_file} for - {title}")
        os.remove(download_file)
        return {table_id: "Successful"}
    except Exception as err:
        return {table_id: err}
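A hedged usage sketch for the function above: the "title: table id" line and the output directory are illustrative, and scwds and OUT_PATH must come from the surrounding module.

# Illustrative only: the table id below is made up, and OUT_PATH must point
# at an existing directory.
result = download_zips("Labour force characteristics: 14-10-0287-01")
print(result)  # {"14-10-0287-01": "Successful"} on success, or {table_id: exception}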
Example #2
import json
import pathlib

import requests
from tqdm import tqdm

# get_cube_metadata and get_full_table_download are defined elsewhere in the
# source module.

def download_tables(tables, path=None, csv=True):
    """Download a json file and zip of data for a list of tables to path.

    Parameters
    ----------
    tables: list of str
        tables to be downloaded
    path: str or path object, default None
        Where to download the table and json; defaults to the current
        directory
    csv: boolean, default True
        If True, download in CSV format; otherwise download SDMX

    Returns
    -------
    downloaded: list
        list of tables that were downloaded
    """
    path = pathlib.Path(path) if path else pathlib.Path()
    metas = get_cube_metadata(tables)
    for meta in metas:
        product_id = meta["productId"]
        zip_url = get_full_table_download(product_id, csv=csv)
        zip_file_name = product_id + ("-eng.zip" if csv else ".zip")
        json_file_name = product_id + ".json"
        zip_file = path / zip_file_name
        json_file = path / json_file_name

        # Thanks http://evanhahn.com/python-requests-library-useragent/
        response = requests.get(zip_url,
                                stream=True,
                                headers={"user-agent": None})

        progress_bar = tqdm(
            desc=zip_file_name,
            total=int(response.headers.get("content-length", 0)),
            unit="B",
            unit_scale=True,
        )

        # Thanks https://bit.ly/2sPYPYw
        with open(json_file, "w") as outfile:
            json.dump(meta, outfile)
        with open(zip_file, "wb") as handle:
            for chunk in response.iter_content(chunk_size=512):
                if chunk:  # filter out keep-alive new chunks
                    handle.write(chunk)
                    progress_bar.update(len(chunk))
        progress_bar.close()
    return [meta["productId"] for meta in metas]
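A rough usage sketch, assuming get_cube_metadata and get_full_table_download are importable from the surrounding module; the table number is illustrative.

# Illustrative only: the table number is made up, and the target directory
# must already exist (the function opens files there without creating it).
downloaded = download_tables(["27-10-0014-01"], path="data", csv=True)
print(downloaded)  # e.g. the productId list, after <id>-eng.zip and <id>.json are written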
Example #3
import json
import os

import requests

# get_cube_metadata and get_full_table_download are defined elsewhere in the
# source module.

def download_tables(tables, path=None, csv=True):
    """Download a json file and zip of data for a list of tables to path

    Parameters
    ----------
    tables: list of str
        tables to be downloaded
    path: str, default None
        Where to download the table and json; defaults to the current
        directory
    csv: boolean, default True
        If True, download in CSV format; otherwise download SDMX

    Returns
    -------
    downloaded: list
        list of tables that were downloaded
    """
    metas = get_cube_metadata(tables)
    for meta in metas:
        product_id = meta["productId"]
        zip_url = get_full_table_download(product_id, csv=csv)
        if csv:
            zip_file = product_id + "-eng.zip"
        else:
            zip_file = product_id + ".zip"
        json_file = product_id + ".json"
        if path:
            zip_file = os.path.join(path, zip_file)
            json_file = os.path.join(path, json_file)
        # Thanks http://evanhahn.com/python-requests-library-useragent/
        response = requests.get(zip_url,
                                stream=True,
                                headers={"user-agent": None})
        # Thanks https://bit.ly/2sPYPYw
        with open(json_file, "w") as outfile:
            json.dump(meta, outfile)
        with open(zip_file, "wb") as handle:
            for chunk in response.iter_content(chunk_size=512):
                if chunk:  # filter out keep-alive new chunks
                    handle.write(chunk)
    downloaded = [meta["productId"] for meta in metas]
    return downloaded
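The same call shape works for this older variant; a sketch of the SDMX branch, again with an illustrative table number.

# Illustrative only: csv=False selects the SDMX download, saving <productId>.zip
# instead of <productId>-eng.zip; path=None writes to the current directory.
downloaded = download_tables(["27-10-0014-01"], path=None, csv=False)
print(downloaded)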