Python download_file示例，rows.utils.download_file Python示例

示例#1

0

显示文件

文件： Dados_abertos_FEE.py 项目： LuisPauloRReis/Scripts

 def CSV(fullPath,fileName, idFile):
     download_file("https://dados.fee.tche.br/php/download.php?csv/Municipio/"+idFile+"/1991,1992,1993,1994,1995,1996,1997,1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015,2016", filename=fullPath + fileName+".zip", progress=True)
     fileZip = zipfile.ZipFile(fullPath+fileName+".zip", 'r')
     fileZip.extractall(fullPath)
     fileZip.close()
     for fileCSV in os.listdir(fullPath):
         if ".csv" not in fileCSV:
             os.remove(fullPath + fileCSV)

示例#2

0

显示文件

def download(date, cache=True):
    data_path = Path(__file__).parent / "data"
    if not data_path.exists():
        data_path.mkdir()
    url = f"https://data.brasil.io/dataset/covid19/backup/{date}/obito_cartorio.csv.gz"
    filename = data_path / f"{date}-obito_cartorio.csv.gz"
    if not cache or not filename.exists():
        download_file(url, filename, progress=True)
    return filename

示例#3

0

显示文件

文件： Dados_abertos_FEE.py 项目： LuisPauloRReis/Scripts

def main():
    data_path = pathlib.Path("Lista de Variaveis")
    if not data_path.exists():
        data_path.mkdir()
            
    download_file("https://dados.fee.tche.br/php/doc_down.php?csv/ListaVars", filename="Lista de Variaveis\\ListaVariaveis.zip", progress=True)
    fileZip = zipfile.ZipFile("Lista de Variaveis\\ListaVariaveis.zip", 'r')
    fileZip.extractall("Lista de Variaveis")
    fileZip.close()
    for fileCSV in os.listdir("Lista de Variaveis"):
            if ".csv" not in fileCSV:
                os.remove("Lista de Variaveis\\"+fileCSV)

示例#4

0

显示文件

文件： extractors.py 项目： hilam/eleicoes-brasil

    def download(self, year, force=False):
        filename = self.filename(year)
        if not force and filename.exists():  # File has already been downloaded
            return {"downloaded": False, "filename": filename}

        url = self.url(year)
        file_data = download_file(url, progress=True)
        move_file(file_data.uri, filename)
        return {"downloaded": True, "filename": filename}

示例#5

0

显示文件

文件： extractors.py 项目： leticiadasilva/eleicoes-brasil

    def download(self, year, force=False):
        filename = self.download_filename(year)
        if not filename.parent.exists():
            filename.parent.mkdir(parents=True)
        if not force and filename.exists():  # File has already been downloaded
            return {"downloaded": False, "filename": filename}

        url = self.url(year)
        file_data = download_file(url, progress=True, chunk_size=256 * 1024)
        rename_file(file_data.uri, filename)
        return {"downloaded": True, "filename": filename}

示例#6

0

显示文件

文件： fotos.py 项目： hilam/eleicoes-brasil

def download_photos(year):
    year = str(year)
    url = f"http://agencia.tse.jus.br/estatistica/sead/eleicoes/eleicoes{year}/fotos/"
    table = import_from_uri(url)
    for row in table:
        if row.name == "Parent Directory":
            continue

        filename = download_path / year / row.name
        print(f"Downloading {filename.name}", end="")
        if filename.exists():
            print(" - downloaded already, skipping.")
        else:
            if not filename.parent.exists():
                filename.parent.mkdir()
            print()
            download_file(urljoin(url, row.name),
                          progress=True,
                          filename=filename)
            print(f"  saved: {filename}")

        photo_path = output_path / year
        if not photo_path.exists():
            photo_path.mkdir()
        print(f"  Exporting to: {photo_path}")
        zf = ZipFile(filename)
        for file_info in tqdm(zf.filelist, desc="Exporting pictures"):
            internal_name = file_info.filename
            internal_path = Path(internal_name)
            extension = internal_path.name.split(".")[-1].lower()
            info = internal_path.name.split(".")[0].split("_")[0]
            state, sequence_number = info[1:3], info[3:]
            new_filename = photo_path / state / f"{sequence_number}.{extension}"

            if not new_filename.parent.exists():
                new_filename.parent.mkdir()
            zfobj = zf.open(internal_name)
            with open(new_filename, mode="wb") as fobj:
                fobj.write(zfobj.read())

示例#7

0

显示文件

文件： cli.py 项目： shenkuko001/Adam

def command_pdf_to_text(output_encoding, quiet, backend, pages, source, output):

    # Define page range
    if pages:
        pages = extract_intervals(pages)

    # Define if output is file or stdout
    if output:
        output = open(output, mode="w", encoding=output_encoding)
        write = output.write
    else:
        write = click.echo
        quiet = True
    progress = not quiet

    # Download the file if source is an HTTP URL
    downloaded = False
    if source.lower().startswith("http:") or source.lower().startswith("https:"):
        result = download_file(source, progress=progress, detect=False)
        source = result.uri
        downloaded = True

    reader = rows.plugins.pdf.pdf_to_text(source, page_numbers=pages, backend=backend)
    if progress:  # Calculate total number of pages and create a progress bar
        if pages:
            total_pages = len(pages)
        else:
            total_pages = rows.plugins.pdf.number_of_pages(source, backend=backend)
        reader = tqdm(reader, desc="Extracting text", total=total_pages)

    for page in reader:
        write(page)

    if output:
        output.close()
    if downloaded:
        os.unlink(source)

示例#8

0

显示文件

文件： Multas_Ambientais - Ibama.py 项目： LuisPauloRReis/Scripts

from rows.utils import download_file
import pathlib

estados = [
    "AC", "AL", "AM", "AP", "BA", "CE", "DF", "ES", "GO", "MA", "MT", "MS",
    "MG", "PA", "PB", "PR", "PE", "PI", "RJ", "RN", "RO", "RS", "RR", "SC",
    "SE", "SP", "TO"
]

data_path = pathlib.Path("data")
if not data_path.exists():
    data_path.mkdir()

for UF in estados:
    download_file("https://dadosabertos.ibama.gov.br/dados/SICAFI/" + UF +
                  "/Quantidade/multasDistribuidasBensTutelados.csv",
                  filename="data\\" + UF + ".csv",
                  progress=True)

示例#9

0

显示文件

import pathlib

estados = [
    "AC", "AL", "AM", "AP", "BA", "CE", "DF", "ES", "GO", "MA", "MT", "MS",
    "MG", "PA", "PB", "PR", "PE", "PI", "RJ", "RN", "RO", "RS", "RR", "SC",
    "SE", "SP", "TO"
]

data_path = pathlib.Path("data")
if not data_path.exists():
    data_path.mkdir()

CTF_path = pathlib.Path("data\\CTF")
if not CTF_path.exists():
    CTF_path.mkdir()

AIDA_path = pathlib.Path("data\\AIDA")
if not AIDA_path.exists():
    AIDA_path.mkdir()

for UF in estados:
    download_file("http://dadosabertos.ibama.gov.br/dados/CTF/APP/" + UF +
                  "/pessoasJuridicas.csv",
                  filename="data\\CTF\\CTF-" + UF + ".csv",
                  progress=True)

download_file(
    "http://dadosabertos.ibama.gov.br/dados/CTF/AIDA/pessoasJuridicas.csv",
    filename="data\\AIDA\\AIDA.csv",
    progress=True)

示例#10

0

显示文件

文件： pdf_parser.py 项目： turicas/salarios-governo-amazonas

        )
        for row in table:
            result.append(convert_row(row))

    return rows.import_from_dicts(result)


if __name__ == "__main__":
    from argparse import ArgumentParser
    from pathlib import Path

    from rows.utils import download_file

    parser = ArgumentParser()
    parser.add_argument(
        "--url",
        default="http://www.transparencia.am.gov.br/arquivos/2014/158_201404.pdf",
    )
    args = parser.parse_args()
    url = args.url
    pdf_filename = Path(url).name
    csv_filename = pdf_filename.replace(".pdf", ".csv")

    download_file(url, pdf_filename, progress=True)

    print("Parsing PDF...")
    table = parse_file(pdf_filename)

    print("Exporting to CSV...")
    rows.export_to_csv(table, csv_filename)