def now(config, country="br"):
    """Fetch the ambassadors spreadsheet and normalize it.

    Parameters
    ----------
    config : dict
        Global config holding the drive path under
        config[country]["drive_paths"]["embaixadores"].
    country : str
        Country key into the config (default "br").
    """
    data = download_from_drive(config[country]["drive_paths"]["embaixadores"])

    # Rename the raw spreadsheet headers to the canonical schema.
    data.columns = [
        "timestamp",
        "email",
        "city_norm",
        "state_id",
        "name",
        "last_updated",
        "number_ventilators",
        "number_beds",
        "n_casos",
        "n_mortes",
        "number_icu_beds",
        "number_available_ventilators",
        "number_tota_icu_beds",
        "source",
    ]

    # Normalize city names for later matching.
    data["city_norm"] = data["city_norm"].apply(treat_text)

    # Overwrite "last_updated" with the parsed form submission timestamp.
    data["last_updated"] = data["timestamp"].apply(
        lambda value: pd.to_datetime(value, format="%d/%m/%Y %H:%M:%S")
    )
    return data
def now(config, country="br"):
    """Load the health-region reopening sheet with renamed columns and int dtypes.

    Parameters
    ----------
    config : dict
    country : str
        Country key into the config (default "br").
    """
    df = download_from_drive(
        config[country]["drive_paths"]["br_health_region_reopening_data"]
    )
    df = df.rename(config["br"]["safereopen"]["rename"], axis=1)
    # Cast identifier columns to int64.
    df["state_num_id"] = df["state_num_id"].astype("int64")
    # NOTE(review): "health_region" is filled from "health_region_id",
    # exactly as in the original — confirm the target name is intentional.
    df["health_region"] = df["health_region_id"].astype("int64")
    df["cnae"] = df["cnae"].astype("int64")
    return df
def _get_population_by_state(config, country="br"):
    """Get population by state.

    Downloads the city-level population sheet, fixes place ids, then sums
    city populations per state. Returns a DataFrame indexed by state_num_id.
    """
    # Adapted from endpoints.get_health: download population (by city) sheet.
    pop = download_from_drive(config[country]["drive_paths"]["cities_population"])

    # Fix for default places ids - before "health_system_region".
    places_ids = get_places_id.now(config).assign(
        city_id=lambda d: d["city_id"].astype(int)
    )
    pop = pop.drop(["city_name", "state_name"], axis=1).merge(
        places_ids, on=["city_id", "state_id"]
    )

    # Parse every "last_updated*" column as datetime.
    time_cols = [c for c in pop.columns if "last_updated" in c]
    pop[time_cols] = pop[time_cols].apply(pd.to_datetime)

    # Adapted from endpoints.get_states_farolcovid_main.now():
    # sum all cities in each state.
    grouped = (
        pop.groupby(
            [
                "country_iso",
                "country_name",
                "state_num_id",
                "state_id",
                "state_name",
            ]
        )
        .agg({"population": "sum"})
        .reset_index()
        .sort_values("state_num_id")
        .set_index("state_num_id")
    )
    return grouped
def now(config):
    """Build the per-health-region vaccination table (doses 1 and 2).

    Downloads the Brasil.io vaccination microdata, counts patients per
    (city, dose number), joins place ids and population, then aggregates
    vaccinated/immunized counts and percentages per health region.

    Parameters
    ----------
    config : dict
        Global config holding the "cities_population" drive path.
    """
    cols = {
        "estabelecimento_codigo_ibge_municipio": "int",
        "paciente_uuid": "str",
        "numero_dose": "str",
    }
    # Count vaccinated patients per (city, dose number).
    df = (
        pd.read_csv(download_brasilio_table(), usecols=cols.keys())
        .groupby(["estabelecimento_codigo_ibge_municipio", "numero_dose"])
        .agg({"paciente_uuid": "count"})
        .reset_index()
    )
    df["estabelecimento_codigo_ibge_municipio"] = df[
        "estabelecimento_codigo_ibge_municipio"
    ].astype(int)
    df["numero_dose"] = df["numero_dose"].astype(int)

    df_pop_city = download_from_drive(
        config["br"]["drive_paths"]["cities_population"]
    )[
        [
            "country_iso",
            "country_name",
            "state_id",
            "state_name",
            "city_id",
            "city_name",
            "population",
        ]
    ]

    places_id = pd.read_csv("http://datasource.coronacidades.org/br/places/ids")
    df_places = df.merge(
        places_id,
        right_on="city_id",
        left_on="estabelecimento_codigo_ibge_municipio",
    )
    # FIX: column selection must use a list, not a set — pandas rejects set
    # indexers (TypeError in recent versions) and sets have no stable order.
    df_group_city = df_places.merge(
        df_pop_city[["city_id", "population"]], on="city_id"
    )

    # numero_dose == 2 -> fully immunized; == 1 -> first dose only.
    # Assigning the filtered series leaves NaN on non-matching rows; the
    # groupby sums below skip NaN, so each region sums only matching rows.
    df_group_city["imunizados"] = df_group_city[df_group_city["numero_dose"] == 2][
        "paciente_uuid"
    ].astype(int)
    df_group_city["vacinados"] = df_group_city[df_group_city["numero_dose"] == 1][
        "paciente_uuid"
    ].astype(int)

    # REGIAO: aggregate by health region.
    df_group_region = (
        df_group_city.groupby(
            [
                "health_region_id",
                "health_region_name",
                "state_id",
                "state_name",
                "state_num_id",
            ]
        )
        .agg({"population": "sum", "vacinados": "sum", "imunizados": "sum"})
        .reset_index()
    )
    # Each city appears once per dose number (1 and 2), so the summed
    # population double-counts and is halved here.
    df_group_region["population"] = df_group_region["population"] / 2

    df_group_region["perc_imunizados"] = round(
        df_group_region["imunizados"] / df_group_region["population"] * 100, 2
    ).fillna(0)
    df_group_region["perc_vacinados"] = round(
        df_group_region["vacinados"] / df_group_region["population"] * 100, 2
    ).fillna(0)
    df_group_region["nao_vacinados"] = (
        df_group_region["population"] - df_group_region["vacinados"]
    ).astype(int)
    # FIX: pd.to_datetime("now") is deprecated; it returned the current UTC
    # time, so use an explicit UTC timestamp to preserve the same date string.
    df_group_region["last_updated"] = pd.Timestamp.now(tz="UTC").strftime("%d/%m/%Y")
    return df_group_region
def _read_cities_data(country, config):
    """Download the city-level reference tables from the drive.

    Returns a dict mapping table name -> DataFrame for the
    "cities_population" and "health_infrastructure" sheets.
    """
    paths = config[country]["drive_paths"]
    tables = {}
    for name in ("cities_population", "health_infrastructure"):
        tables[name] = download_from_drive(paths[name])
    return tables
def now(config, country="br"):
    """Load the CNAE sectors sheet with integer codes and boolean flags.

    Parameters
    ----------
    config : dict
    country : str
        Country key into the config (default "br").
    """
    sectors = download_from_drive(config[country]["drive_paths"]["CNAE_sectors"])
    # Normalize dtypes: CNAE code as int64, essential flag as bool.
    sectors["cnae"] = sectors["cnae"].astype("int64")
    sectors["essential"] = sectors["essential"].astype("bool")
    return sectors
def now(config):
    """Build the table of rooms, teachers and students for every possible
    filter combination.

    Parameters
    ----------
    config : dict
    """
    # TODO: update link on config!
    school = download_from_drive(
        "https://docs.google.com/spreadsheets/d/1Gw34BlCHNf92vVn-vmzpb_6mIBcg5esN4HQxkF-bews"
    ).assign(
        city_id=lambda d: d["city_id"].astype(str),
        state_num_id=lambda d: d["state_num_id"].astype(int),
    )

    # State-level alert rows use the sentinel "Todos" as city id/name.
    state_alerts = get_states_farolcovid_main.now(config)[
        [
            "state_num_id",
            "state_id",
            "state_name",
            "overall_alert",
            "last_updated_cases",
        ]
    ].assign(city_id="Todos", city_name="Todos")

    city_alerts = get_cities_farolcovid_main.now(config)[
        [
            "state_num_id",
            "state_id",
            "state_name",
            "city_id",
            "city_name",
            "overall_alert",
            "last_updated_cases",
        ]
    ].assign(city_id=lambda d: d["city_id"].astype(str))

    alerts = pd.concat([state_alerts, city_alerts])
    return school.merge(alerts, on=["city_id", "state_num_id"], how="left")
def now(config):
    """Auxiliary table of student counts per school year for every possible
    calculator filter combination.

    Parameters
    ----------
    config : dict
    """
    students = download_from_drive(
        "https://docs.google.com/spreadsheets/d/1aa0WJ2lF3mKn_Tf6n-Te7NWp2KQN8gFLJiYXwRz6xNM"
    )
    # Attach state/city names from the farolcovid cities table.
    city_info = get_cities_farolcovid_main.now(config)[
        ["state_id", "city_name", "city_id"]
    ]
    return students.merge(city_info, on=["city_id"])
def _read_df_data(country, config):
    """Merge city population with health infrastructure into one DataFrame.

    Downloads both drive tables, left-joins infrastructure onto population
    by "city_id", and drops the duplicated ("_y"-suffixed) join columns.
    """
    tables = ["cities_population", "health_infrastructure"]
    dfs = {
        name: download_from_drive(config[country]["drive_paths"][name])
        for name in tables
    }
    df = pd.merge(
        dfs["cities_population"],
        dfs["health_infrastructure"],
        on="city_id",
        how="left",
        suffixes=("", "_y"),
    )
    # FIX: match the merge suffix exactly — the original `"_y" in c` would
    # also drop legitimate columns merely containing "_y" in their name.
    return df.drop([c for c in df.columns if c.endswith("_y")], axis=1)
def now(config):
    """Scrape CNES/DataSUS bed and ventilator counts per city.

    Uses a headless Chrome driver to download three TabNet tables (ward
    beds, complementary/ICU beds, ventilators), merges them, fixes ids and
    dtypes, aggregates resource totals, and attaches population plus
    last-updated/author metadata columns.

    Parameters
    ----------
    config : dict
        Global config holding the "cities_population" drive path.
    """
    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--window-size=1420,1080")
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--disable-gpu")
    # FIX: the `chrome_options=` kwarg was removed in Selenium 4; `options=`
    # is accepted from 3.8 on. chromedriver is installed via the dockerfile.
    driver = webdriver.Chrome(options=chrome_options)

    try:
        # Ward beds by specialty for every municipality.
        urlleitos = "http://tabnet.datasus.gov.br/cgi/deftohtm.exe?cnes/cnv/leiintbr.def"
        df_leitos, updatedate = get_leitos(driver, urlleitos)
        # Last CNES update date.
        updatedate = get_date(updatedate)

        # Complementary (ICU) beds for every municipality.
        urlleitoscomp = (
            "http://tabnet.datasus.gov.br/cgi/deftohtm.exe?cnes/cnv/leiutibr.def"
        )
        df_leitos_comp = get_urlleitoscomp(driver, urlleitoscomp)

        # Ventilators per municipality.
        urlresp = "http://tabnet.datasus.gov.br/cgi/deftohtm.exe?cnes/cnv/equipobr.def"
        df_respiradores = get_respiradores(driver, urlresp)
    finally:
        # FIX: always release the browser/chromedriver processes; the
        # original never quit the driver, leaking one Chrome per run.
        driver.quit()

    # Merge the three scraped tables.
    df_cnes = df_leitos.merge(df_leitos_comp, how="left", on=["city_id", "city_name"])
    df_cnes = df_cnes.merge(df_respiradores, how="left", on=["city_id", "city_name"])
    logger.info("Une dados de leitos, leitos UTI e respiradores")

    # Keys must be strings to match the truncated places ids merged below.
    df_cnes["city_id"] = df_cnes["city_id"].astype(str)
    # TabNet uses "-" for zero/missing counts.
    df_cnes = df_cnes.replace({"-": 0}, regex=True)
    df_cnes = df_cnes.replace(np.nan, 0, regex=True)

    # Cast resource counts to int (values arrive as strings; via float to
    # tolerate "12.0"-style text).
    resources = [
        "cirurgico_tot",
        "clinico_tot",
        "hospital_dia_tot",
        "UTI_adulto_I_tot",
        "UTI_adulto_II_tot",
        "UTI_adulto_III_tot",
        "UTI_adulto_II_COVID_SUS",
        "UTI_adulto_II_COVID_nao_SUS",
        "UTI_pediatrica_II_COVID_SUS",
        "UTI_pediatrica_II_COVID_nao_SUS",
        "number_ventilators",
    ]
    for col in resources:
        # FIX: dropped the trailing .fillna(0) — a no-op after astype(int),
        # since integer series cannot hold NaN (NaNs were zeroed above).
        df_cnes[col] = df_cnes[col].astype(str).astype(float).astype(int)

    # Total ward beds.
    df_cnes["number_beds"] = (
        df_cnes["cirurgico_tot"] + df_cnes["clinico_tot"] + df_cnes["hospital_dia_tot"]
    )
    # Total ICU beds.
    df_cnes["number_icu_beds"] = (
        df_cnes["UTI_adulto_I_tot"]
        + df_cnes["UTI_adulto_II_tot"]
        + df_cnes["UTI_adulto_III_tot"]
    )
    # Total Covid ICU beds.
    df_cnes["number_covid_icu_beds"] = (
        df_cnes["UTI_adulto_II_COVID_SUS"]
        + df_cnes["UTI_adulto_II_COVID_nao_SUS"]
        + df_cnes["UTI_pediatrica_II_COVID_SUS"]
        + df_cnes["UTI_pediatrica_II_COVID_nao_SUS"]
    )

    # TabNet exposes 6-digit IBGE codes: keep the 7-digit id aside, match on
    # the truncated one, then restore the 7-digit id for downstream merges.
    places_ids = get_places_id.now(config)
    places_ids["city_id_7d"] = places_ids["city_id"]
    places_ids["city_id"] = places_ids["city_id"].astype(str).apply(lambda x: x[:-1])
    df_cnes = places_ids.merge(df_cnes, how="left", on=["city_id"], suffixes=["", "_y"])
    df_cnes["city_id"] = df_cnes["city_id_7d"]
    df_cnes = df_cnes.drop(columns="city_id_7d")

    # Attach population data (7-digit ids).
    df_pop = download_from_drive(config["br"]["drive_paths"]["cities_population"])[
        [
            "country_iso",
            "country_name",
            "state_id",
            "state_name",
            "city_id",
            "city_name",
            "population",
        ]
    ]
    df_cnes = pd.merge(df_cnes, df_pop, on="city_id", how="left", suffixes=("", "_y"))

    # Drop intermediate resource columns and duplicated ("_y") join columns.
    df_cnes = df_cnes.drop(
        [
            "state_name_y",
            "UTI_pediatrica_II_COVID_nao_SUS",
            "city_name_y",
            "pediatrico_tot",
            "UTI_adulto_II_COVID_SUS",
            "UTI_pediatrica_II_COVID_SUS",
            "UTI_adulto_II_COVID_nao_SUS",
            "state_id_y",
            "cirurgico_tot",
            "clinico_tot",
            "hospital_dia_tot",
            "UTI_adulto_I_tot",
            "UTI_adulto_II_tot",
            "UTI_adulto_III_tot",
        ],
        axis=1,
    )

    # Cities absent from TabNet get zero resources.
    resources = [
        "number_icu_beds",
        "number_beds",
        "number_covid_icu_beds",
        "number_ventilators",
    ]
    df_cnes[resources] = df_cnes[resources].fillna(0)

    # Metadata: CNES update date and data source for each resource column.
    for col in (
        "last_updated_number_ventilators",
        "last_updated_number_beds",
        "last_updated_number_icu_beds",
        "last_updated_number_covid_icu_beds",
    ):
        df_cnes[col] = updatedate
    for col in (
        "author_number_beds",
        "author_number_ventilators",
        "author_number_icu_beds",
    ):
        df_cnes[col] = "DataSUS"
    return df_cnes
def now(config, country="br"):
    """Fetch the state/health-region/city id mapping table from the drive.

    Parameters
    ----------
    config : dict
    country : str
        Country key into the config (default "br").
    """
    path = config[country]["drive_paths"]["br_id_state_region_city"]
    return download_from_drive(path)