def main():
    """Build cumulative per-region vaccination counts and export them.

    Reads the CSV at ``DATA_URL``, turns the per-row counts into running
    totals per region, drops the rows whose region is ``"ITA"``, passes the
    frame through ``ISODB().merge`` and hands the result to ``export_data``.
    """
    df = pd.read_csv(DATA_URL, parse_dates=["data_somministrazione"])
    df = df.rename(
        columns={
            "data_somministrazione": "date",
            "area": "region",
            "totale": "total_vaccinations",
            "prima_dose": "people_vaccinated",
            "seconda_dose": "people_fully_vaccinated",
        })
    # Assign with ``df[...] =`` rather than ``df.loc[:, ...] =``: since
    # pandas 2.0 the ``.loc`` form writes into the existing column in place,
    # so the formatted strings would be coerced back to datetimes.
    df["date"] = pd.to_datetime(
        df["date"], format="%Y-%m-%d").dt.strftime("%Y-%m-%d")
    df["location"] = COUNTRY

    # Compute cumsums (ISO-formatted date strings sort chronologically)
    df = df.sort_values(by="date")
    for column in ("total_vaccinations", "people_vaccinated",
                   "people_fully_vaccinated"):
        df[column] = df.groupby("region")[column].cumsum().values

    # Copy the filtered frame so the assignment below does not write into a
    # slice of the original.
    df = df[df["region"] != "ITA"].copy()

    # Add ISO codes
    df["region"] = df["region"].replace(REGION_RENAMING)
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Export
    export_data(df=df,
                data_url_reference=DATA_URL_REFERENCE,
                output_file=OUTPUT_FILE)
Exemplo n.º 2
0
def main():
    """Scrape the regional totals table and merge it into ``OUTPUT_FILE``.

    The table following the paragraph matching "Ваксинирани лица по" is
    parsed, stamped with yesterday's date (Europe/Sofia), passed through
    ``ISODB().merge`` and concatenated with the previously exported rows,
    replacing any rows already stored for that date.

    Raises:
        requests.HTTPError: if the page request returns an error status.
        Exception: if the expected paragraph or table is not in the page.
    """
    from io import StringIO

    # Load current data
    df_source = pd.read_csv(OUTPUT_FILE)

    # Request and Get data
    page = requests.get(DATA_URL)
    page.raise_for_status()
    soup = BeautifulSoup(page.content, "html.parser")
    anchor = soup.find("p", string=re.compile("Ваксинирани лица по"))
    table = anchor.parent.find("table") if anchor is not None else None
    if table is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise Exception("Vaccination table not found in page!")
    # Wrap the markup in a file-like object: passing literal HTML strings to
    # read_html is deprecated in recent pandas releases.
    df = pd.read_html(StringIO(str(table)))[0]
    df = df.droplevel(level=0, axis=1)
    date = str(
        datetime.datetime.now(pytz.timezone("Europe/Sofia")).date() -
        datetime.timedelta(days=1))

    df = df.rename(columns={"Област": "region", "Общо": "total_vaccinations"})
    # Drop the row labelled "Общо"; copy so later assignments do not target
    # a slice of the parsed table.
    df = df[df["region"] != "Общо"].copy()
    df["region"] = df["region"].replace(replace)
    df["date"] = date
    df["location"] = COUNTRY

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Concat: discard previously stored rows for the same date
    df_source = df_source.loc[df_source["date"] != date]
    df = pd.concat([df, df_source])

    # Export
    export_data(df=df,
                data_url_reference=DATA_URL_REFERENCE,
                output_file=OUTPUT_FILE)
def main():
    """Load cumulative first/second dose counts per state and export them.

    ``total_vaccinations`` is computed as the sum of the first-dose and
    second-dose cumulative columns.
    """
    df = pd.read_csv(DATA_URL,
                     usecols=[
                         "date", "state", "firstDosesCumulative",
                         "secondDosesCumulative"
                     ])
    df = df.rename(
        columns={
            "state": "region",
            "firstDosesCumulative": "people_vaccinated",
            "secondDosesCumulative": "people_fully_vaccinated"
        })
    df["total_vaccinations"] = (df["people_vaccinated"] +
                                df["people_fully_vaccinated"])
    df["region"] = df["region"].replace(REGION_RENAMING)
    # Convert and format in one chain and assign with ``df[...] =``. Writing
    # the parsed datetimes through ``df.loc[:, "date"] =`` keeps the column
    # as object dtype on pandas >= 2.0, and the subsequent ``.dt`` access
    # then raises.
    df["date"] = pd.to_datetime(
        df["date"], format="%Y-%m-%d").dt.strftime("%Y-%m-%d")
    df["location"] = COUNTRY

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Export
    export_data(df=df,
                data_url_reference=DATA_URL_REFERENCE,
                output_file=OUTPUT_FILE)
def main():
    """Fetch the latest per-region table and merge it into ``OUTPUT_FILE``.

    ``total_vaccinations`` is the sum of the "Moderna" and "Pfizer/BioNTech"
    columns after conversion with ``column_str2int``. Rows already stored
    for the fetched date are replaced by the fresh ones.
    """
    # Previously exported rows
    previous = pd.read_csv(OUTPUT_FILE)

    # Fresh table and the date it refers to
    latest, date = load_data(DATA_URL)
    latest = latest.rename(columns={"Län": "region"})

    # Derived and constant columns
    moderna = column_str2int(latest.loc[:, "Moderna"])
    pfizer = column_str2int(latest.loc[:, "Pfizer/BioNTech"])
    latest.loc[:, "total_vaccinations"] = moderna + pfizer
    latest.loc[:, "location"] = COUNTRY
    latest.loc[:, "date"] = date

    # Drop the "Totala summan" row
    latest = latest.loc[latest.loc[:, "region"] != "Totala summan"]

    # Region names and ISO codes
    latest.loc[:, "region"] = latest.loc[:, "region"].replace(REGION_RENAMING)
    latest = ISODB().merge(latest, country_iso=COUNTRY_ISO)

    # Replace any stored rows for this date with the fresh ones
    previous = previous.loc[previous.loc[:, "date"] != date]
    combined = pd.concat([latest, previous])

    # Export
    export_data(df=combined,
                data_url_reference=DATA_URL_REFERENCE,
                output_file=OUTPUT_FILE)
Exemplo n.º 5
0
def main():
    """Fetch today's regional totals and merge them into ``OUTPUT_FILE``.

    Rows are stamped with the current date in the Europe/Warsaw timezone;
    ``people_vaccinated`` is derived as total minus fully vaccinated. Rows
    already stored for that date are replaced.
    """
    # Previously exported rows
    previous = pd.read_csv(OUTPUT_FILE)

    # Fresh table
    latest = load_data(DATA_URL)

    # Column names
    renaming = {
        "jpt_nazwa_": "region",
        "SZCZEPIENIA_SUMA": "total_vaccinations",
        "DAWKA_2_SUMA": "people_fully_vaccinated",
    }
    latest = latest.rename(columns=renaming)

    # Constant and derived columns
    latest.loc[:, "location"] = COUNTRY
    today = datetime.datetime.now(pytz.timezone("Europe/Warsaw")).date()
    date = today.strftime("%Y-%m-%d")
    latest.loc[:, "date"] = date
    totals = latest.loc[:, "total_vaccinations"]
    fully = latest.loc[:, "people_fully_vaccinated"]
    latest.loc[:, "people_vaccinated"] = totals - fully
    latest.loc[:, "region"] = latest.loc[:, "region"].replace(REGION_RENAMING)

    # ISO
    latest = ISODB().merge(latest, country_iso=COUNTRY_ISO)

    # Replace any stored rows for this date with the fresh ones
    previous = previous.loc[previous.loc[:, "date"] != date]
    combined = pd.concat([latest, previous])

    # Export
    export_data(df=combined,
                data_url_reference=DATA_URL_REFERENCE,
                output_file=OUTPUT_FILE)
Exemplo n.º 6
0
def main():
    """Load the regional report CSV, normalise its columns and export it.

    Counts are written with "." as thousands separator, so both count
    columns are read as text and parsed by stripping the dots. A missing
    "personas con pauta completa" value is treated as 0.
    ``people_vaccinated`` is derived as total minus fully vaccinated.
    """

    def _parse_count(text, missing_as_zero=False):
        # "1.234.567" -> 1234567; "nan" marks a missing value.
        if missing_as_zero and text == "nan":
            return 0
        return int(text.replace(".", ""))

    # Read both count columns as text: letting pandas infer a numeric dtype
    # can turn a dot-grouped value such as "123.450" into the float 123.45
    # and lose the trailing zero.
    df = pd.read_csv(DATA_URL,
                     dtype={
                         "dosis administradas": str,
                         "personas con pauta completa": str
                     })
    df = df.rename(columns={
        "informe": "date",
        "comunidad autónoma": "region",
        "dosis administradas": "total_vaccinations",
        "personas con pauta completa": "people_fully_vaccinated"
    })

    # Drop the "Totales" row; copy so the assignments below do not target a
    # slice of the loaded frame.
    df = df[df["region"] != "Totales"].copy()
    df["region"] = df["region"].replace(REGION_RENAMING)

    # Assign with ``df[...] =``: on pandas >= 2.0, ``df.loc[:, "date"] =``
    # keeps the object dtype and the following ``.dt`` access raises.
    df["date"] = pd.to_datetime(
        df["date"], format="%d/%m/%Y").dt.strftime("%Y-%m-%d")

    df["total_vaccinations"] = (
        df["total_vaccinations"].astype(str).apply(_parse_count))
    df["people_fully_vaccinated"] = (
        df["people_fully_vaccinated"].fillna("nan").astype(str).apply(
            lambda text: _parse_count(text, missing_as_zero=True)))
    df["people_vaccinated"] = (df["total_vaccinations"] -
                               df["people_fully_vaccinated"])
    df["location"] = COUNTRY

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Export
    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE
    )
Exemplo n.º 7
0
def main():
    """Load per-state vaccination counts from ``start_date`` on and export.

    Rows dated before "2021-01-18" and rows whose state is "TOTAL" are
    dropped. The region ISO code is built as ``"<COUNTRY_ISO>-<state>"`` and
    passed to ``ISODB().merge(..., mode="region")``.
    """
    # Get data
    df = pd.read_csv(DATA_URL, usecols=["state", "date", "vaccinated"])
    df = df.rename(columns={"vaccinated": "total_vaccinations"})

    # Keep rows from the start date on and drop the "TOTAL" row. The date
    # comparison is on strings. Copy so the assignments below do not write
    # into a slice of the loaded frame.
    start_date = "2021-01-18"
    keep = (df["date"] >= start_date) & (df["state"] != "TOTAL")
    df = df.loc[keep].copy()

    # Process vaccinations. Assign with ``df[...] =``: the ``.loc[:, ...] =``
    # form writes in place on pandas >= 2.0 and would leave the column float.
    df["total_vaccinations"] = df["total_vaccinations"].fillna(0).astype(int)

    # Get region iso
    df["region_iso"] = f"{COUNTRY_ISO}-" + df["state"]

    # Get region name
    df = ISODB().merge(df, mode="region")
    df["location"] = COUNTRY

    # Export
    export_data(df=df,
                data_url_reference=DATA_URL_REFERENCE,
                output_file=OUTPUT_FILE)
def main():
    """Load the semicolon-separated regional CSV, normalise it and export.

    The row named "Österreich" is dropped and the date column is truncated
    to its first ten characters.
    """
    # Load only the columns that are used
    wanted = [
        "Datum", "Name", "EingetrageneImpfungen", "Teilgeimpfte",
        "Vollimmunisierte"
    ]
    table = pd.read_csv(DATA_URL, sep=";", usecols=wanted)
    excluded = table["Name"] == "Österreich"
    table = table.loc[~excluded]

    # Rename columns
    renaming = {
        "Datum": "date",
        "Name": "region",
        "EingetrageneImpfungen": "total_vaccinations",
        "Teilgeimpfte": "people_vaccinated",
        "Vollimmunisierte": "people_fully_vaccinated"
    }
    table = table.rename(columns=renaming)
    table.loc[:, "region"] = table.loc[:, "region"].replace(REGION_RENAMING)

    # Column processing
    table.loc[:, "date"] = table["date"].str.slice(0, 10)
    table.loc[:, "location"] = COUNTRY

    # Add ISO codes
    table = ISODB().merge(table, country_iso=COUNTRY_ISO)

    # Export
    export_data(df=table,
                data_url_reference=DATA_URL_REFERENCE,
                output_file=OUTPUT_FILE)
def main():
    """Accumulate first/second dose counts per region and export them.

    ``total_vaccinations`` is the sum of the two accumulated dose columns.
    """
    # Load data
    renaming = {
        "Date": "date",
        "Region": "region",
        "first_dose": "people_vaccinated",
        "second_dose": "people_fully_vaccinated",
    }
    doses = pd.read_csv(DATA_URL, sep=";").rename(columns=renaming)

    # Running totals per region, in date order
    doses = doses.sort_values(by="date")
    for column in ("people_vaccinated", "people_fully_vaccinated"):
        doses[column] = doses.groupby("region")[column].cumsum().values

    # Add columns
    first = doses.loc[:, "people_vaccinated"]
    second = doses.loc[:, "people_fully_vaccinated"]
    doses.loc[:, "total_vaccinations"] = first + second
    doses.loc[:, "region"] = doses.loc[:, "region"].replace(REGION_RENAMING)
    doses.loc[:, "location"] = COUNTRY

    # Get iso codes
    doses = ISODB().merge(doses, country_iso=COUNTRY_ISO)

    # Export
    export_data(
        df=doses,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
Exemplo n.º 10
0
def main():
    """Scrape per-region dose counts and merge them into ``OUTPUT_FILE``.

    The page is expected to contain exactly three region boxes, each with
    four fields; boxes that do not match are skipped, and nothing is
    collected when the box count differs from three. Dates come from
    ``get_date`` and are joined on the region name.
    """
    # Previously exported rows
    previous = pd.read_csv(OUTPUT_FILE)

    # Download and parse the page
    response = requests.get(DATA_URL, headers={"User-Agent": "Custom"})
    soup = BeautifulSoup(response.content, "html.parser")

    # Collect [region, dose_1, dose_2] rows
    boxes = soup.findAll(class_="col-12 col-md-6 col-xl-4")
    rows = []
    for box in (boxes if len(boxes) == 3 else []):
        fields = box.findAll(class_="col-12")
        if len(fields) != 4:
            continue
        name = fields[0].text.strip()
        if "Vaccines administered" not in fields[1].text:
            continue
        # Two matching elements are expected; only the second one is used.
        _, regional = fields[1].findAll(class_="col-auto text-end")
        dose_1, dose_2 = [
            int(chunk.replace(",", ""))
            for chunk in regional.text.strip().split("\n")
        ]
        rows.append([name, dose_1, dose_2])
    latest = pd.DataFrame(
        rows,
        columns=["region", "people_vaccinated", "people_fully_vaccinated"])

    # Process
    first = latest.loc[:, "people_vaccinated"]
    second = latest.loc[:, "people_fully_vaccinated"]
    latest.loc[:, "total_vaccinations"] = first + second
    latest.loc[:, "location"] = COUNTRY

    # Join with date
    url = "https://covid-vaccinatie.be/en/vaccines-administered.xlsx"
    df_dates = get_date(url)
    latest = latest.merge(df_dates,
                          left_on="region",
                          right_on="Region",
                          how="left")

    # ISO
    latest = ISODB().merge(latest, country_iso=COUNTRY_ISO)

    # Drop stored rows whose region and date both appear in the new dates
    regions = df_dates.index.tolist()
    dates = df_dates.date.tolist()
    stale = previous["region"].isin(regions) & previous["date"].isin(dates)
    previous = previous.loc[~stale]
    combined = pd.concat([latest, previous])

    # Export
    export_data(df=combined,
                data_url_reference=DATA_URL_REFERENCE,
                output_file=OUTPUT_FILE)
def main():
    """Fetch the latest per-region dose counts from the API and merge them
    into ``OUTPUT_FILE``.

    ``total_vaccinations`` is the sum of the two dose columns. The regions
    "United States" and "Long Term Care" are dropped, and a fixed set of
    regions gets an explicit ``location_iso`` after the ISO merge.

    Raises:
        Exception: if the fetched data does not contain exactly one date.
    """
    # Load current data
    df_source = pd.read_csv(OUTPUT_FILE)

    # Load new data from API
    data = requests.get(DATA_URL).json()
    df = (pd.DataFrame(data["vaccination_data"]).fillna(0).astype({
        "Administered_Dose1": int,
        "Administered_Dose2": int
    }))

    # Keep the used columns; copy so the assignments below do not write
    # into a slice of the full API frame.
    cols = ["LongName", "Date", "Administered_Dose1", "Administered_Dose2"]
    df = df[cols].copy()

    # Process columns
    df["total_vaccinations"] = (df["Administered_Dose1"] +
                                df["Administered_Dose2"])
    df = df.rename(
        columns={
            "LongName": "region",
            "Date": "date",
            "Administered_Dose1": "people_vaccinated",
            "Administered_Dose2": "people_fully_vaccinated",
        })
    df["region"] = df["region"].replace(REGION_RENAMING)
    # Assign with ``df[...] =``: on pandas >= 2.0, ``df.loc[:, "date"] =``
    # keeps the object dtype and the following ``.dt`` access raises.
    df["date"] = pd.to_datetime(
        df["date"], format="%Y-%m-%d").dt.strftime("%Y-%m-%d")
    df = df[~df["region"].isin(["United States", "Long Term Care"])].copy()
    df["location"] = "United States"

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)
    # Regions whose location ISO code is set explicitly after the merge
    location_iso_overrides = {
        "Federated States of Micronesia": "FM",
        "Marshall Islands": "MH",
        "Puerto Rico": "PR",
        "Republic of Palau": "PW",
        "Bureau of Prisons": "US",
        "Dept of Defense": "US",
        "Indian Health Svc": "US",
        "Veterans Health": "US",
    }
    for region_name, location_iso in location_iso_overrides.items():
        df.loc[df["region"] == region_name, "location_iso"] = location_iso

    # Concat: the fetched data must refer to exactly one date
    dates = df["date"].unique().tolist()
    if len(dates) != 1:
        raise Exception("Multiple dates detected!")
    df_source = df_source.loc[df_source["date"] != dates[0]]
    df = pd.concat([df, df_source])

    # Export
    export_data(df=df,
                data_url_reference=DATA_URL_REFERENCE,
                output_file=OUTPUT_FILE)
Exemplo n.º 12
0
def main_li(df):
    """Export the rows of ``df`` whose ``region_iso`` is "FL".

    The selected rows get ``COUNTRY_LI`` / ``COUNTRY_ISO_LI`` as location
    and "-" as region, and are written to ``OUTPUT_FILE_LI``.
    """
    # Liechtenstein rows only, re-indexed from zero
    is_fl = df.loc[:, "region_iso"] == "FL"
    selected = df.loc[is_fl].reset_index(drop=True)

    # Overwrite identifying columns
    relabelled = selected.loc[:, "region_iso"].replace({"FL": COUNTRY_LI})
    selected.loc[:, "region_iso"] = relabelled
    selected.loc[:, "location"] = COUNTRY_LI
    selected.loc[:, "location_iso"] = COUNTRY_ISO_LI
    selected.loc[:, "region"] = "-"

    # Export
    export_data(
        df=selected,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE_LI,
    )
def main():
    """Load the data, rename regions, add ISO codes and export."""
    # Load data
    regional = load_data(DATA_URL)

    # Replace region names
    renamed = regional.loc[:, "region"].replace(REGION_RENAMING)
    regional.loc[:, "region"] = renamed

    # ISO
    with_iso = ISODB().merge(regional, country_iso=COUNTRY_ISO)

    # Export
    export_data(
        df=with_iso,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Aggregate dose records per day and region, accumulate and export.

    The CSV rows are summed per (date, region), checked for consistency,
    passed through ``ISODB().merge`` and converted to running totals per
    region.

    Raises:
        Exception: if an expected column is missing from the CSV, or if the
            aggregated first and second doses do not add up to the total.
    """
    # Load data
    df = pd.read_csv(DATA_URL)

    # Check 1: every expected column must be present
    cols = [
        "datum", "vakcina", "kraj_nuts_kod", "kraj_nazev", "vekova_skupina",
        "prvnich_davek", "druhych_davek", "celkem_davek"
    ]
    if not all(col in df.columns for col in cols):
        raise Exception("API changed")

    # Column renaming
    df = df.rename(columns={
        "datum": "date",
        "kraj_nazev": "region",
        "prvnich_davek": "people_vaccinated",
        "druhych_davek": "people_fully_vaccinated",
        "celkem_davek": "total_vaccinations"
    })

    # Add counts per day. The aggregation is named by string: passing the
    # builtin ``sum`` is deprecated in recent pandas releases.
    df = df.groupby(by=["date", "region"]).agg(
        people_vaccinated=("people_vaccinated", "sum"),
        people_fully_vaccinated=("people_fully_vaccinated", "sum"),
        total_vaccinations=("total_vaccinations", "sum")
    ).reset_index()

    # Check 2: daily totals must equal first + second doses
    expected_total = df["people_vaccinated"] + df["people_fully_vaccinated"]
    if not (df["total_vaccinations"] == expected_total).all():
        raise Exception("Error in columns. dose_1 + dose_2 != total_doses")

    # Rename regions
    df["region"] = df["region"].replace(REGION_RENAMING)
    df["location"] = COUNTRY

    # ISO
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Compute cumsums per region, in date order
    df = df.sort_values(by="date")
    for column in ("total_vaccinations", "people_vaccinated",
                   "people_fully_vaccinated"):
        df[column] = df.groupby("region")[column].cumsum().values

    # Export
    export_data(
        df=df,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE
    )
Exemplo n.º 15
0
def main():
    """Combine administered and completed vaccination series and export.

    ``DATA_URL_1`` provides cumulative administered doses and ``DATA_URL_2``
    cumulative completed vaccinations, both per province and date. The
    second series is left-joined onto the first; missing completed counts
    are treated as 0 and ``people_vaccinated`` is derived as total minus
    fully vaccinated.
    """

    def _load_series(url, columns_renaming):
        # Read the selected columns, rename them, and normalise the date
        # (DD-MM-YYYY -> YYYY-MM-DD) and the region names.
        frame = pd.read_csv(url, usecols=list(columns_renaming))
        frame = frame.rename(columns=columns_renaming)
        # Assign with ``frame[...] =``: on pandas >= 2.0,
        # ``frame.loc[:, "date"] =`` keeps the object dtype and the
        # following ``.dt`` access raises.
        frame["date"] = pd.to_datetime(
            frame["date"], format="%d-%m-%Y").dt.strftime("%Y-%m-%d")
        frame["region"] = frame["region"].replace(REGION_RENAMING)
        return frame

    # Administered doses
    df = _load_series(
        DATA_URL_1, {
            "date_vaccine_administered": "date",
            "province": "region",
            "cumulative_avaccine": "total_vaccinations"
        })
    df["location"] = COUNTRY
    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Add completed vaccinations
    df_2 = _load_series(
        DATA_URL_2, {
            "date_vaccine_completed": "date",
            "province": "region",
            "cumulative_cvaccine": "people_fully_vaccinated"
        })
    df = df.merge(df_2, on=["region", "date"], how="left")
    # Rows without a match in the second series are NaN after the left join
    df["people_fully_vaccinated"] = (
        df["people_fully_vaccinated"].fillna(0).astype(int))
    df["people_vaccinated"] = (df["total_vaccinations"] -
                               df["people_fully_vaccinated"])

    # Export
    export_data(df=df,
                data_url_reference=DATA_URL_REFERENCE,
                output_file=OUTPUT_FILE)
Exemplo n.º 16
0
def main_ch(df):
    """Export the rows of ``df`` whose ``region_iso`` is not "CHFL" or "FL".

    The region ISO codes are prefixed with ``"<COUNTRY_ISO_CH>-"`` before
    being passed to ``ISODB().merge(..., mode="region")``; the result is
    written to ``OUTPUT_FILE_CH``.
    """
    # Switzerland: everything except the "CHFL" and "FL" rows
    keep = ~df.loc[:, "region_iso"].isin(["CHFL", "FL"])
    swiss = df.loc[keep].reset_index(drop=True)

    # Process columns
    prefix = f"{COUNTRY_ISO_CH}-"
    swiss.loc[:, "region_iso"] = prefix + swiss.loc[:, "region_iso"]
    swiss.loc[:, "location"] = COUNTRY_CH

    # Get region names
    swiss = ISODB().merge(swiss, mode="region")

    # Export
    export_data(
        df=swiss,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE_CH,
    )
Exemplo n.º 17
0
def main():
    """Fetch first and second dose counts per region and merge them into
    ``OUTPUT_FILE``.

    The date is read through ``load_driver`` / ``load_date``; the two dose
    tables come from the chart-data API. Rows whose region is "Fylke" are
    removed from both tables before they are joined on region.

    Raises:
        Exception: if the date cannot be loaded.
    """
    # Load current file
    df_source = pd.read_csv(OUTPUT_FILE)

    # Get date
    # NOTE(review): the driver is never closed in this function -- confirm
    # whether load_driver/load_date take care of cleanup.
    driver = load_driver(DATA_URL)
    try:
        date = load_date(driver)
    except Exception as err:
        # Chain the cause so the underlying failure stays visible.
        raise Exception("Date not found!") from err

    # Load dose 1 data
    url = "https://www.fhi.no/api/chartdata/api/99112"
    dix = json.loads(requests.get(url).content)
    df_dose1 = pd.DataFrame(dix, columns=["region", "people_vaccinated"])
    # Load dose 2 data
    url = "https://www.fhi.no/api/chartdata/api/99111"
    dix = json.loads(requests.get(url).content)
    df_dose2 = pd.DataFrame(dix, columns=["region", "people_fully_vaccinated"])

    # Remove the "Fylke" row. Each table is filtered with its own mask: the
    # previous code filtered df_dose1 with a mask built from df_dose2.
    df_dose1 = df_dose1.loc[df_dose1["region"] != "Fylke"]
    df_dose2 = df_dose2.loc[df_dose2["region"] != "Fylke"]
    # Merge
    df = df_dose1.merge(df_dose2, on="region", how="left")

    # Process region column
    df["region"] = df["region"].replace(REGION_RENAMING)

    # Add columns
    df["date"] = date
    df["location"] = COUNTRY
    df["total_vaccinations"] = (df["people_fully_vaccinated"] +
                                df["people_vaccinated"])

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Concat: discard previously stored rows for the same date
    df_source = df_source.loc[df_source["date"] != date]
    df = pd.concat([df, df_source])

    # Export
    export_data(df=df,
                data_url_reference=DATA_URL_REFERENCE,
                output_file=OUTPUT_FILE)
def main():
    """Scrape the latest per-region dose counts and merge them into
    ``OUTPUT_FILE``.

    The date and the table are extracted from the parsed page by
    ``get_date`` and ``get_df``. ``total_vaccinations`` is the sum of the
    first-dose and second-dose columns. Rows already stored for the scraped
    date are replaced.
    """
    # Previously exported rows
    previous = pd.read_csv(OUTPUT_FILE)

    # Download and parse the page
    soup = BeautifulSoup(urllib.request.urlopen(DATA_URL), "html.parser")

    # Date and table
    date = get_date(soup)
    latest = get_df(soup)

    # Rename columns
    renaming = {
        "primera_dosis_cantidad": "people_vaccinated",
        "segunda_dosis_cantidad": "people_fully_vaccinated",
        "jurisdiccion_nombre": "region"
    }
    latest = latest.rename(columns=renaming)

    # Process columns
    latest.loc[:, "region"] = latest.loc[:, "region"].replace(REGION_RENAMING)
    first = latest.loc[:, "people_vaccinated"]
    second = latest.loc[:, "people_fully_vaccinated"]
    latest.loc[:, "total_vaccinations"] = first + second
    latest.loc[:, "location"] = COUNTRY
    latest.loc[:, "date"] = date

    # Add ISO codes
    latest = ISODB().merge(latest, country_iso=COUNTRY_ISO)

    # Replace any stored rows for this date with the fresh ones
    previous = previous.loc[previous.loc[:, "date"] != date]
    combined = pd.concat([latest, previous])

    # Export
    export_data(df=combined,
                data_url_reference=DATA_URL_REFERENCE,
                output_file=OUTPUT_FILE)
def main():
    """Load dose counts from the spreadsheet, accumulate them and export.

    Rows without a region are dropped, missing second-dose counts are
    treated as 0, dates are converted from DD/MM/YYYY to YYYY-MM-DD and all
    three count columns are turned into running totals per region.
    """
    # Load
    df = read_xlsx_from_url(DATA_URL)

    # Rename
    df = df.rename(
        columns={
            "Date": "date",
            "Region": "region",
            "1st dose": "people_vaccinated",
            "2nd dose": "people_fully_vaccinated",
        })

    # Remove NaN regions; copy so the assignments below do not write into a
    # slice of the loaded frame.
    df = df.loc[~df["region"].isnull()].copy()

    # Process. Assign with ``df[...] =``: the ``.loc[:, ...] =`` form writes
    # in place on pandas >= 2.0 and would leave the column float.
    df["people_fully_vaccinated"] = (
        df["people_fully_vaccinated"].fillna(0).astype(int))
    df["total_vaccinations"] = (df["people_vaccinated"] +
                                df["people_fully_vaccinated"])
    df["location"] = COUNTRY
    df["date"] = df["date"].apply(
        lambda x: datetime.strptime(x, "%d/%m/%Y").strftime("%Y-%m-%d"))

    # Cumsum per region, in date order
    df = df.sort_values(by="date")
    for column in ("people_vaccinated", "people_fully_vaccinated",
                   "total_vaccinations"):
        df[column] = df.groupby("region")[column].cumsum().values

    # ISO
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)

    # Export
    export_data(df=df,
                data_url_reference=DATA_URL_REFERENCE,
                output_file=OUTPUT_FILE)
Exemplo n.º 20
0
def main():
    """Download the comma-separated data, normalise its columns and export."""
    # Request and download data
    response = requests.get(DATA_URL, headers={'User-Agent': 'Custom'})
    soup = BeautifulSoup(response.content, "html.parser")

    # Build DataFrame from the parsed text
    renaming = {
        "nom": "region",
        "total_vaccines": "total_vaccinations"
    }
    table = read_psv(str(soup), sep=",").rename(columns=renaming)
    table.loc[:, "region"] = table.loc[:, "region"].replace(REGION_RENAMING)
    table.loc[:, "location"] = COUNTRY

    # Add ISO codes
    table = ISODB().merge(table, country_iso=COUNTRY_ISO)

    # Export
    export_data(
        df=table,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
Exemplo n.º 21
0
def main():
    """Load cumulative first/second dose counts per area and export them.

    ``total_vaccinations`` is the sum of the first-dose and second-dose
    columns.
    """
    renaming = {
        "areaName": "region",
        "cumPeopleVaccinatedFirstDoseByPublishDate": "people_vaccinated",
        "cumPeopleVaccinatedSecondDoseByPublishDate":
        "people_fully_vaccinated"
    }
    table = pd.read_csv(DATA_URL).rename(columns=renaming)

    # Constant and derived columns
    table.loc[:, "location"] = COUNTRY
    first = table.loc[:, "people_vaccinated"]
    second = table.loc[:, "people_fully_vaccinated"]
    table.loc[:, "total_vaccinations"] = first + second

    # Add ISO codes
    table = ISODB().merge(table, country_iso=COUNTRY_ISO)

    # Export
    export_data(
        df=table,
        data_url_reference=DATA_URL_REFERENCE,
        output_file=OUTPUT_FILE,
    )
def main():
    """Extract per-region dose counts from the newest PDF and merge them
    into ``OUTPUT_FILE``.

    The PDF is located through the "Download her" link on the page at
    ``DATA_URL`` and read with tabula. The fourth extracted table is used
    and must have shape (11, 7) and contain every name in ``regions``.

    Raises:
        Exception: if the PDF link is missing, the table shape changed, or
            an expected region is absent.
    """

    def _parse_count(text):
        # Counts use "." as thousands separator; "nan" marks a missing cell.
        if text == "nan":
            return 0
        return int(text.replace(".", ""))

    # Load current data
    df_source = pd.read_csv(OUTPUT_FILE)

    # Locate newest pdf; the context manager closes the HTTP response.
    with urllib.request.urlopen(DATA_URL) as html_page:
        soup = BeautifulSoup(html_page, "html.parser")
    link = soup.find("a", text="Download her")
    if link is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise Exception("Link to newest pdf not found!")
    pdf_path = link.get("href")  # Get path to newest pdf

    # Force dtype to be object because of thousand separator
    kwargs = {"pandas_options": {"dtype": str, "header": None}}
    dfs_from_pdf = tabula.read_pdf(pdf_path, pages="all", **kwargs)
    date = get_date(dfs_from_pdf)

    # Get preliminary dataframe
    df = dfs_from_pdf[3]  # Hardcoded table position

    if df.shape != (11, 7):
        raise Exception("Shape of table changed!")
    if not all(region in df[0].tolist() for region in regions):
        raise Exception("Region missing!")

    # Drop the first four rows and the last row
    # NOTE(review): presumably header rows and a total row -- confirm.
    df = df.drop([0, 1, 2, 3, len(df) - 1])
    # Rename columns
    df = df.rename(
        columns={0: "region", 2: "people_vaccinated", 4: "people_fully_vaccinated"}
    )
    df = df.astype(str)

    # Remove numeric 1000-separator. Both columns share one parser: the
    # previous people_vaccinated conversion raised on "nan" before its
    # trailing fillna(0) could apply.
    df["people_vaccinated"] = (
        df["people_vaccinated"].apply(_parse_count).astype(int))
    df["people_fully_vaccinated"] = (
        df["people_fully_vaccinated"].apply(_parse_count).astype("Int64"))

    # Process region column
    df["region"] = df["region"].replace(REGION_RENAMING)

    # Get new columns
    df["total_vaccinations"] = (
        df["people_vaccinated"] + df["people_fully_vaccinated"]
    )
    df["location"] = COUNTRY
    df["date"] = date

    # Add ISO codes
    df = ISODB().merge(df, country_iso=COUNTRY_ISO)
    df.loc[df["region"] == "Others", "location_iso"] = COUNTRY_ISO

    # Concat: discard previously stored rows for the same date
    df_source = df_source.loc[df_source["date"] != date]
    df = pd.concat([df, df_source])

    # Export
    export_data(df=df, data_url_reference=DATA_URL_REFERENCE, output_file=OUTPUT_FILE)