def _export_metadata(self, df, output_path):
    """Export metadata for ``df``, crediting the ECDC as the data source.

    Forwards ``df`` and ``output_path`` to ``export_metadata`` together with
    the ECDC source name and this instance's ``source_url_ref``.
    """
    metadata_kwargs = {
        "df": df,
        "source_name": "European Centre for Disease Prevention and Control (ECDC)",
        "source_url": self.source_url_ref,
        "output_path": output_path,
    }
    export_metadata(**metadata_kwargs)
def to_csv(self, paths):
    """Write the main and the per-manufacturer CSV files for this location.

    The frame returned by ``read`` is run through ``pipeline_base`` once and
    then fed to ``pipeline`` and ``pipeline_manufacturer``. Metadata for the
    manufacturer output is exported with "Robert Koch Institut" as source.
    """
    base = self.read().pipe(self.pipeline_base)

    # Main vaccination series.
    vaccinations = base.pipe(self.pipeline)
    vaccinations.to_csv(paths.tmp_vax_out(self.location), index=False)

    # Per-manufacturer series and the metadata describing its source.
    by_manufacturer = base.pipe(self.pipeline_manufacturer)
    by_manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
    export_metadata(
        by_manufacturer,
        "Robert Koch Institut",
        self.source_url_ref,
        paths.tmp_vax_metadata_man,
    )
def to_csv(self, paths):
    """Write the main and the per-manufacturer CSV files for this location.

    Each pipeline receives its own copy of the base frame. Metadata for the
    manufacturer output is exported with "Government of Romania via
    datelazi.ro" as source.
    """
    base = self.read().pipe(self.pipeline_base)

    # Main vaccination series, computed on a private copy of the base frame.
    vaccinations = base.copy().pipe(self.pipeline)
    vaccinations.to_csv(paths.tmp_vax_out(self.location), index=False)

    # Per-manufacturer series, also computed on a private copy.
    by_manufacturer = base.copy().pipe(self.pipeline_manufacturer)
    by_manufacturer.to_csv(
        paths.tmp_vax_out_man(f"{self.location}"),
        index=False,
    )
    export_metadata(
        by_manufacturer,
        "Government of Romania via datelazi.ro",
        self.source_url,
        paths.tmp_vax_metadata_man,
    )
def to_csv(self, paths):
    """Write the main and the age-group CSV files for this location.

    The frame returned by ``read`` feeds both ``pipeline`` and
    ``pipeline_age``. Metadata for the age-group output is exported with
    "Government of Jersey" as source.
    """
    raw = self.read()

    # Main vaccination series.
    vaccinations = raw.pipe(self.pipeline)
    vaccinations.to_csv(paths.tmp_vax_out(self.location), index=False)

    # Age-group breakdown and the metadata describing its source.
    by_age = raw.pipe(self.pipeline_age)
    by_age.to_csv(paths.tmp_vax_out_by_age_group(self.location), index=False)
    export_metadata(
        by_age,
        "Government of Jersey",
        self.source_url,
        paths.tmp_vax_metadata_age,
    )
def to_csv(self, paths):
    """Write the main and the per-manufacturer CSV files for this location.

    The frame returned by ``read`` goes through ``pipe_base`` once; the
    result feeds both ``pipeline`` and ``pipeline_manufacturer``. Metadata
    for the manufacturer output is exported with "National Health Service"
    as source.
    """
    base = self.read().pipe(self.pipe_base)

    # Main vaccination series.
    vaccinations = base.pipe(self.pipeline)
    vaccinations.to_csv(paths.tmp_vax_out(self.location), index=False)

    # Per-manufacturer series and the metadata describing its source.
    by_manufacturer = base.pipe(self.pipeline_manufacturer)
    by_manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
    export_metadata(
        by_manufacturer,
        "National Health Service",
        self.source_url,
        paths.tmp_vax_metadata_man,
    )
def export(self, paths):
    """Write the per-manufacturer and the main CSV files for this location.

    The frame returned by ``read`` goes through ``base_pipeline``; rows whose
    ``total_vaccinations`` equals 0 are dropped before either output is
    produced. Metadata for the manufacturer output is exported with
    "Prime Minister of Japan and His Cabinet" as source.

    Args:
        paths: Object providing ``tmp_vax_out``, ``tmp_vax_out_man`` and
            ``tmp_vax_metadata_man``.
    """
    df = self.read().pipe(self.base_pipeline)

    # Drop total_vaccinations == 0 rows added by groupby.
    df = df.drop(df[df.total_vaccinations == 0].index).reset_index()

    # Manufacturer
    df.pipe(self.pipeline_manufacturer).to_csv(
        paths.tmp_vax_out_man(self.location),
        index=False,
    )
    # NOTE(review): metadata is built from the base frame, not from the
    # manufacturer pipeline output as the other exporters in this file do;
    # confirm this is intended.
    export_metadata(
        df,
        "Prime Minister of Japan and His Cabinet",
        self.source_url_2_ref,
        paths.tmp_vax_metadata_man,
    )

    # Main data
    df.pipe(self.pipeline).to_csv(paths.tmp_vax_out(self.location), index=False)
def to_csv(self, paths):
    """Write the main and the per-manufacturer CSV files for this location.

    The frame returned by ``read`` goes through ``pipeline_base`` once; the
    result feeds both ``pipeline_vaccinations`` and
    ``pipeline_manufacturer``. Metadata for the manufacturer output is
    exported with the Chilean science ministry as source.
    """
    base = self.read().pipe(self.pipeline_base)

    # Main vaccination series.
    vaccinations = base.pipe(self.pipeline_vaccinations)
    vaccinations.to_csv(paths.tmp_vax_out(self.location), index=False)

    # Per-manufacturer series and the metadata describing its source.
    by_manufacturer = base.pipe(self.pipeline_manufacturer)
    by_manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
    export_metadata(
        by_manufacturer,
        "Ministerio de Ciencia, Tecnología, Conocimiento e Innovación",
        self.source_url_ref,
        paths.tmp_vax_metadata_man,
    )
def to_csv(self, paths):
    """Write the Switzerland and Liechtenstein CSV files.

    ``read`` returns a pair of frames. The first is run through ``pipeline``
    once per country code and written to that country's main output; the
    second is run through ``pipeline_manufacturer`` and written, with its
    metadata, as Switzerland's per-manufacturer output.
    """
    vaccinations, manufacturers = self.read()

    # Main series, one file per country code handled by the pipeline.
    targets = (("CH", "Switzerland"), ("FL", "Liechtenstein"))
    for code, country in targets:
        vaccinations.pipe(self.pipeline, country_code=code).to_csv(
            paths.tmp_vax_out(country),
            index=False,
        )

    # Per-manufacturer series (Switzerland only) and its source metadata.
    by_manufacturer = manufacturers.pipe(self.pipeline_manufacturer)
    by_manufacturer.to_csv(paths.tmp_vax_out_man("Switzerland"), index=False)
    export_metadata(
        by_manufacturer,
        "Federal Office of Public Health",
        self.source_url,
        paths.tmp_vax_metadata_man,
    )
def to_csv(self, paths):
    """Write the main, per-manufacturer and age-group CSV files.

    ``read`` returns a pair of frames: the first feeds ``pipeline`` and
    ``pipeline_manufacturer``, the second feeds ``pipeline_age``. Metadata is
    exported for the manufacturer and age-group outputs, both credited to
    "Ministry of Health via vacuna.uy".
    """
    source_name = "Ministry of Health via vacuna.uy"
    raw, raw_age = self.read()

    # Main vaccination series.
    vaccinations = raw.pipe(self.pipeline)
    vaccinations.to_csv(paths.tmp_vax_out(self.location), index=False)

    # Per-manufacturer series and its source metadata.
    by_manufacturer = raw.pipe(self.pipeline_manufacturer)
    by_manufacturer.to_csv(paths.tmp_vax_out_man(self.location), index=False)
    export_metadata(
        by_manufacturer,
        source_name,
        self.source_url,
        paths.tmp_vax_metadata_man,
    )

    # Age-group breakdown and its source metadata.
    by_age = raw_age.pipe(self.pipeline_age)
    by_age.to_csv(paths.tmp_vax_out_by_age_group(self.location), index=False)
    export_metadata(
        by_age,
        source_name,
        self.source_url_age,
        paths.tmp_vax_metadata_age,
    )
def main(paths):
    """Download Lithuania's vaccination counts and export them as CSV files.

    Queries the ArcGIS feature service for cumulative counts by date, vaccine
    and vaccination state, then writes:

    * the per-manufacturer series to ``paths.tmp_vax_out_man("Lithuania")``
      and its metadata through ``export_metadata``;
    * the per-date series to ``paths.tmp_vax_out("Lithuania")``.

    Args:
        paths: Object providing ``tmp_vax_out``, ``tmp_vax_out_man`` and
            ``tmp_vax_metadata_man``.

    Raises:
        AssertionError: If the vaccine names found in the data differ from
            the keys of the known vaccine mapping.
    """
    DATA_URL = (
        "https://services3.arcgis.com/MF53hRPmwfLccHCj/arcgis/rest/services/"
        "covid_vaccinations_by_drug_name_new/FeatureServer/0/query"
    )
    PARAMS = {
        'f': 'json',
        'where': "municipality_code='00'",
        'returnGeometry': False,
        'spatialRel': 'esriSpatialRelIntersects',
        'outFields': 'date,vaccine_name,vaccination_state,vaccinated_cum',
        'resultOffset': 0,
        'resultRecordCount': 32000,
        'resultType': 'standard',
    }
    res = requests.get(DATA_URL, params=PARAMS)
    data = [elem["attributes"] for elem in json.loads(res.content)["features"]]

    df = pd.DataFrame.from_records(data)
    df["date"] = pd.to_datetime(df["date"], unit="ms")

    # Correction for vaccinations wrongly attributed to early December 2020
    df.loc[df.date < "2020-12-27", "date"] = pd.to_datetime("2020-12-27")

    # Reshape data: "Dalinai" rows are discarded, "Visi" is treated as dose 1
    # and "Pilnai" as dose 2.
    df = df[(df.vaccination_state != "Dalinai") & (df.vaccinated_cum > 0)].copy()
    df.loc[df.vaccination_state == "Visi", "dose_number"] = 1
    df.loc[df.vaccination_state == "Pilnai", "dose_number"] = 2
    df = df.drop(columns="vaccination_state")

    # Data by vaccine
    vaccine_mapping = {
        "Pfizer-BioNTech": "Pfizer/BioNTech",
        "Moderna": "Moderna",
        "AstraZeneca": "Oxford/AstraZeneca",
        "Johnson & Johnson": "Johnson&Johnson",
    }
    assert set(df["vaccine_name"].unique()) == set(vaccine_mapping.keys())
    df = df.replace(vaccine_mapping)

    # From here on vaccine names are the mapped ones, so the single-shot
    # vaccine must be looked up under its mapped name.
    single_shot_vaccine = vaccine_mapping["Johnson & Johnson"]

    vax = (
        df.groupby(["date", "vaccine_name"], as_index=False)["vaccinated_cum"]
        .sum()
        .sort_values("date")
        .rename(columns={
            "vaccine_name": "vaccine",
            "vaccinated_cum": "total_vaccinations",
        })
    )
    vax["location"] = "Lithuania"
    vax.to_csv(paths.tmp_vax_out_man("Lithuania"), index=False)
    export_metadata(vax, "Ministry of Health", DATA_URL, paths.tmp_vax_metadata_man)

    # Unpivot: one row per (date, vaccine) with a column per dose number.
    df = (
        df.groupby(["date", "dose_number", "vaccine_name"], as_index=False)
        .sum()
        .pivot(
            index=["date", "vaccine_name"],
            columns="dose_number",
            values="vaccinated_cum",
        )
        .fillna(0)
        .reset_index()
        .rename(columns={
            1: "people_vaccinated",
            2: "people_fully_vaccinated",
        })
        .sort_values("date")
    )

    # Total vaccinations
    df = df.assign(
        total_vaccinations=df.people_vaccinated + df.people_fully_vaccinated
    )

    # Single shot: everyone who received it counts as fully vaccinated.
    msk = df.vaccine_name == single_shot_vaccine
    df.loc[msk, "people_fully_vaccinated"] = df.loc[msk, "people_vaccinated"]

    # Group by date
    df = (
        df.groupby("date")
        .agg({
            "people_fully_vaccinated": "sum",
            "people_vaccinated": "sum",
            "total_vaccinations": "sum",
            "vaccine_name": lambda x: ", ".join(sorted(x)),
        })
        .rename(columns={"vaccine_name": "vaccine"})
        .reset_index()
    )
    df = df.replace(0, pd.NA)
    df.loc[:, "location"] = "Lithuania"
    df.loc[:, "source_url"] = (
        "https://experience.arcgis.com/experience/cab84dcfe0464c2a8050a78f817924ca/page/page_3/"
    )
    df.to_csv(paths.tmp_vax_out("Lithuania"), index=False)
def main(paths):
    """Download France's vaccination counts and export them as CSV files.

    Reads the semicolon-separated source file, keeps the rows whose vaccine
    code is in the known mapping and whose first-dose count is positive,
    then writes:

    * the per-manufacturer series to ``paths.tmp_vax_out_man("France")`` and
      its metadata through ``export_metadata``;
    * the per-date series to ``paths.tmp_vax_out("France")``.

    Args:
        paths: Object providing ``tmp_vax_out``, ``tmp_vax_out_man`` and
            ``tmp_vax_metadata_man``.

    Raises:
        AssertionError: If some vaccine code of the mapping is absent from
            the filtered data.
    """
    vaccine_mapping = {
        1: "Pfizer/BioNTech",
        2: "Moderna",
        3: "Oxford/AstraZeneca",
        4: "Johnson&Johnson",
    }
    one_dose_vaccines = ["Johnson&Johnson"]
    source = "https://www.data.gouv.fr/fr/datasets/r/b273cf3b-e9de-437c-af55-eda5979e92fc"

    df = pd.read_csv(
        source,
        usecols=["vaccin", "jour", "n_cum_dose1", "n_cum_dose2"],
        sep=";",
    )
    df = df.rename(columns={
        "vaccin": "vaccine",
        "jour": "date",
        "n_cum_dose1": "people_vaccinated",
        "n_cum_dose2": "people_fully_vaccinated",
    })

    # Map vaccine names. The filtered frame is copied so that the column
    # assignments below act on an independent frame and do not trigger
    # pandas' SettingWithCopyWarning.
    df = df[
        (df.vaccine.isin(vaccine_mapping.keys())) & (df.people_vaccinated > 0)
    ].copy()
    assert set(df["vaccine"].unique()) == set(vaccine_mapping.keys())
    df["vaccine"] = df.vaccine.replace(vaccine_mapping)

    # Add total doses
    df["total_vaccinations"] = df.people_vaccinated + df.people_fully_vaccinated

    manufacturer = df[["date", "total_vaccinations", "vaccine"]].assign(
        location="France"
    )
    manufacturer.to_csv(paths.tmp_vax_out_man("France"), index=False)
    export_metadata(
        manufacturer,
        "Public Health France",
        source,
        paths.tmp_vax_metadata_man,
    )

    # Infer fully vaccinated for one-dose vaccines
    df.loc[
        df.vaccine.isin(one_dose_vaccines), "people_fully_vaccinated"
    ] = df.people_vaccinated

    df = df.groupby("date", as_index=False).agg({
        "total_vaccinations": "sum",
        "people_vaccinated": "sum",
        "people_fully_vaccinated": "sum",
        "vaccine": lambda x: ", ".join(sorted(x)),
    })
    df = df.assign(
        location="France",
        source_url=(
            "https://www.data.gouv.fr/fr/datasets/donnees-relatives-aux-personnes-vaccinees-contre-la-covid-19-1/"
        ),
    )
    df.to_csv(paths.tmp_vax_out("France"), index=False)
def main(paths):
    """Scrape Iceland's vaccination infographic and export its figures.

    Extracts the ``infographicData`` JSON embedded in the page, then:

    * sums the per-vaccine counts into national totals according to
      ``VACCINE_PROTOCOLS`` and records them through ``increment``;
    * builds the cumulative per-manufacturer series, writes it to
      ``paths.tmp_vax_out_man("Iceland")`` and exports its metadata.

    Args:
        paths: Object forwarded to ``increment`` and providing
            ``tmp_vax_out_man`` and ``tmp_vax_metadata_man``.

    Raises:
        ValueError: If no ``<script>`` element of the page contains
            ``infographicData``.
        AssertionError: If the vaccines found in either chart differ from
            the expected ones.
    """
    url = "https://e.infogram.com/c3bc3569-c86d-48a7-9d4c-377928f102bf"
    soup = BeautifulSoup(requests.get(url).content, "html.parser")

    for script in soup.find_all("script"):
        script_text = str(script)
        if "infographicData" in script_text:
            json_data = json.loads(
                script_text
                .replace("<script>window.infographicData=", "")
                .replace(";</script>", "")
            )
            break
    else:
        # Fail with an explicit message instead of an unbound-variable
        # error further down.
        raise ValueError(f"No infographicData script found at {url}")

    entities = json_data["elements"]["content"]["content"]["entities"]

    data = entities["39ac25a9-8af7-4d26-bd19-62a3696920a2"]["props"]["chartData"]["data"][0]
    df = pd.DataFrame(data[1:], columns=data[0])
    assert set(df.iloc[:, 0]) == set(VACCINE_PROTOCOLS.keys()), "New vaccine found!"

    total_vaccinations = 0
    people_vaccinated = 0
    people_fully_vaccinated = 0
    for _, row in df.iterrows():
        # The first column holds the vaccine name; access it by position
        # explicitly, since the row is labelled with the chart's headers.
        protocol = VACCINE_PROTOCOLS[row.iloc[0]]
        if protocol == 1:
            fv = clean_count(row["Fullbólusettir"])
            total_vaccinations += fv
            people_vaccinated += fv
            people_fully_vaccinated += fv
        elif protocol == 2:
            fv = clean_count(row["Fullbólusettir"])
            pv = clean_count(row["Bólusetning hafin"])
            # Fully vaccinated people received two doses, the others one.
            total_vaccinations += fv * 2 + pv
            people_vaccinated += fv + pv
            people_fully_vaccinated += fv

    date = json_data["updatedAt"][:10]
    increment(
        paths=paths,
        location="Iceland",
        total_vaccinations=total_vaccinations,
        people_vaccinated=people_vaccinated,
        people_fully_vaccinated=people_fully_vaccinated,
        date=date,
        source_url="https://www.covid.is/tolulegar-upplysingar-boluefni",
        vaccine="Johnson&Johnson, Moderna, Oxford/AstraZeneca, Pfizer/BioNTech",
    )

    # By manufacturer
    data = entities["e329559c-c3cc-48e9-8b7b-1a5f87ea7ad3"]["props"]["chartData"]["data"][0]
    df = pd.DataFrame(data[1:]).reset_index(drop=True)
    df.columns = ["date"] + data[0][1:]
    df = df.melt("date", var_name="vaccine", value_name="total_vaccinations")
    df["date"] = pd.to_datetime(df["date"], format="%d.%m.%y")
    df["total_vaccinations"] = pd.to_numeric(
        df["total_vaccinations"], errors="coerce"
    ).fillna(0)
    # Accumulate the per-date values into a running total per vaccine.
    df["total_vaccinations"] = (
        df.sort_values("date")
        .groupby("vaccine", as_index=False)["total_vaccinations"]
        .cumsum()
    )
    df["location"] = "Iceland"

    vaccine_mapping = {
        "Pfizer/BioNTech": "Pfizer/BioNTech",
        "Moderna": "Moderna",
        "Oxford/AstraZeneca": "Oxford/AstraZeneca",
        "Janssen": "Johnson&Johnson",
    }
    assert set(df["vaccine"].unique()) == set(vaccine_mapping.keys()), \
        f"Vaccines present in data: {df['vaccine'].unique()}"
    df = df.replace(vaccine_mapping)

    df.to_csv(paths.tmp_vax_out_man("Iceland"), index=False)
    export_metadata(df, "Ministry of Health", url, paths.tmp_vax_metadata_man)
def main(paths):
    """Download Italy's vaccination records and export them as CSV files.

    Reads the administration records, derives dose totals per row, then
    writes the cumulative per-manufacturer series (with its metadata) to
    ``paths.tmp_vax_out_man("Italy")`` and the cumulative per-date series to
    ``paths.tmp_vax_out("Italy")``.
    """
    url = (
        "https://raw.githubusercontent.com/italia/covid19-opendata-vaccini/master/dati/"
        "somministrazioni-vaccini-latest.csv"
    )
    vaccine_mapping = {
        "Pfizer/BioNTech": "Pfizer/BioNTech",
        "Moderna": "Moderna",
        "Vaxzevria (AstraZeneca)": "Oxford/AstraZeneca",
        "Janssen": "Johnson&Johnson",
    }
    one_dose_vaccines = ["Johnson&Johnson"]
    metrics = ["total_vaccinations", "people_vaccinated", "people_fully_vaccinated"]
    wanted_columns = [
        "data_somministrazione",
        "fornitore",
        "fascia_anagrafica",
        "prima_dose",
        "seconda_dose",
        "pregressa_infezione",
    ]

    df = pd.read_csv(url, usecols=wanted_columns)
    assert set(df["fornitore"].unique()) == set(vaccine_mapping.keys())
    df = df.replace(vaccine_mapping)

    # Per-row dose counts derived from the source columns.
    first_like = df["prima_dose"] + df["pregressa_infezione"]
    df["total_vaccinations"] = (
        df["prima_dose"] + df["seconda_dose"] + df["pregressa_infezione"]
    )
    df["people_vaccinated"] = first_like
    df = df.rename(columns={
        "data_somministrazione": "date",
        "fornitore": "vaccine",
        "fascia_anagrafica": "age_group",
        "seconda_dose": "people_fully_vaccinated",
    })

    # Data by manufacturer: daily sums turned into a running total per vaccine.
    daily_by_vaccine = df.groupby(["date", "vaccine"], as_index=False)[
        "total_vaccinations"
    ].sum()
    by_manufacturer = daily_by_vaccine.sort_values("date")
    by_manufacturer["total_vaccinations"] = by_manufacturer.groupby("vaccine")[
        "total_vaccinations"
    ].cumsum()
    by_manufacturer["location"] = "Italy"
    by_manufacturer.to_csv(paths.tmp_vax_out_man("Italy"), index=False)
    export_metadata(
        by_manufacturer,
        "Extraordinary commissioner for the Covid-19 emergency",
        url,
        paths.tmp_vax_metadata_man,
    )

    # Vaccination data: one-dose vaccines count as fully vaccinated.
    is_one_dose = df.vaccine.isin(one_dose_vaccines)
    df.loc[is_one_dose, "people_fully_vaccinated"] = df.people_vaccinated

    daily = df.groupby("date", as_index=False)[metrics].sum()
    df = daily.sort_values("date")
    df[metrics] = df[metrics].cumsum()

    df.loc[:, "location"] = "Italy"
    df.loc[:, "source_url"] = url
    df.loc[:, "vaccine"] = ", ".join(sorted(vaccine_mapping.values()))
    df.to_csv(paths.tmp_vax_out("Italy"), index=False)