def read(self) -> pd.DataFrame:
    """Fetch primary-dose and booster metrics and merge them on date."""
    query_params = load_query("ireland-metrics", to_str=False)
    primary = self._parse_data_primary(
        request_json(self.source_url["primary"], params=query_params)
    )
    boosters = self._parse_data_boosters(
        request_json(self.source_url["booster"], params=query_params)
    )
    # Outer merge keeps dates present in only one of the two series.
    return pd.merge(primary, boosters, how="outer", on="date", validate="one_to_one")
def read(self) -> pd.DataFrame:
    """Reads data from source."""
    payload = request_json(self.source_url)
    frame = pd.json_normalize(payload, record_path=["features"])
    # Rows without a test count are not usable downstream.
    return frame.dropna(subset=["attributes.unique_public_private_test"])
def read(self):
    """Read data from the configured source, dispatching on `source_name`."""
    payload = request_json(self.source_url[self.source_name])
    readers = {"mohfw": self.read_mohfw, "cowin": self.read_cowin}
    reader = readers.get(self.source_name)
    if reader is None:
        raise ValueError(f"Not valid class attribute `source_name`: {self.source_name}")
    return reader(payload)
def _get_file_url(self) -> tuple:
    """Return the (doses, people, manufacturer) CSV URLs from the API context.

    Fix: the original annotation claimed `-> str`, but the function returns a
    3-tuple of URLs; the annotation now matches the actual return value.
    """
    response = request_json("https://www.covid19.admin.ch/api/data/context")
    context = response["sources"]["individual"]["csv"]
    doses_url = context["vaccDosesAdministered"]
    people_url = context["vaccPersonsV2"]
    manufacturer_url = context["weeklyVacc"]["byVaccine"]["vaccDosesAdministered"]
    return doses_url, people_url, manufacturer_url
def _parse_data(self) -> dict:
    """Parse the cumulative tested count and the local reporting date.

    Fixes: the original annotation claimed `-> pd.Series` while a plain dict
    is returned; also replaces positional `data[0]` (deprecated on a labeled
    Series in modern pandas) with label-based access.
    """
    data = request_json(self.source_url)["stats"]
    # First record carries the current cumulative value.
    data = pd.DataFrame.from_records(data, columns=["tested"]).iloc[0]
    return {
        "count": clean_count(data["tested"]),
        "date": localdate("Atlantic/Faeroe"),
    }
def _parse_last_update_date(self):
    """Return the date parsed from the `lastUpdated` ISO timestamp."""
    field_name = "lastUpdated"
    payload = request_json(self.source_url_date)
    if field_name not in payload:
        raise ValueError(f"{field_name} field not found!")
    return datetime.fromisoformat(payload[field_name]).date()
def main(paths):
    """Scrape Saudi Arabia vaccination figures and write them to a CSV file."""
    url = "https://services6.arcgis.com/bKYAIlQgwHslVRaK/arcgis/rest/services/Vaccination_Individual_Total/FeatureServer/0/query?f=json&cacheHint=true&outFields=*&resultType=standard&returnGeometry=false&spatialRel=esriSpatialRelIntersects&where=1%3D1"
    payload = request_json(url)
    records = [feature["attributes"] for feature in payload["features"]]
    df = pd.DataFrame.from_records(records)
    df = df.drop(columns=["ObjectId", "LastValue", "Total_Individuals"])
    df = df.rename(
        columns={
            "Reportdt": "date",
            "Total_Vaccinations": "total_vaccinations",
            "FirstDose": "people_vaccinated",
            "SecondDose": "people_fully_vaccinated",
        }
    )
    # Timestamps arrive as epoch milliseconds; keep one (max) row per day.
    df["date"] = pd.to_datetime(df.date, unit="ms").dt.date.astype(str)
    df = df.groupby("date", as_index=False).max()
    df.loc[:, "location"] = "Saudi Arabia"
    df.loc[:, "vaccine"] = "Pfizer/BioNTech"
    df.loc[df.date >= "2021-02-18", "vaccine"] = "Oxford/AstraZeneca, Pfizer/BioNTech"
    df.loc[:, "source_url"] = "https://covid19.moh.gov.sa/"
    df = df[df.total_vaccinations > 0].sort_values("date")
    # The data contains an error that creates a negative change
    df = df[df.date != "2021-03-03"]
    df.to_csv(paths.tmp_vax_out("Saudi Arabia"), index=False)
def read(self) -> pd.DataFrame:
    """Download the national series and keep the vaccination columns."""
    payload = request_json(self.source_url)
    df = pd.DataFrame.from_records(payload["data"])
    # Fail loudly if the upstream schema changes.
    expected_columns = [
        "date",
        "change_cases",
        "change_fatalities",
        "change_tests",
        "change_hospitalizations",
        "change_criticals",
        "change_recoveries",
        "change_vaccinations",
        "change_vaccinated",
        "change_boosters_1",
        "change_boosters_2",
        "change_vaccines_distributed",
        "total_cases",
        "total_fatalities",
        "total_tests",
        "total_hospitalizations",
        "total_criticals",
        "total_recoveries",
        "total_vaccinations",
        "total_vaccinated",
        "total_boosters_1",
        "total_boosters_2",
        "total_vaccines_distributed",
    ]
    check_known_columns(df, expected_columns)
    keep = ["date", "total_vaccinations", "total_vaccinated", "total_boosters_1", "total_boosters_2"]
    return df[keep]
def get_data(self) -> pd.DataFrame:
    """Build a daily cumulative-test series from the ArcGIS feature endpoint.

    Returns a DataFrame with columns ["Date", "Cumulative total"], one row
    per date, made monotonically non-decreasing via `make_monotonic`.
    """
    json_data = request_json(DATA_URL, params=PARAMS)
    df = pd.DataFrame(
        [feat["attributes"] for feat in json_data["features"]])
    # reportdt is epoch milliseconds; convert to naive UTC datetimes.
    df["reportdt"] = df["reportdt"].astype(int).apply(
        lambda dt: datetime.datetime.utcfromtimestamp(dt / 1000))
    df = df.rename(columns={"totalTests": "Cumulative total"})
    df["Cumulative total"] = df["Cumulative total"].astype(int)
    # KLUDGE: there are a few days with two reports on the same day (but at
    # different times, like 10am vs 10pm). Upon inspection, it appears that the
    # latter reports (e.g. the 10pm reports) actually correspond to official cumulative
    # totals for the subsequent day (as determined by comparing to official updates
    # published on Twitter and Facebook). So I increment the date of these latter
    # reports by one.
    df = df.sort_values("reportdt")
    # After sorting, duplicated(keep="first") marks exactly the later report
    # of each same-day pair.
    duplicate_idx = df.index[df["reportdt"].dt.date.duplicated(
        keep="first")]
    for idx in duplicate_idx:
        df.loc[idx, "reportdt"] = df.loc[idx, "reportdt"] + datetime.timedelta(
            days=1)
    df["Date"] = df["reportdt"].dt.strftime("%Y-%m-%d")
    df = df[["Date", "Cumulative total"]]
    df = df[df["Cumulative total"] > 0]
    # First groupby keeps the earliest date for each total; the second keeps
    # the smallest total per date — order matters here.
    df = df.groupby("Cumulative total", as_index=False).min()
    df = df.groupby("Date", as_index=False).min()
    # manual fix: drop incorrect data point on 2021-10-30
    df = df.drop(index=df[df["Date"] == "2021-10-30"].index.values)
    df = make_monotonic(df)
    return df
def read(self) -> pd.DataFrame:
    """Load the daily vaccination records and validate the column set."""
    payload = request_json(self.source_url)
    df = pd.DataFrame.from_records(payload)
    # Fail loudly if the upstream schema changes.
    expected_columns = [
        "Day_Date",
        "vaccinated",
        "vaccinated_cum",
        "vaccinated_population_perc",
        "vaccinated_seconde_dose",
        "vaccinated_seconde_dose_cum",
        "vaccinated_seconde_dose_population_perc",
        "vaccinated_third_dose",
        "vaccinated_third_dose_cum",
        "vaccinated_third_dose_population_perc",
        "vaccinated_fourth_dose_population_perc",
        "vaccinated_fourth_dose",
        "vaccinated_validity_perc",
        "vaccinated_expired_perc",
        "not_vaccinated_perc",
        "vaccinated_fourth_dose_cum",
    ]
    check_known_columns(df, expected_columns)
    return df
def read(self) -> pd.DataFrame:
    """Load the historical vaccine series keyed by date index."""
    payload = request_json(self.source_url)
    df = pd.DataFrame.from_dict(
        payload["historicalData"],
        orient="index",
        columns=["vaccines", "numberTotalDosesAdministered"],
    )
    # The dict keys (dates) become the "index" column after reset.
    return df.reset_index().dropna().sort_values(by="index")
def read(self) -> pd.DataFrame:
    """Load historical data, validate columns, and keep the vaccine series."""
    payload = request_json(self.source_url)
    df = pd.DataFrame.from_dict(payload["historicalData"], orient="index")
    # Fail loudly if the upstream schema changes.
    expected_columns = [
        "parsedOn",
        "parsedOnString",
        "fileName",
        "complete",
        "averageAge",
        "numberInfected",
        "numberCured",
        "numberDeceased",
        "percentageOfWomen",
        "percentageOfMen",
        "percentageOfChildren",
        "numberTotalDosesAdministered",
        "distributionByAge",
        "countyInfectionsNumbers",
        "incidence",
        "large_cities_incidence",
        "small_cities_incidence",
        "vaccines",
    ]
    check_known_columns(df, expected_columns)
    subset = df[["vaccines", "numberTotalDosesAdministered"]]
    # The dict keys (dates) become the "index" column after reset.
    return subset.reset_index().dropna().sort_values(by="index")
def read(self) -> pd.DataFrame:
    """Reads data from source."""
    payload = request_json(self.source_url, params=self.params)
    frame = pd.json_normalize(payload, record_path=["features"])
    # Rows without a cumulative test value are not usable downstream.
    return frame.dropna(subset=["attributes.cumulative_test"])
def _get_api_value(self, query: str):
    """POST the JSON-encoded query and return the integer hit count."""
    payload = json.loads(query)
    response = request_json(
        self.source_url, json=payload, headers=self.headers, request_method="post"
    )
    return int(response["hits"]["total"])
def read(self) -> pd.DataFrame:
    """Read test entries and keep only the Liechtenstein ("FL") region."""
    payload = request_json(self.source_url)
    columns = ["datum", "entries", "entries_pos", "nachweismethode", "geoRegion"]
    df = pd.DataFrame(payload)[columns]
    return df[df.geoRegion == "FL"]
def read(self) -> pd.DataFrame:
    """Load the vaccination records with the four tracked columns."""
    payload = request_json(self.source_url)
    columns = ["date", "total_vaccinations", "total_vaccinated", "total_boosters_1"]
    return pd.DataFrame.from_records(payload["data"], columns=columns)
def get_api_value(source: str, query: str, headers: dict):
    """POST the JSON-encoded query to `source` and return the integer hit count."""
    payload = json.loads(query)
    response = request_json(source, json=payload, headers=headers, request_method="post")
    return int(response["hits"]["total"])
def read(self) -> pd.DataFrame:
    """Load sample counts and normalize the report timestamps."""
    payload = request_json(self.source_url)
    records = [row["value"] for row in payload["rows"]]
    # Load only the two columns needed downstream.
    df = pd.DataFrame.from_records(records, columns=["report_time", "samples"])
    # Clean the timezone-aware timestamps into the project's date format.
    df["report_time"] = clean_date_series(df["report_time"], "%Y-%m-%dT%H:%M:%S.%f%z")
    return df
def _read_antigens(self):
    """Read the antigen-test table (connector index 4) and clean it."""
    url = "https://atlas.jifo.co/api/connectors/425b93dc-c055-477c-b81a-5d4d9a1275f7"
    table = request_json(url)["data"][4]
    # First row holds the header; the date column has an empty-string name.
    df = pd.DataFrame.from_records(table[1:], columns=table[0])
    df = df.assign(Date=clean_date_series(df[""], "%d/%m/%Y"))
    for column in ("Positivas", "Total Px Ag"):
        df[column] = df[column].apply(clean_count)
    return df
def read(self) -> pd.DataFrame:
    """Aggregate per-security-region test counts into national daily totals."""
    payload = request_json(self.source_url)
    columns = [
        "Date_of_statistics",
        "Tested_with_result",
        "Security_region_name",
        "Tested_positive",
    ]
    df = pd.DataFrame.from_records(payload, columns=columns)
    # Sum across regions for each statistics date.
    return df.groupby("Date_of_statistics").sum().reset_index()
def _read_art(self):
    """Read the weekly ART records, correcting two known mistyped years."""
    url = f"{self.base_url}1ee4d904-b17e-41de-a731-854578b036e6"
    records = request_json(url)["result"]["records"]
    df = pd.DataFrame.from_records(records).drop(columns=["_id"])
    # Source mislabels two 2021 weeks as 2022; correct them before parsing.
    corrections = {"14/12/2022": "14/12/2021", "28/12/2022": "28/12/2021"}
    for wrong, right in corrections.items():
        df.loc[df["week_of"] == wrong, "week_of"] = right
    df["week_of"] = clean_date_series(df["week_of"], "%d/%m/%Y")
    return df
def read(self):
    """Parse vaccination figures from the first feature's attributes."""
    attributes = request_json(self.source_url)["features"][0]["attributes"]
    # EditDate is epoch milliseconds.
    date = clean_date(datetime.fromtimestamp(attributes["EditDate"] / 1000))
    return pd.Series({
        "total_vaccinations": attributes["Vaccine_total"],
        # NOTE(review): `Vaccine_total_last24` reads like a last-24h count, yet
        # it is mapped to people_fully_vaccinated — confirm against the schema.
        "people_fully_vaccinated": attributes["Vaccine_total_last24"],
        "date": date,
    })
def read(source: str) -> pd.Series:
    """Build people_vaccinated / people_fully_vaccinated from labeled counts.

    The source is a sequence of (label, value) pairs. Fix: the original
    computed `people_vaccinated` inside the loop body for the "1st Vaccine
    taken" row, which raised NameError whenever that row appeared before the
    "2nd Vaccine taken" row (or either row was missing). Both values are now
    collected first and combined afterwards, with an explicit error when a
    row is absent.
    """
    data = request_json(source)
    people_fully_vaccinated = None
    dose1_only = None
    for count in data:
        if count[0] == "2nd Vaccine taken":
            people_fully_vaccinated = count[1]
        elif count[0] == "1st Vaccine taken":
            dose1_only = count[1]
    if people_fully_vaccinated is None or dose1_only is None:
        raise ValueError("Missing expected vaccine rows in source data")
    people_vaccinated = dose1_only + people_fully_vaccinated
    return pd.Series({
        "people_vaccinated": people_vaccinated,
        "people_fully_vaccinated": people_fully_vaccinated,
    })
def read(source: str) -> pd.DataFrame:
    """Flatten the nested cumulative-dose structure into one row per day."""
    data = request_json(source)
    rows = []
    for day in data:
        total_cumulative = day["totalDose"]["cumulative"]
        rows.append({
            "date": day["date"],
            "people_vaccinated": day["firstDose"]["cumulative"]["total"],
            "people_fully_vaccinated": day["secondDose"]["cumulative"]["total"],
            "total_vaccinations": total_cumulative["total"],
            "total_pfizer": total_cumulative["biontech"],
            "total_sinovac": total_cumulative["sinovac"],
        })
    return pd.DataFrame.from_dict(rows)
def read(source: str) -> pd.Series:
    """Parse Croatian vaccination totals from the source API."""
    data = request_json(source)
    # Figures refer to the day before the report's `Datum` timestamp.
    report_date = str((pd.to_datetime(data["Datum"]) - timedelta(days=1)).date())
    return pd.Series(
        data={
            "total_vaccinations": int(data["CijepljenjeBrUtrosenihDoza"]),
            "people_vaccinated": int(data["CijepljeniJednomDozom"]),
            "people_fully_vaccinated": int(data["CijepljeniDvijeDoze"]),
            "date": report_date,
        })
def read(self) -> pd.DataFrame:
    """Extract per-country vaccination attributes into a fixed-column frame."""
    payload = request_json(self.source_url)
    attributes = [feature["attributes"] for feature in payload["features"]]
    columns = [
        "ADM0_SOVRN",
        "ISO_3_CODE",
        "TotAmtAdmi",
        "VacAd1Dose",
        "VacAd2Dose",
        "FullyVacc",
        "VaccApprov",
    ]
    return pd.DataFrame(attributes, columns=columns)
def _api_request(self):
    """Query the ArcGIS endpoint for the current local day's dose statistics."""
    date_low = localdatenow(self.timezone)
    date_up = localdatenow(self.timezone, sum_days=1)
    out_statistics = (
        "[{'onStatisticField':'total_dosis_adminsitradas','outStatisticFieldName':'total_vaccinations','statisticType':'sum'},"
        "{'onStatisticField':'total_primera_dosis','outStatisticFieldName':'dose_1','statisticType':'sum'},"
        "{'onStatisticField':'total_segunda_dosis','outStatisticFieldName':'dose_2','statisticType':'max'}]"
    )
    params = {
        "f": "json",
        "outFields": "*",
        "outStatistics": out_statistics,
        "returnGeometry": "false",
        # Window spans the local day, offset by the source's 05:00 cutover.
        "where": f"fecha BETWEEN timestamp '{date_low} 05:00:00' AND timestamp '{date_up} 04:59:59'",
    }
    return request_json(self.source_url, params=params)
def read(self):
    """Flatten feature attributes and validate the expected column set."""
    payload = request_json(self.source_url)
    rows = [feature["attributes"] for feature in payload["features"]]
    df = pd.DataFrame.from_records(rows)
    # Fail loudly if the upstream schema changes.
    expected_columns = [
        "Reportdt",
        "Total_Vaccinations",
        "Total_Individuals",
        "LastValue",
        "ObjectId",
        "Elderly",
        "FirstDose",
        "SecondDose",
        "BoosterDose",
    ]
    check_known_columns(df, expected_columns)
    return df
def extract(self) -> dict:
    """Return the distributions entry of the aggregate "World" region."""
    payload = request_json(self.source_url)
    # Indexing [0] preserves the original IndexError if "World" is absent.
    world_regions = [r for r in payload["regions"] if r["region"] == "World"]
    return world_regions[0]["distributions"]
def read(self) -> pd.DataFrame:
    """Fetch vaccine-distribution features and flatten their attributes."""
    # Requested output fields come from the rename map's keys (URL-encoded commas).
    fields = "%2C".join(self.columns_rename.keys())
    url = f"https://services9.arcgis.com/DnERH4rcjw7NU6lv/arcgis/rest/services/Vaccine_Distribution_Program/FeatureServer/2/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&resultType=none&distance=0.0&units=esriSRUnit_Meter&returnGeodetic=false&outFields={fields}&returnGeometry=true&featureEncoding=esriDefault&multipatchOption=xyFootprint&maxAllowableOffset=&geometryPrecision=&outSR=&datumTransformation=&applyVCSProjection=false&returnIdsOnly=false&returnUniqueIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&returnQueryGeometry=false&returnDistinctValues=false&cacheHint=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&having=&resultOffset=&resultRecordCount=&returnZ=false&returnM=false&returnExceededLimitFeatures=true&quantizationParameters=&sqlFormat=none&f=pjson&token="
    payload = request_json(url)
    rows = [feature["attributes"] for feature in payload["features"]]
    return pd.DataFrame.from_records(rows)