def get_state_timeseries(states = ("Tamil Nadu",), download: bool = False) -> pd.DataFrame:
    """ Load COVID19India case data and aggregate it to (state, district) time series.

    states:   iterable of state names to keep, or the string "*" to keep all
              non-NULL states (defaults to Tamil Nadu only)
    download: if True, re-download every source file into `data` before loading
    returns:  DataFrame of daily status-change counts with columns renamed
              Deceased -> dD, Hospitalized -> dT, Recovered -> dR

    Fix: the default for `states` was a mutable list (["Tamil Nadu"]); it is now
    an equivalent immutable tuple, which behaves identically in the
    `detected_state in @states` query but cannot be accidentally mutated
    across calls.
    """
    # v3 and v4 are successive schema versions of the covid19india API files
    paths = {
        "v3": [data_path(i) for i in (1, 2)],
        "v4": [data_path(i) for i in range(3, 25)]
    }
    if download:
        for target in paths['v3'] + paths['v4']:
            download_data(data, target)
    return load_all_data(
            v3_paths = [data/filepath for filepath in paths['v3']],
            v4_paths = [data/filepath for filepath in paths['v4']])\
        .query("detected_state in @states" if states != "*" else "detected_state != 'NULL'", engine = "python")\
        .pipe(lambda _: get_time_series(_, ["detected_state", "detected_district"]))\
        .drop(columns = ["date", "time", "delta", "logdelta"])\
        .rename(columns = { "Deceased": "dD", "Hospitalized": "dT", "Recovered": "dR" })
def get_state_timeseries(
        states = "*",
        download: bool = False,
        aggregation_cols = None,
        last_API_file: int = 27) -> pd.DataFrame:
    """ load state- and district-level data, downloading source files if specified

    states:           iterable of state names to keep, or "*" for all non-NULL states
    download:         if True, re-download every source file into `data` before loading
    aggregation_cols: columns passed to get_time_series for grouping;
                      defaults to ["detected_state", "detected_district"]
    last_API_file:    one past the highest v4 API file index to load
    returns:          DataFrame of daily status-change counts renamed
                      Deceased -> dD, Hospitalized -> dT, Recovered -> dR

    Fix: `aggregation_cols` previously used a mutable list as its default value
    (shared across all calls); it now uses the None-sentinel idiom and still
    hands get_time_series the same list.
    """
    if aggregation_cols is None:
        aggregation_cols = ["detected_state", "detected_district"]
    # v3 and v4 are successive schema versions of the covid19india API files
    paths = {
        "v3": [data_path(i) for i in (1, 2)],
        "v4": [data_path(i) for i in range(3, last_API_file)]
    }
    if download:
        for target in paths['v3'] + paths['v4']:
            download_data(data, target)
    return load_all_data(
            v3_paths = [data/filepath for filepath in paths['v3']],
            v4_paths = [data/filepath for filepath in paths['v4']])\
        .query("detected_state in @states" if states != "*" else "detected_state != 'NULL'")\
        .pipe(lambda _: get_time_series(_, aggregation_cols))\
        .drop(columns = ["date", "time", "delta", "logdelta"])\
        .rename(columns = { "Deceased": "dD", "Hospitalized": "dT", "Recovered": "dR" })
import pandas as pd
from epimargin.etl.commons import download_data
from epimargin.etl.covid19india import data_path, get_time_series, load_all_data
from epimargin.utils import setup

# setup() returns a 2-tuple; only the first element (the data directory) is
# used here, the second is discarded
data, _ = setup()

# source-file lists for the two successive covid19india API schema versions
paths = { "v3": [data_path(i) for i in (1, 2)], "v4": [data_path(i) for i in range(3, 18)] }

# download every source file into the data directory
for target in paths['v3'] + paths['v4']:
    download_data(data, target)

df = load_all_data(v3_paths=[data / filepath for filepath in paths['v3']], v4_paths=[data / filepath for filepath in paths['v4']])

# raw status label -> delta column name used throughout this script
schema = {"Deceased": "dD", "Recovered": "dR", "Hospitalized": "dT"}

def assemble_time_series(df):
    """Aggregate raw case rows into a daily time series of deltas and cumulative totals."""
    ts = get_time_series(df)
    # keep only the status columns and rename them per `schema`
    deltas = ts[schema.keys()]\
        .rename(columns = schema)
    # fill calendar gaps with zeros so the cumulative sums below stay continuous
    deltas = deltas.reindex(pd.date_range(deltas.index.min(), deltas.index.max()), fill_value=0)
    # cumulative totals: `lambda _: _[1]` keeps the second character of each
    # column name, turning "dD" -> "D", "dR" -> "R", "dT" -> "T" before joining
    # the totals back onto the daily deltas
    merged = deltas.merge(deltas.cumsum(axis=0).rename(columns=lambda _: _[1]), left_index=True, right_index=True).astype(int)
    merged.index.name = "date"
    # NOTE(review): no return statement is visible in this chunk; presumably
    # `return merged` (or further processing) follows — confirm against the full file
# finish the previous figure: right-hand legend/axis label, then restore the
# left axis (`lax` is defined earlier, outside this chunk) and show
plt.legend(loc = 'upper right')
plt.PlotDevice().ylabel("new cases", rotation = -90, labelpad = 50)
plt.ylim(bottom = 0)
plt.sca(lax)
plt.show()

# cases vs deaths
from pathlib import Path
data = Path("./data")

# source-file lists for the two covid19india API schema versions; download all
paths = {"v3": [data_path(i) for i in (1, 2)], "v4": [data_path(i) for i in range(3, 27)]}
for target in paths['v3'] + paths['v4']:
    download_data(data, target)

# national daily counts: aggregate by state, then sum over the date level
# NOTE(review): DataFrame.sum(level=...) is deprecated since pandas 1.3 and
# removed in 2.0; the equivalent is .groupby(level = -1).sum() — confirm the
# pinned pandas version before upgrading
df = load_all_data(v3_paths = [data/filepath for filepath in paths['v3']], v4_paths = [data/filepath for filepath in paths['v4']])\
    .pipe(lambda _: get_time_series(_, ["detected_state"]))\
    .drop(columns = ["date", "time", "delta", "logdelta"])\
    .rename(columns = { "Deceased": "dD", "Hospitalized": "dT", "Recovered": "dR" }).sum(level = -1).sort_index()

# left axis: smoothed daily deaths, with a lockdown annotation
plt.plot(df.index, smoothed(df.dD.values), label = "Daily Deaths", color = plt.RED)
plt.text(s = "national lockdown", x = pd.to_datetime("April 27, 2020"), y = 200, fontdict = plt.theme.note, ha = "center", va = "top")
plt.legend(loc = 'upper left')
plt.ylim(bottom = 0)

# right axis (twinned): smoothed daily cases on its own scale
lax = plt.gca()
plt.sca(lax.twinx())
plt.plot(df.index, smoothed(df.dT.values), label = "Daily Cases", color = plt.PRED_PURPLE)
plt.legend(loc = 'upper right')
plt.PlotDevice().ylabel("new cases", rotation = -90, labelpad = 50)
plt.ylim(bottom = 0)
plt.sca(lax)
sero = pd.read_stata("data/kadata.labdate.dta")\ .drop(columns = ["_merge"])\ sero["S"] = sero["elisa_pos15"] sero["t0"] = sero["date_med"] sero["td"] = sero["t0"] + pd.Timedelta(days=30) sero["hr"] = sero.hom_region.map(hom_regions_numeric) # pull down COVID 19 India data paths = { "v3": [data_path(i) for i in (1, 2)], "v4": [data_path(i) for i in range(3, 19)] } # for target in paths['v3'] + paths['v4']: # download_data(data, target) df = load_all_data(v3_paths = [data/filepath for filepath in paths['v3']], v4_paths = [data/filepath for filepath in paths['v4']])\ .query("detected_state == 'Karnataka'") # get all deaths in KA on Aug 29 by district get_time_series(df, "detected_district")\ .query("status_change_date <= 'Aug 29, 2020'", engine = "python")\ .Deceased.sum(level = 0)\ .drop("Other State")\ .astype(int)\ .to_csv(data/"ka_cumulative_deaths_aug29.csv") # aggregate time series by hom_region df["detected_region"] = df.detected_district.map(hom_regions_rev) ka_ts = get_time_series(df.dropna(subset=["detected_region"]), "detected_region").rename(columns={ "Deceased": "dD", "Hospitalized": "dT",