CI = 0.95 state_cases = pd.read_csv(data / "Bihar_cases_data_Oct03.csv", parse_dates=["date_reported"], dayfirst=True) state_ts = state_cases["date_reported"].value_counts().sort_index() district_names, population_counts, _ = etl.district_migration_matrix( data / "Migration Matrix - District.csv") populations = dict(zip(district_names, population_counts)) # first, look at state level predictions (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates) = analytical_MPVS( state_ts, CI=CI, smoothing=notched_smoothing(window=smoothing), totals=False) plt.Rt(dates, Rt_pred[1:], Rt_CI_upper[1:], Rt_CI_lower[1:], CI, ymin=0, ymax=4)\ .title("\nBihar: Reproductive Number Estimate")\ .annotate(f"data from {str(dates[0]).split()[0]} to {str(dates[-1]).split()[0]}")\ .xlabel("date")\ .ylabel("$R_t$", rotation=0, labelpad=20)\ .show() np.random.seed(33) Bihar = SIR("Bihar", 99_000_000, dT0=T_pred[-1], Rt0=Rt_pred[-1], lower_CI=T_CI_lower[-1],
from epimargin.smoothing import notched_smoothing

""" Common data loading/cleaning functions and constants """

# Absolute path of the "data" directory that sits one level above this file's folder.
data = (Path(__file__).parent / "../data").resolve()
# Side effect: the resolved data path is printed every time this module is executed/imported.
print(data)

# NOTE(review): presumably a currency conversion factor (1 unit = 1/72 USD) — confirm units with callers.
USD = 1 / 72

# Labels for the seven age bins, youngest to oldest.
age_bin_labels = ["0-17", "18-29", "30-39", "40-49", "50-59", "60-69", "70+"]

# Rt estimation parameters
CI = 0.95
window = 14
gamma = 0.2
infectious_period = 5
# Smoother built from the `window` defined just above.
smooth = notched_smoothing(window)

# simulation parameters
simulation_start = pd.Timestamp("Jan 1, 2021")
num_sims = 10000

# common vaccination parameters
immunity_threshold = 0.75
Rt_threshold = 0.2

# misc
state = "TN"
# Kept as a free-form date string, not a Timestamp.
survey_date = "October 23, 2020"

# palette
TN_color = "firebrick"
def _recent_window(cases):
    """Return the rows of `cases` used for estimation.

    Keeps the last `lookback` rows and drops the final `cutoff` rows.
    """
    # `-cutoff or None` leaves the slice open-ended when cutoff == 0; a literal
    # stop of -0 would select nothing at all.
    return cases.iloc[-lookback:(-cutoff or None)]


def _estimate_Rt_frame(confirmed):
    """Run `analytical_MPVS` on a confirmed-case series and tabulate its outputs."""
    (
        dates,
        Rt_pred, Rt_CI_upper, Rt_CI_lower,
        T_pred, T_CI_upper, T_CI_lower,
        total_cases, new_cases_ts,
        *_
    ) = analytical_MPVS(
        confirmed,
        CI=CI,
        smoothing=notched_smoothing(window=smoothing),
        totals=True)
    # NOTE(review): the [1:] / [2:] offsets are kept exactly as before; they
    # presumably align the longer outputs with the Rt series — confirm against
    # the estimator.
    return pd.DataFrame(data={
        "dates": dates[1:],
        "Rt_pred": Rt_pred,
        "Rt_CI_upper": Rt_CI_upper,
        "Rt_CI_lower": Rt_CI_lower,
        "T_pred": T_pred,
        "T_CI_upper": T_CI_upper,
        "T_CI_lower": T_CI_lower,
        "total_cases": total_cases[2:],
        "new_cases_ts": new_cases_ts,
    })


def run_estimates(request):
    """Estimate state- and district-level Rt for one state and upload the results.

    Downloads the crosswalk and the raw state/district case tables from the
    bucket into /tmp, estimates Rt for the state, then (unless the state is
    listed in `dissolved_states`) for every district not listed in `excluded`,
    and uploads the resulting CSVs under ``pipeline/est/``.

    Parameters:
        request: object handed to `get` to read the ``'state_code'`` field.

    Returns:
        The string ``"OK!"``.

    Raises:
        ValueError: if district-level estimation was attempted but no district
            produced an estimate.
    """
    state_code = get(request, 'state_code')
    state = state_code_lookup[state_code]
    print(f"Rt estimation for {state} ({state_code}) started")

    bucket = storage.Client().bucket(bucket_name)
    bucket.blob("pipeline/commons/refs/all_crosswalk.dta")\
        .download_to_filename("/tmp/all_crosswalk.dta")
    bucket.blob("pipeline/raw/states.csv")\
        .download_to_filename("/tmp/states.csv")
    bucket.blob("pipeline/raw/districts.csv")\
        .download_to_filename("/tmp/districts.csv")

    crosswalk = pd.read_stata("/tmp/all_crosswalk.dta")

    district_cases = pd.read_csv("/tmp/districts.csv")\
        .rename(columns = str.lower)\
        .set_index(["state", "district", "date"])\
        .sort_index()\
        .rename(index = lambda s: s.replace(" and ", " & "), level = 0)\
        .loc[state]
    state_cases = pd.read_csv("/tmp/states.csv")\
        .rename(columns = str.lower)\
        .set_index(["state", "date"])\
        .sort_index()\
        .rename(index = lambda s: s.replace(" and ", " & "), level = 0)\
        .loc[state]

    print(f"Estimating state-level Rt for {state_code}")
    # The crosswalk is queried with " And " where the case tables use " & ".
    normalized_state = state.replace(" and ", " And ").replace(" & ", " And ")
    # `@normalized_state` is resolved from this function's local scope, so the
    # query has to stay in this frame.
    lgd_state_name, lgd_state_id = crosswalk.query(
        "state_api == @normalized_state").filter(
            like="lgd_state").drop_duplicates().iloc[0]

    try:
        _estimate_Rt_frame(_recent_window(state_cases).confirmed)\
            .assign(state = state, lgd_state_name = lgd_state_name, lgd_state_id = lgd_state_id)\
            .to_csv("/tmp/state_Rt.csv")

        # upload to cloud
        bucket.blob(
            f"pipeline/est/{state_code}_state_Rt.csv").upload_from_filename(
                "/tmp/state_Rt.csv", content_type="text/csv")
    except Exception as e:
        # Best effort: a state-level failure is logged and district-level work continues.
        print(f"ERROR when estimating Rt for {state_code}", e)
        # print_exc() writes the traceback itself and returns None, so it must
        # not be wrapped in print().
        traceback.print_exc()

    if normalized_state in dissolved_states:
        print(f"Skipping district-level Rt for {state_code}")
    else:
        print(f"Estimating district-level Rt for {state} ({state_code})")
        estimates = []
        for district in district_cases.index.get_level_values(0).unique():
            if district.strip() in excluded:
                continue
            print(f"running estimation for [{district}]")
            lgd_district_data = crosswalk.query(
                "state_api == @normalized_state & district_api == @district"
            ).filter(like="lgd_district").drop_duplicates()
            if not lgd_district_data.empty:
                lgd_district_name, lgd_district_id = lgd_district_data.iloc[0]
            else:
                # No crosswalk row for this district: fall back to the state identifiers.
                lgd_district_name, lgd_district_id = lgd_state_name, lgd_state_id

            try:
                estimates.append(
                    _estimate_Rt_frame(
                        _recent_window(district_cases.loc[district]).confirmed
                    ).assign(state=state,
                             lgd_state_name=lgd_state_name,
                             lgd_state_id=lgd_state_id,
                             district=district,
                             lgd_district_name=lgd_district_name,
                             lgd_district_id=lgd_district_id))
            except Exception as e:
                # Best effort per district: log and move on to the next one.
                print(f"ERROR when estimating Rt for {district}, {state_code}", e)
                traceback.print_exc()

        if not estimates:
            # pd.concat([]) would raise a ValueError with no context; keep the
            # exception type but say which state had nothing to write.
            raise ValueError(
                f"no district-level Rt estimates were produced for {state_code}")

        pd.concat(estimates).to_csv("/tmp/district_Rt.csv")

        # upload to cloud
        bucket.blob(
            f"pipeline/est/{state_code}_district_Rt.csv").upload_from_filename(
                "/tmp/district_Rt.csv", content_type="text/csv")

    return "OK!"
from epimargin.estimators import analytical_MPVS, linear_projection from epimargin.etl.commons import download_data from epimargin.models import SIR, NetworkedSIR from epimargin.policy import simulate_PID_controller from epimargin.smoothing import notched_smoothing from epimargin.utils import days, setup from mpl_toolkits.axes_grid1 import make_axes_locatable from tqdm import tqdm logger = getLogger("DKIJ") # model/sim details gamma = 0.2 window = 7 CI = 0.95 smoothing = notched_smoothing(window = window) dkij_drop_cols = [ 'age', 'sex', 'fever', 'temp', 'cough', 'flu', 'sore_throat', 'shortness_breath', 'shivering', 'headache', 'malaise', 'muscle_pain', 'nausea_vomiting', 'abdominal_pain', 'diarrhoea', 'date_recovered', 'date_died', 'heart_disease', 'diabetes', 'pneumonia', 'hypertension', 'malignant', 'immunology_disorder', 'chronic_kidney', 'chronic_liver', 'copd', 'obesity', 'pregnant', 'tracing', 'otg', 'icu', 'intubation', 'ecmo', 'criteria_cases', 'age_group', 'age_group2', 'date_discharge', 'patient_status', 'death' ] shp_drop_cols = ['GID_0', 'NAME_0', 'GID_1', 'NAME_1', 'NL_NAME_1', 'GID_2', 'VARNAME_2', 'NL_NAME_2', 'TYPE_2', 'ENGTYPE_2', 'CC_2', 'HASC_2'] (data, figs) = setup(level = "INFO") dkij = pd.read_stata(data/"dkijakarta_180820.dta")\
state_cases = state_cases[state_cases.date_reported <= "2020-09-30"] state_ts = state_cases["date_reported"].value_counts().sort_index() district_ts = state_cases.groupby( ["geo_reported", "date_reported"])["date_reported"].count().sort_index() districts, pops, migrations = etl.district_migration_matrix( data / "Migration Matrix - District.csv") districts = sorted([etl.replacements.get(dn, dn) for dn in districts]) R_mandatory = dict() for district in districts: #district_ts.index.get_level_values(0).unique(): try: (_, Rt, *_) = analytical_MPVS(district_ts.loc[district], CI=CI, smoothing=notched_smoothing(window=10), totals=False) Rm = np.mean(Rt) except ValueError as v: Rm = 1.5 R_mandatory[district] = Rm R_voluntary = {district: 1.2 * R for (district, R) in R_mandatory.items()} si, sf = 0, 10 simulation_results = [ run_policies(state_cases, pops, districts, migrations,
import epimargin.plots as plt import numpy as np import pandas as pd import seaborn as sns from epimargin.smoothing import notched_smoothing from epimargin.etl.commons import download_data from epimargin.etl.covid19india import data_path, load_all_data, get_time_series sns.set_style("whitegrid", {'axes.grid' : False}) smoothed = notched_smoothing(window = 7) mobility = pd.concat([ pd.read_csv("data/2020_IN_Region_Mobility_Report.csv", parse_dates=["date"]), pd.read_csv("data/2021_IN_Region_Mobility_Report.csv", parse_dates=["date"]) ]) stringency = pd.read_csv("data/OxCGRT_latest.csv", parse_dates=["Date"]) def plot_mobility(series, label, stringency = None, until = None, annotation = "Google Mobility Data; baseline mobility measured from Jan 3 - Feb 6"): plt.plot(series.date, smoothed(series.retail_and_recreation_percent_change_from_baseline), label = "Retail/Recreation") plt.plot(series.date, smoothed(series.grocery_and_pharmacy_percent_change_from_baseline), label = "Grocery/Pharmacy") plt.plot(series.date, smoothed(series.parks_percent_change_from_baseline), label = "Parks") plt.plot(series.date, smoothed(series.transit_stations_percent_change_from_baseline), label = "Transit Stations") plt.plot(series.date, smoothed(series.workplaces_percent_change_from_baseline), label = "Workplaces") plt.plot(series.date, smoothed(series.residential_percent_change_from_baseline), label = "Residential") if until: right = pd.Timestamp(until) elif stringency is not None: right = stringency.Date.max() else: right = series.date.iloc[-1]
def setup(district) -> Tuple[Callable[[str], SIR], pd.DataFrame]:
    """Prepare the initial conditions for one district.

    Reads the district's daily recovered/deceased/confirmed series from `ts`,
    pads missing calendar days with zeros, smooths them with `smooth`, and
    reads the state of each series at `data_recency`. Also runs
    `analytical_MPVS` on the raw confirmed series.

    Returns:
        A pair ``(factory, Rt_estimates)``: `factory(seed = 0)` builds an `SIR`
        model from the computed initial conditions, and `Rt_estimates` is the
        table of estimator outputs.
    """
    # NOTE(review): the annotation advertises a factory taking a str, while the
    # factory's only argument is a seed defaulting to 0 — confirm intended type.

    def gap_filled(column):
        # Re-read the district slice and insert 0 for calendar days with no row.
        raw = getattr(ts.loc[district], column)
        calendar = pd.date_range(raw.index.min(), raw.index.max())
        return raw.reindex(calendar, fill_value = 0)

    def smoothed_daily(series):
        # Smooth, floor at zero, and truncate to integers.
        return pd.Series(smooth(series), index = series.index).clip(0).astype(int)

    demographics = simulation_initial_conditions.loc[district]

    # recoveries
    recovered_daily = smoothed_daily(gap_filled("dR"))
    recovered_total = recovered_daily.cumsum().astype(int)
    R0 = recovered_total[data_recency]

    # deaths
    deaths_daily = smoothed_daily(gap_filled("dD"))
    deaths_total = deaths_daily.cumsum().astype(int)
    D0 = deaths_total[data_recency]

    # confirmed cases: the estimator receives the raw series, not the gap-filled one
    cases_filled = gap_filled("dT")
    (
        dates,
        Rt_pred, Rt_CI_upper, Rt_CI_lower,
        T_pred, T_CI_upper, T_CI_lower,
        total_cases, new_cases_ts,
        *_
    ) = analytical_MPVS(
        ts.loc[district].dT,
        CI = CI,
        smoothing = notched_smoothing(window = smoothing),
        totals = False)
    Rt_estimates = pd.DataFrame(data = dict(
        dates        = dates,
        Rt_pred      = Rt_pred,
        Rt_CI_upper  = Rt_CI_upper,
        Rt_CI_lower  = Rt_CI_lower,
        T_pred       = T_pred,
        T_CI_upper   = T_CI_upper,
        T_CI_lower   = T_CI_lower,
        total_cases  = total_cases[2:],
        new_cases_ts = new_cases_ts,
    ))

    cases_daily = smoothed_daily(cases_filled)
    cases_total = cases_daily.cumsum().astype(int)
    T0  = cases_total[data_recency]
    dT0 = cases_daily[data_recency]

    # compartments are floored at zero
    S0 = max(0, demographics.N_tot - T0)
    I0 = max(0, T0 - R0 - D0)

    def build_model(seed = 0):
        # Everything below is evaluated on each call, as with the captured values above.
        return SIR(
            name = district,
            mortality = demographics[[f"N_{i}" for i in range(7)]] @ np.array(list(TN_IFRs.values()))/demographics.N_tot,
            population = demographics.N_tot,
            random_seed = seed,
            infectious_period = 10,
            S0 = S0,
            I0 = I0,
            R0 = R0,
            D0 = D0,
            dT0 = dT0,
            Rt0 = Rt_estimates.set_index("dates").loc[data_recency].Rt_pred * demographics.N_tot/S0)

    return (build_model, Rt_estimates)
def _value_at(series, when):
    """Return `series[when]` if `when` is an index label, otherwise the last value."""
    # .iloc[-1] makes the positional fallback explicit; a bare series[-1] on a
    # date-indexed Series depends on deprecated label/position fallback.
    return series[when] if when in series.index else series.iloc[-1]


def _daily_and_cumulative(daily, smooth, window):
    """Pad a daily series to a full calendar, smooth it, and accumulate it.

    Missing calendar days between the first and last index label are filled
    with 0. Smoothing (then clipping at 0 and truncating to int) is applied
    only when there are at least ``window + 1`` observations; shorter series
    are used as-is.

    Returns:
        ``(daily_series, cumulative_series)``.
    """
    daily = daily.reindex(pd.date_range(daily.index.min(), daily.index.max()), fill_value=0)
    if len(daily) >= window + 1:
        daily_smooth = pd.Series(smooth(daily), index=daily.index).clip(0).astype(int)
    else:
        daily_smooth = daily
    return daily_smooth, daily_smooth.cumsum().astype(int)


def assemble_data(request):
    """Assemble per-district simulation initial conditions for one state.

    Downloads seroprevalence/population data, case time series, vaccination
    doses and previously estimated Rt from the bucket into /tmp, scales
    confirmed counts by the seroprevalence-implied ratio, and uploads
    ``pipeline/sim/input/{state_code}_simulation_initial_conditions.csv``.

    Parameters:
        request: object handed to `get` to read the ``'state_code'`` field.

    Returns:
        The string ``"OK!"``.
    """
    state_code = get(request, 'state_code')
    state = state_code_lookup[state_code]
    print(f"Assembling initial conditions for {state_code} ({state}).")

    bucket = storage.Client().bucket(bucket_name)
    data = Path("/tmp")

    bucket.blob("pipeline/commons/refs/all_india_sero_pop.csv")\
        .download_to_filename(data / "all_india_sero_pop.csv")
    bucket.blob("pipeline/raw/state_case_timeseries.csv")\
        .download_to_filename(data / "state_case_timeseries.csv")
    bucket.blob("pipeline/raw/district_case_timeseries.csv")\
        .download_to_filename(data / "district_case_timeseries.csv")
    bucket.blob("pipeline/raw/vaccine_doses_statewise.csv")\
        .download_to_filename(data / "vaccine_doses_statewise.csv")
    bucket.blob(f"pipeline/est/{state_code}_district_Rt.csv")\
        .download_to_filename(data / f"{state_code}_district_Rt.csv")
    bucket.blob(f"pipeline/est/{state_code}_state_Rt.csv")\
        .download_to_filename(data / f"{state_code}_state_Rt.csv")
    print(f"Downloaded simulation input data for {state_code} ({state}).")

    district_age_pop = pd.read_csv(data / "all_india_sero_pop.csv").set_index(["state", "district"])

    # NOTE(review): status_change_date is read without parse_dates, so the index
    # holds strings; the later reindex onto pd.date_range expects labels that
    # match Timestamps — confirm the index dtype against the real CSVs.
    state_ts = pd.read_csv(data / "state_case_timeseries.csv")\
        .set_index(["detected_state", "status_change_date"])\
        .drop(columns = ["date", "time", "delta", "logdelta"])\
        .rename(columns = {
            "Deceased":     "dD",
            "Hospitalized": "dT",
            "Recovered":    "dR"
        })
    district_ts = pd.read_csv(data / "district_case_timeseries.csv")\
        .set_index(["detected_state", "detected_district", "status_change_date"]).loc[state]\
        .drop(columns = ["date", "time", "delta", "logdelta"])\
        .rename(columns = {
            "Deceased":     "dD",
            "Hospitalized": "dT",
            "Recovered":    "dR"
        })

    # Keep only the last Rt_pred row; the whole state is treated as one "district".
    state_Rt = pd.read_csv(data / f"{state_code}_state_Rt.csv", index_col = 0, parse_dates = ["dates"])\
        [["dates", "Rt_pred"]]\
        .assign(district = state)\
        .drop_duplicates(subset = "district", keep = "last")\
        [["district", "Rt_pred"]]\
        .set_index("district")
    # Keep only the last Rt_pred row per district.
    district_Rt = pd.read_csv(data / f"{state_code}_district_Rt.csv", index_col = 0, parse_dates = ["dates"])\
        [["district", "dates", "Rt_pred"]]\
        .drop_duplicates(subset = "district", keep = "last")\
        [["district", "Rt_pred"]]\
        .set_index("district")

    # Transpose so rows are dates and columns are (title-cased) state names.
    vax = pd.read_csv(data / "vaccine_doses_statewise.csv").set_index("State").T.dropna()
    vax.columns = vax.columns.str.title()
    vax.set_index(pd.to_datetime(vax.index), inplace=True)

    smooth = notched_smoothing(window=window)

    # Timestamp.today() carries the current time of day; without normalize()
    # it would never equal a midnight label in the daily indices below, and
    # every lookup would silently fall back to the last row.
    simulation_start = pd.Timestamp.today().normalize() - pd.Timedelta(days=cutoff)

    districts_to_run = district_age_pop.loc[state]
    # if time series data not available at the district level, coalesce to state/UT level
    if state in coalesce_states:
        # Convert seroprevalence rates to infected counts, sum over districts,
        # then convert back to rates for the single coalesced row.
        districts_to_run = districts_to_run\
            .assign(**{f"infected_{i}": (lambda i: lambda _: _[f"sero_{i}"] * _[f"N_{i}"])(i) for i in range(7)})\
            .drop(columns = [f"sero_{i}" for i in range(7)])\
            .sum(axis = 0)\
            .to_frame().T\
            .assign(**{f"sero_{i}": (lambda i: lambda _: _[f"infected_{i}"] / _[f"N_{i}"])(i) for i in range(7)})\
            [districts_to_run.columns]\
            .assign(district = state)\
            .set_index("district")
        ts = state_ts
        districts_to_run = districts_to_run.join(state_Rt)
    else:
        ts = district_ts
        districts_to_run = districts_to_run.join(district_Rt)

    print(f"Done reading input data for {state_code} ({state}).")
    print("Running seroprevalence scaling for districts.")

    rows = []
    for _ in districts_to_run.dropna().itertuples():
        # Positional unpack: relies on the column order of districts_to_run.
        district, sero_0, sero_1, sero_2, sero_3, sero_4, sero_5, sero_6, N_0, N_1, N_2, N_3, N_4, N_5, N_6, N_tot, Rt = _
        print(f"Scaling for {state_code}/{district}.")

        # Recoveries: scale confirmed counts by (seroprevalence-implied / confirmed) at the survey date.
        dR_conf_smooth, R_conf_smooth = _daily_and_cumulative(ts.loc[district].dR, smooth, window)
        R_conf = _value_at(R_conf_smooth, survey_date)
        R_sero = (sero_0 * N_0 + sero_1 * N_1 + sero_2 * N_2 + sero_3 * N_3 +
                  sero_4 * N_4 + sero_5 * N_5 + sero_6 * N_6)
        R_ratio = R_sero / R_conf if R_conf != 0 else 1
        R0 = _value_at(R_conf_smooth, simulation_start) * R_ratio
        print("Scaled recoveries.")

        # Deaths are used unscaled.
        dD_conf_smooth, D_conf_smooth = _daily_and_cumulative(ts.loc[district].dD, smooth, window)
        D0 = _value_at(D_conf_smooth, simulation_start)
        print("Scaled deaths.")

        # Cases.
        dT_conf = ts.loc[district].dT
        # First label of the raw (not gap-filled) confirmed series.
        pandemic_start = dT_conf.index.min()
        dT_conf_smooth, T_conf_smooth = _daily_and_cumulative(dT_conf, smooth, window)
        T_conf = _value_at(T_conf_smooth, survey_date)
        T_sero = R_sero + D0
        T_ratio = T_sero / T_conf if T_conf != 0 else 1
        T0 = _value_at(T_conf_smooth, simulation_start) * T_ratio
        print("Scaled cases.")

        S0 = max(0, N_tot - T0)
        dD0 = _value_at(dD_conf_smooth, simulation_start)
        dT0 = _value_at(dT_conf_smooth, simulation_start) * T_ratio
        I0 = max(0, (T0 - R0 - D0))

        # State-level doses are apportioned to the district by population share.
        V0 = _value_at(vax[state], simulation_start) * N_tot / districts_to_run.N_tot.sum()
        print("Resolved vaccination data.")

        rows.append((
            state_code, state, district,
            sero_0, N_0, sero_1, N_1, sero_2, N_2, sero_3, N_3,
            sero_4, N_4, sero_5, N_5, sero_6, N_6,
            N_tot, Rt, S0, I0, R0, D0, dT0, dD0, V0, pandemic_start
        ))

    pd.DataFrame(rows, columns=columns).to_csv(data / f"{state_code}_simulation_initial_conditions.csv")
    bucket.blob(f"pipeline/sim/input/{state_code}_simulation_initial_conditions.csv")\
        .upload_from_filename(str(data / f"{state_code}_simulation_initial_conditions.csv"), content_type = "text/csv")
    return "OK!"
# a snapshot of this csv is checked into the repo at data/tutorial_timeseries.csv in case you run into download problems download_data(data, "districts.csv", "https://api.covid19india.org/csv/latest/") daily_reports = pd.read_csv(data / "districts.csv", parse_dates = ["Date"])\ .rename(str.lower, axis = 1)\ .set_index(["state", "district", "date"])\ .sort_index()\ .loc["Maharashtra", "Mumbai"] daily_cases = daily_reports["confirmed"]\ .diff()\ .clip(lower = 0)\ .dropna()\ smoother = notched_smoothing(window=5) smoothed_cases = pd.Series(data=smoother(daily_cases), index=daily_cases.index) # plot raw and cleaned data beg = "December 15, 2020" end = "March 1, 2021" training_cases = smoothed_cases[beg:end] plt.scatter(daily_cases[beg:end].index, daily_cases[beg:end].values, color="black", s=5, alpha=0.5, label="raw case count data") plt.plot(training_cases.index, training_cases.values,
import pandas as pd
from epimargin.smoothing import notched_smoothing
from epimargin.estimators import analytical_MPVS
import epimargin.plots as plt

# Estimation parameters.
CI = 0.95
gamma = 0.2
window = 3
# Smoother built with the `window` defined just above.
smoothing = notched_smoothing(window=window)

# Maps long source column headers to short identifiers.
# NOTE(review): presumably used to rename raw line-list columns — the usage is
# outside this excerpt. Keys are reproduced verbatim, including the trailing
# space in "age " and the spelling "Kebupaten/Kota".
schema = {
    "Date Symptom Onset": "symptom_onset",
    "Date of Hospital Admissions": "admission",
    "Date tested": "tested",
    "Date of positive test result": "confirmed",
    "Date Recovered": "recovered",
    "Date Died": "died",
    "Kebupaten/Kota": "regency",
    "Kecamatan": "district",
    "age ": "age"
}

# Maps short regency names to longer forms.
# NOTE(review): presumably to match names used by another dataset — confirm.
regency_names = {
    'Pangkep': 'Pangkajene Dan Kepulauan',
    'Pare-Pare': 'Parepare',
    'Selayar': 'Kepulauan Selayar',
    'Sidrap': 'Sidenreng Rappang'
}