def estimate(time_series: pd.Series) -> pd.DataFrame:
    """Run ``analytical_MPVS`` on a time series and tabulate selected outputs.

    The module-level ``CI`` and ``smooth`` settings are used and totals are
    requested. The returned frame has the columns ``date``, ``Rt``,
    ``Rt_upper``, ``Rt_lower``, ``total_cases`` and ``new_cases``.
    """
    result = analytical_MPVS(time_series, CI=CI, smoothing=smooth, totals=True)

    # The first four entries are addressed from the front of the result, the
    # two case series from the back.
    date_values = result[0]
    rt_values = result[1]
    rt_upper = result[2]
    rt_lower = result[3]
    # The first two entries of the totals series are dropped
    # (presumably to align it with the other columns -- confirm).
    total_cases = result[-4][2:]
    new_cases = result[-3]

    return pd.DataFrame(
        data={
            "date": date_values,
            "Rt": rt_values,
            "Rt_upper": rt_upper,
            "Rt_lower": rt_lower,
            "total_cases": total_cases,
            "new_cases": new_cases,
        }
    )
def run_adaptive_model(df: pd.DataFrame, CI: float, smoothing: Callable, filepath: Path) -> None:
    '''
    Runs the adaptive control model of Rt and smoothed case counts for every
    state in ``df`` and saves the merged results to csv.

    For each unique value of ``df['state']`` the rows with a positive
    ``'positive'`` count are indexed by ``'date'`` and passed to
    ``analytical_MPVS``. The per-state results are stacked, outer-merged back
    onto ``df`` on ``['state', 'date']`` and written to
    ``filepath / "adaptive_estimates.csv"``.

    Args:
        df: input cases; must provide 'state', 'date' and 'positive' columns.
        CI: confidence level forwarded to ``analytical_MPVS``.
        smoothing: smoothing callable forwarded to ``analytical_MPVS``.
        filepath: directory in which the csv is written.

    Returns:
        None. The result is only written to disk.
    '''
    # Collect one frame per state and concatenate once at the end; growing a
    # DataFrame with pd.concat inside the loop re-copies all previous rows on
    # every iteration.
    state_frames = []

    # Loop through each state
    print("Estimating state Rt values...")
    for state in tqdm(df['state'].unique()):

        # Calculate Rt for that state
        state_df = df[df['state'] == state].set_index('date')
        (dates, RR_pred, RR_CI_upper, RR_CI_lower,
         T_pred, T_CI_upper, T_CI_lower,
         total_cases, new_cases_ts, _, anomaly_dates) = analytical_MPVS(
            state_df[state_df['positive'] > 0]['positive'],
            CI=CI, smoothing=smoothing)
        assert (len(dates) == len(RR_pred))

        # Save results
        state_frames.append(pd.DataFrame({
            'state': state,
            'date': dates,
            'RR_pred': RR_pred,
            'RR_CI_upper': RR_CI_upper,
            'RR_CI_lower': RR_CI_lower,
            'T_pred': T_pred,
            'T_CI_upper': T_CI_upper,
            'T_CI_lower': T_CI_lower,
            'new_cases_ts': new_cases_ts,
            'total_cases': total_cases[2:],
            # Column name keeps its existing spelling so the csv header
            # written by this function does not change.
            'anamoly': dates.isin(set(anomaly_dates))
        }))

    # With no states there is nothing to concatenate; fall back to the empty
    # frame the accumulator used to start from.
    res_full = pd.concat(state_frames, axis=0) if state_frames else pd.DataFrame()

    # Merge results back onto input df and save
    merged_df = df.merge(res_full, how='outer', on=['state', 'date'])
    merged_df.to_csv(filepath / "adaptive_estimates.csv")
smoothing = 10 CI = 0.95 state_cases = pd.read_csv(data / "Bihar_cases_data_Oct03.csv", parse_dates=["date_reported"], dayfirst=True) state_ts = state_cases["date_reported"].value_counts().sort_index() district_names, population_counts, _ = etl.district_migration_matrix( data / "Migration Matrix - District.csv") populations = dict(zip(district_names, population_counts)) # first, look at state level predictions (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates) = analytical_MPVS( state_ts, CI=CI, smoothing=notched_smoothing(window=smoothing), totals=False) plt.Rt(dates, Rt_pred[1:], Rt_CI_upper[1:], Rt_CI_lower[1:], CI, ymin=0, ymax=4)\ .title("\nBihar: Reproductive Number Estimate")\ .annotate(f"data from {str(dates[0]).split()[0]} to {str(dates[-1]).split()[0]}")\ .xlabel("date")\ .ylabel("$R_t$", rotation=0, labelpad=20)\ .show() np.random.seed(33) Bihar = SIR("Bihar", 99_000_000, dT0=T_pred[-1], Rt0=Rt_pred[-1],
# model details
CI = 0.95
smoothing = 30
alpha = 3.8
beta = 2.25
vs = 0.999

# Read both single-column text files as Series. `.squeeze("columns")` replaces
# the `squeeze=True` reader argument, which was deprecated in pandas 1.4 and
# removed in pandas 2.0.
true_Rt = pd.read_table("./true_Rt.txt", dtype="float").squeeze("columns")
obs_cases = pd.read_table("./obs_cases.txt", dtype="float").squeeze("columns")

(dates, RR_pred, RR_CI_upper, RR_CI_lower,
 T_pred, T_CI_upper, T_CI_lower,
 total_cases, new_cases_ts, anomalies, anomaly_dates) = analytical_MPVS(
    obs_cases,
    CI=CI,
    alpha=alpha,
    beta=beta,
    variance_shift=vs,
    smoothing=lambda ts: box_filter(ts, smoothing, smoothing // 2))

print("Rt today:", RR_pred[-1])
# NOTE: the value printed under the "MSE" label is the *sum* of squared
# differences between the tail of true_Rt and RR_pred, not their mean.
print("a, b, vs, MSE:", alpha, beta, vs,
      ((true_Rt.loc[len(true_Rt) - len(RR_pred):] - RR_pred)**2).sum())

# NOTE(review): the bounds are passed lower-then-upper here -- confirm this
# matches the parameter order of plot_RR_est.
plot_RR_est(dates, RR_pred, RR_CI_lower, RR_CI_upper, CI)\
    .ylabel("Estimated $R_t$")\
    .title("Synthetic Data Estimation")\
    .size(11, 8)

# Overlay the reference curve on the estimate.
plt.plot(true_Rt.index, true_Rt.values, 'k--', label="True $R_t$")
plt.xlim(0, 150)
plt.ylim(0, 2.5)
label="state test-scaled") plt.plot(df[state][:, "delta", "confirmed"], label="confirmed") plt.legend() plt.PlotDevice().title(f"\n{state} / case scaling comparison").xlabel( "\ndate").ylabel("cases\n") plt.show() # I vs D estimators gamma = 0.2 window = 7 * days CI = 0.95 smooth = notched_smoothing(window) (dates_I, Rt_I, Rtu_I, Rtl_I, *_) = analytical_MPVS(df[state][:, "delta", "confirmed"], CI=CI, smoothing=smooth, totals=False) (dates_D, Rt_D, Rtu_D, Rtl_D, *_) = analytical_MPVS(df[state][:, "delta", "deceased"], CI=CI, smoothing=smooth, totals=False) plt.Rt(dates_I, Rt_I, Rtu_I, Rtl_I, CI)\ .title(f"{state} - $R_t(I)$ estimator") plt.figure() plt.Rt(dates_D, Rt_D, Rtu_D, Rtl_D, CI)\ .title(f"{state} - $R_t(D)$ estimator") plt.show()
return None cases = pd.read_csv("data/1 Nop 2020 Data collection template update South Sulawesi_update (01112020) (2).csv", usecols = schema.keys())\ .rename(columns = schema)\ .dropna(how = 'all') parse_datetimes(cases.loc[:, "confirmed"]) cases.regency = cases.regency.str.title().map(lambda s: regency_names.get(s, s)) cases.age = cases.age.apply(parse_age) cases = cases.query("regency == 'Makassar'").dropna(subset = ["age"]) cases["age_bin"] = pd.cut(cases.age, [0, 20, 100], labels = ["school", "nonschool"]) cases = cases[cases.confirmed <= "Oct 25, 2020"] age_ts = cases[["age_bin", "confirmed"]].groupby(["age_bin", "confirmed"]).size().sort_index() (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\ = analytical_MPVS(age_ts.loc["school"], CI = CI, smoothing = smoothing, totals = False) school_Rt = np.mean(Rt_pred[-14:]) school_T_lb = T_CI_lower[-1] school_T_ub = T_CI_upper[-1] plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI)\ .title("\nMakassar: Reproductive Number Estimate: school-age population")\ .xlabel("\ndate")\ .ylabel("$R_t$\n", rotation=0, labelpad=30)\ .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")\ .show() (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\ = analytical_MPVS(age_ts.loc["nonschool"], CI = CI, smoothing = smoothing, totals = False)
# Date part (text before the first whitespace) of the current timestamp.
run_date = str(pd.Timestamp.now()).split()[0]

ts = get_time_series(df, "detected_state")

states = [
    "Maharashtra",
    "Bihar",
    "Delhi",
    "Andhra Pradesh",
    "Telangana",
    "Tamil Nadu",
    "Madhya Pradesh",
]

for state in states:
    print(state)
    print(" + running estimation...")

    # Estimate from this state's "Hospitalized" series.
    (dates, Rt_pred, RR_CI_upper, RR_CI_lower,
     T_pred, T_CI_upper, T_CI_lower,
     total_cases, new_cases_ts,
     anomalies, anomaly_dates) = analytical_MPVS(
        ts.loc[state].Hospitalized,
        CI = CI,
        smoothing = notched_smoothing(window = smoothing),
        totals = False)

    # Tabulate the outputs; the first two entries of total_cases are dropped.
    estimates = pd.DataFrame(data = dict(
        dates = dates,
        Rt_pred = Rt_pred,
        RR_CI_upper = RR_CI_upper,
        RR_CI_lower = RR_CI_lower,
        T_pred = T_pred,
        T_CI_upper = T_CI_upper,
        T_CI_lower = T_CI_lower,
        total_cases = total_cases[2:],
        new_cases_ts = new_cases_ts,
    ))
    print(" + Rt today:", Rt_pred[-1])
    # plt.Rt(dates, Rt_pred, RR_CI_lower, RR_CI_upper, CI)\
    #     .ylabel("Estimated $R_t$")\
# Output directory for the comparison figures.
figs = root / "figs/comparison/kaggle"

states = [
    "Maharashtra"
]  #, "Bihar", "Delhi", "Andhra Pradesh", "Telangana", "Tamil Nadu", "Madhya Pradesh"]

# Column 1 of the csv is parsed as a day-first date; the frame is indexed by "Date".
kaggle = pd.read_csv(
    data / "covid_19_india.csv",
    parse_dates=[1],
    dayfirst=True,
).set_index("Date")

for state in states:
    print(state)
    print(" + running estimation...")

    # "Confirmed" column restricted to the rows of the current state.
    (dates, RR_pred, RR_CI_upper, RR_CI_lower,
     T_pred, T_CI_upper, T_CI_lower,
     total_cases, new_cases_ts,
     anomalies, anomaly_dates) = analytical_MPVS(
        kaggle.loc[kaggle["State/UnionTerritory"] == state, "Confirmed"],
        CI=CI,
        smoothing=lambda ts: box_filter(ts, smoothing, 3))

    # Tabulate the outputs; the first two entries of total_cases are dropped.
    estimates = pd.DataFrame(data=dict(
        dates=dates,
        RR_pred=RR_pred,
        RR_CI_upper=RR_CI_upper,
        RR_CI_lower=RR_CI_lower,
        T_pred=T_pred,
        T_CI_upper=T_CI_upper,
        T_CI_lower=T_CI_lower,
        total_cases=total_cases[2:],
        new_cases_ts=new_cases_ts,
    ))
    print(" + Rt today:", RR_pred[-1])
(data, figs) = setup(level = "INFO")

# for province in provinces:
#     logger.info("downloading data for %s", province)
#     download_data(data, filename(province), base_url = "https://data.covid19.go.id/public/api/")

# One time series per province, keyed by province name.
province_cases = {
    name: load_province_timeseries(data, name)
    for name in provinces
}

# Put every province on one shared daily index spanning the earliest to the
# latest date seen in any of them; gaps are padded forward, then zero-filled.
bgn = min(series.index.min() for series in province_cases.values())
end = max(series.index.max() for series in province_cases.values())
idx = pd.date_range(start = bgn, end = end)
province_cases = {
    name: series.reindex(idx, method = "pad").fillna(0)
    for (name, series) in province_cases.items()
}

# National series is the sum of the aligned provincial series.
natl_cases = sum(province_cases.values())

logger.info("running national-level Rt estimate")
(dates,
 Rt_pred, Rt_CI_upper, Rt_CI_lower,
 T_pred, T_CI_upper, T_CI_lower,
 total_cases, new_cases_ts,
 anomalies, anomaly_dates) = analytical_MPVS(natl_cases, CI = CI, smoothing = smoothing)

plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI, ymin=0, ymax=4)\
    .title("\nIndonesia: Reproductive Number Estimate")\
    .xlabel("\ndate")\
    .ylabel("$R_t$", rotation=0, labelpad=30)\
    .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")\
    .show()

logger.info("running case-forward prediction")
# Seed the model with the last estimated values and run it for 14 steps.
IDN = SIR(
    "IDN",
    267.7e6,
    dT0 = T_pred[-1],
    Rt0 = Rt_pred[-1],
    mobility = 0,
    random_seed = 0,
).run(14)

logger.info("province-level projections")
migration = np.zeros((len(provinces), len(provinces)))
estimates = []
province: load_province_timeseries(data, province, "Apr 1, 2020") for province in provinces } bgn = min(cases.index.min() for cases in province_cases.values()) end = max(cases.index.max() for cases in province_cases.values()) idx = pd.date_range(bgn, end) province_cases = { province: cases.reindex(idx, method="pad").fillna(0) for (province, cases) in province_cases.items() } prediction_period = 14 * days for province in provinces: title = province.title().replace("Dki", "DKI") logger.info(title) (dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\ = analytical_MPVS(province_cases[province], CI = CI, smoothing = smoothing) IDN = Model.single_unit(name = province, population = priority_pops[province], I0 = T_pred[-1], RR0 = RR_pred[-1], upper_CI = T_CI_upper[-1], lower_CI = T_CI_lower[-1], mobility = 0, random_seed = 0)\ .run(prediction_period) plt.Rt(dates, RR_pred, RR_CI_upper, RR_CI_lower, CI, ymin=0.2, ymax=4.5)\ .title(f"{title}")\ .xlabel("\ndate")\ .ylabel("$R_t$", rotation=0, labelpad=30)\ .show() # plt.daily_cases(dates, T_pred, T_CI_upper, T_CI_lower, new_cases_ts, anomaly_dates, anomalies, CI, IDN[0].delta_T[:-1], IDN[0].lower_CI[1:], IDN[0].upper_CI[1:])\ # .title(f"\n{title}")\ # .xlabel("\ndate")\ # .ylabel("cases")\ # .show()
.set_axis(dkij.columns.str.lower(), 1)\ .assign( district = dkij.district.str.title(), subdistrict = dkij.subdistrict.apply(lambda name: next((k for (k, v) in replacements.items() if name in v), name)), ) district_cases = dkij.groupby(["district", "date_positiveresult"])["id"].count().sort_index() districts = sorted(dkij.district.unique()) migration = np.zeros((len(districts), len(districts))) R_mandatory = dict() R_voluntary = dict() max_len = 1 + max(map(len, districts)) with tqdm(districts) as progress: for district in districts: progress.set_description(f"{district :<{max_len}}") (dates, RR_pred, *_) = analytical_MPVS(district_cases.loc[district], CI = CI, smoothing = notched_smoothing(window = window), totals=False) Rt = pd.DataFrame(data = {"Rt": RR_pred[1:]}, index = dates) R_mandatory[district] = np.mean(Rt[(Rt.index > "April 1, 2020") & (Rt.index < "June 1, 2020")])[0] R_voluntary[district] = np.mean(Rt[(Rt.index < "April 1, 2020")])[0] pops = [ 2_430_410, 910_381, 2_164_070, 2_817_994, 1_729_444, 23_011 ] gdf = gpd.read_file("data/gadm36_IDN_shp/gadm36_IDN_2.shp")\ .query("NAME_1 == 'Jakarta Raya'")\
# Load the Stata file and keep only the DKI JAKARTA rows.
cases = pd.read_stata(data/"coviddkijakarta_290920.dta")\
    .query("province == 'DKI JAKARTA'")\
    .drop(columns = dkij_drop_cols + ["province"])

# Lower-case the column names, title-case the district and map each
# subdistrict to the first key of `replacements` whose value contains it
# (falling back to the name itself).
# `axis` is passed by keyword: pandas 2.0 no longer accepts it positionally.
cases = cases\
    .set_axis(cases.columns.str.lower(), axis = 1)\
    .assign(
        district    = cases.district.str.title(),
        subdistrict = cases.subdistrict.apply(lambda name: next((k for (k, v) in replacements.items() if name in v), name)),
    )

# Age bins: (0, 20], then ten-year bins up to 70, then (70, 100].
cases["age_bin"] = pd.cut(cases.age, bins=[0] + list(range(20, 80, 10)) + [100])

# Count of rows per (age bin, positive-result date).
age_ts = cases[["age_bin", "date_positiveresult"]]\
    .groupby(["age_bin", "date_positiveresult"])\
    .size()\
    .sort_index()

dkij_max_rts = {}

# All ages: total per date across the age bins. `groupby(level=1).sum()` is
# the replacement for `Series.sum(level=1)`, which was removed in pandas 2.0.
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
    = analytical_MPVS(age_ts.groupby(level = 1).sum(), CI = CI, smoothing = smoothing, totals = False)
r = pd.Series(Rt_pred, index=dates)
# Largest estimate among the dates that fall in April.
dkij_max_rts["all"] = r[r.index.month_name() == "April"].max()

# Same computation for each age bin separately.
for age_bin in age_ts.index.get_level_values(0).categories:
    print(age_bin)
    (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
        = analytical_MPVS(age_ts.loc[age_bin], CI = CI, smoothing = smoothing, totals = False)
    r = pd.Series(Rt_pred, index=dates)
    dkij_max_rts[age_bin] = r[r.index.month_name() == "April"].max()

print(dkij_max_rts)
def estimate(ts, smoothing):
    """Return the ``analytical_MPVS`` estimate for ``ts.Hospitalized`` as a frame.

    The result has a single column ``R`` and is indexed by ``date``. When an
    entry of the returned dates is a tuple, its second element is used as the
    date; otherwise the entry is used as is.
    """
    (raw_dates, r_values, *_) = analytical_MPVS(ts.Hospitalized, smoothing = smoothing)

    flat_dates = []
    for entry in raw_dates:
        if isinstance(entry, tuple):
            flat_dates.append(entry[1])
        else:
            flat_dates.append(entry)

    frame = pd.DataFrame({"date": flat_dates, "R": r_values})
    return frame.set_index("date")
# Resolve the v3 / v4 file names against the data directory and load them.
df = load_all_data(
    v3_paths = [data/name for name in paths['v3']],
    v4_paths = [data/name for name in paths['v4']]
)

# Date parts (text before the first whitespace) of the latest announcement
# date in the data and of the current timestamp.
data_recency = str(df["date_announced"].max()).split()[0]
run_date = str(pd.Timestamp.now()).split()[0]

# Restrict to Delhi and estimate from the strictly positive deltas.
ts = get_time_series(df[df.detected_state == "Delhi"])

(dates,
 RR_pred, RR_CI_upper, RR_CI_lower,
 T_pred, T_CI_upper, T_CI_lower,
 total_cases, new_cases_ts,
 anomalies, anomaly_dates) = analytical_MPVS(
    ts.delta[ts.delta > 0],
    CI = CI,
    smoothing = convolution(window = smoothing))
#= analytical_MPVS(ts.Hospitalized[ts.Hospitalized > 0], CI = CI, smoothing = lambda ts: box_filter(ts, smoothing, 10))

# Fixed seed, then a single-unit model started from the last estimates.
np.random.seed(33)
delhi = Model([
    ModelUnit("Delhi", 18_000_000, I0 = T_pred[-1], RR0 = RR_pred[-1], mobility = 0)
])
delhi.run(14, np.zeros((1,1)))

# One timestamp per entry of the model's delta_T, counted in days from the
# last estimation date.
t_pred = [dates[-1] + pd.Timedelta(days = offset) for (offset, _) in enumerate(delhi[0].delta_T)]

plot_RR_est(dates, RR_pred, RR_CI_upper, RR_CI_lower, CI)
PlotDevice()\
    .title("Delhi: Reproductive Number Estimate")\
    .xlabel("Date")\
    .ylabel("Rt", rotation=0, labelpad=20)
plt.show()

# Overwrite the first entry of the model's interval with the last estimated one.
delhi[0].lower_CI[0] = T_CI_lower[-1]
delhi[0].upper_CI[0] = T_CI_upper[-1]

print(delhi[0].delta_T)
# plt.daily_cases(dates, T_pred, T_CI_upper, T_CI_lower, new_cases_ts, anomaly_dates, anomalies, CI, # prediction_ts = [ # (current[0].delta_T[1:], current[0].lower_CI[1:], current[0].upper_CI[1:], "orange", r"projection with current $R_t$"), # (target[0].delta_T[1:], target[0].lower_CI[1:], target[0].upper_CI[1:], "green", r"projection with $R_t \rightarrow 0.9$") # ])\ # .adjust(left = 0.10, right = 0.95, bottom = 0.15, top = 0.95)\ # .xlabel("date")\ # .ylabel("cases")\ # .show() # run Indian states for (state, pop) in [("Maharashtra", 112374333), ("Gujarat", 60439692), ("Bihar", 104099452)]: print(state) (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates) \ = analytical_MPVS(india[india.state == state][["date", "confirmed"]].set_index("date") , CI = CI, smoothing = smoothing, totals=True) dates = [pd.Timestamp(_).to_pydatetime().date() for _ in dates] anomaly_dates = [ pd.Timestamp(_).to_pydatetime().date() for _ in anomaly_dates ] model = lambda: Model.single_unit(name=state, RR0=Rt_pred[-1], population=pop, infectious_period=infectious_period, I0=T_pred[-1], lower_CI=T_CI_lower[-1], upper_CI=T_CI_upper[-1], random_seed=33) forward_pred_period = 9 t_pred = [ dates[-1] + pd.Timedelta(days=i)
# Per-group (index level 0) first difference of 'Hospitalized', and the day
# of week taken from index level 1.
time_series["delta_I"] = time_series.groupby(level=0)['Hospitalized'].diff()
time_series["dow"] = time_series.index.get_level_values(1).dayofweek

# One plot for the whole frame, then one per value of index level 0.
plot_average_change(
    time_series,
    "(All India)",
    filename=figs / "avg_delta_I_DoW_India.png",
)
for state in tqdm(time_series.index.get_level_values(0).unique()):
    plot_average_change(
        time_series.loc[state],
        f"({state})",
        filename=figs / f"avg_delta_I_DoW_{state}.png",
    )

# are anomalies falling on certain days?
print("checking anomalies...")
smoothing = 5

# Only the last element of the result (the anomaly dates) is kept; the final
# observation of each input series is left out.
(*_, anomaly_dates) = analytical_MPVS(
    natl_time_series["Hospitalized"].iloc[:-1],
    CI=0.95,
    smoothing=convolution(window=smoothing),
)
anomaly_histogram(
    anomaly_dates,
    "(All India)",
    filename=figs / "anomaly_DoW_hist_India.png",
)
for state in tqdm(time_series.index.get_level_values(0).unique()):
    (*_, anomaly_dates) = analytical_MPVS(
        time_series.loc[state]["Hospitalized"].iloc[:-1],
        CI=0.95,
        smoothing=convolution(window=smoothing),
    )
    anomaly_histogram(
        anomaly_dates,
        f"({state})",
        filename=figs / f"anomaly_DoW_hist_{state}.png",
    )

print("estimating spectral densities...")
# what does the aggregate spectral density look like?
for _ in top_level[timeseries]], columns=["date", "total_cases"]) df["date"] = (date_scale * df["date"]).apply(pd.Timestamp) return df.set_index("date") logger.info("district-level projections") pops = [sum([2_430_410, 910_381, 2_164_070, 2_817_994, 1_729_444, 23_011])] dkij = load_province_timeseries(data, district) R_mandatory = dict() R_voluntary = dict() (dates, Rt_pred, *_) = analytical_MPVS(dkij, CI=CI, smoothing=notched_smoothing(window=window), totals=True) Rt = pd.DataFrame(data={"Rt": Rt_pred}, index=dates) R_mandatory[district] = np.mean(Rt[(Rt.index >= "Sept 21, 2020")])[0] R_voluntary[district] = np.mean(Rt[(Rt.index < "April 1, 2020")])[0] si, sf = 0, 10 simulation_results = [ run_policies([dkij.iloc[-1][0] - dkij.iloc[-2][0]], pops, districts, np.zeros((1, 1)), gamma, R_mandatory, R_voluntary,
CI = 0.95 # private data state_cases = pd.read_csv(data / "Bihar_cases_data_Jul23.csv", parse_dates=["date_reported"], dayfirst=True) state_ts = state_cases["date_reported"].value_counts().sort_index() district_names, population_counts, _ = etl.district_migration_matrix( data / "Migration Matrix - District.csv") populations = dict(zip(district_names, population_counts)) # first, look at state level predictions (dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates) = analytical_MPVS(state_ts, CI=CI, smoothing=convolution(window=smoothing)) plot_RR_est(dates, RR_pred, RR_CI_upper, RR_CI_lower, CI, ymin=0, ymax=4)\ .title("Bihar: Reproductive Number Estimate Comparisons")\ .xlabel("Date")\ .ylabel("Rt", rotation=0, labelpad=20) plt.ylim(0, 4) # public data paths = { "v3": [data_path(_) for _ in (1, 2)], "v4": [data_path(_) for _ in range(3, 13)] } for target in paths['v3'] + paths['v4']:
.query("province == 'DKI JAKARTA'")\ .drop(columns=dkij_drop_cols + ["province"]) dkij["district"] = dkij.district.str.title() gdf = gpd.read_file("data/gadm36_IDN_shp/gadm36_IDN_2.shp")\ .query("NAME_1 == 'Jakarta Raya'")\ .drop(columns=shp_drop_cols) bbox = shapely.geometry.box(minx = 106.65, maxx = 107.00, miny = -6.40, maxy=-6.05) gdf = gdf[gdf.intersects(bbox)] jakarta_districts = dkij.district.str.title().unique() jakarta_cases = dkij.groupby("date_positiveresult")["id"].count().rename("cases") logger.info("running province-level Rt estimate") (dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\ = analytical_MPVS(jakarta_cases, CI = CI, smoothing = smoothing, totals=False) plt.Rt(dates, RR_pred[1:], RR_CI_upper[1:], RR_CI_lower[1:], CI)\ .title("\nDKI Jakarta: Reproductive Number Estimate")\ .xlabel("\ndate")\ .ylabel("$R_t$\n", rotation=0, labelpad=30)\ .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")\ .show() logger.info("running case-forward prediction") prediction_period = 14*days IDN = SIR(name = "IDN", population = 267.7e6, dT0 = T_pred[-1], Rt0 = RR_pred[-1], upper_CI = T_CI_upper[-1], lower_CI = T_CI_lower[-1], mobility = 0, random_seed = 0)\ .run(prediction_period) plt.daily_cases(dates, T_pred[1:], T_CI_upper[1:], T_CI_lower[1:], new_cases_ts[1:], anomaly_dates, anomalies, CI,
plt.plot(sir_model.dT) plt.show() plt.plot(R0_timeseries, "-", color="black", label="$R_0$") plt.plot(sir_model.Rt, "-", color="dodgerblue", label="$R_t$") plt.legend(framealpha=1, handlelength=1, loc="best") plt.PlotDevice().xlabel("time").ylabel("reproductive rate").adjust(left=0.10, bottom=0.15, right=0.99, top=0.99) plt.ylim(0.5, 1.5) plt.show() # 1: parametric scheme: dates, Rt, Rt_lb, Rt_ub, *_, anomalies, anomaly_dates = analytical_MPVS( pd.DataFrame(sir_model.dT), smoothing=convolution("uniform", 2), CI=0.99, totals=False) pd = plt.Rt(dates, Rt, Rt_ub, Rt_lb, ymin = 0.5, ymax = 2.5, CI = 0.99, yaxis_colors = False, format_dates = False, critical_threshold = False)\ .xlabel("time")\ .ylabel("reproductive rate")\ .adjust(left = 0.11, bottom = 0.15, right = 0.98, top = 0.98) plt.plot(sir_model.Rt, "-", color="white", linewidth=3, zorder=10) sim_rt, = plt.plot(sir_model.Rt, "-", color="dodgerblue", linewidth=2, zorder=11) anoms = plt.vlines(anomaly_dates, 0, 4, colors="red", linewidth=2, alpha=0.5) plt.legend([pd.markers["Rt"], sim_rt, anoms], ["Estimated $R_t$ (99% CI)", "simulated $R_t$", "anomalies"],
gamma = 0.2 smoothing = 10 CI = 0.95 download_data(data, 'state_wise_daily.csv') state_df = load_statewise_data(data / "state_wise_daily.csv") country_time_series = get_time_series(state_df) estimates = [] timeseries = [] # country level (dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates) = analytical_MPVS( country_time_series["Hospitalized"].iloc[:-1], CI=CI, smoothing=notched_smoothing(window=smoothing)) country_code = state_name_lookup["India"] for row in zip(dates, RR_pred, RR_CI_upper, RR_CI_lower): timeseries.append((country_code, *row)) # state level rt estimates state_time_series = get_time_series(state_df, 'state') state_names = list(state_time_series.index.get_level_values(level=0).unique()) max_len = 1 + max(map(len, state_names)) with tqdm(state_names) as states: for state in states: state_code = state_name_lookup[state] states.set_description(f"{state :<{max_len}}") try:
# .apply(get_generation_interval, axis = 1)\ # .dropna()\ # .value_counts()\ # .sort_index() # generation_interval = generation_interval[(generation_interval.index >= 0) & (generation_interval.index <= 60)] # generation_interval /= generation_interval.sum() new_cases = cases.confirmed.value_counts().sort_index() new_cases_smoothed = smoothing(new_cases) plt.plot(new_cases, '.', color = "blue") plt.plot(new_cases.index, new_cases_smoothed, '-', color = "black") plt.show() logger.info("running province-level Rt estimate") (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\ = analytical_MPVS(new_cases, CI = CI, smoothing = smoothing, totals = False) plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI)\ .title("\nSouth Sulawesi: Reproductive Number Estimate")\ .xlabel("\ndate")\ .ylabel("$R_t$\n", rotation=0, labelpad=30)\ .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")\ .show() logger.info("running case-forward prediction") prediction_period = 14*days I0 = (~cases.confirmed.isna()).sum() - (~cases.recovered.isna()).sum() - (~cases.died.isna()).sum() IDN = SIR(name = "IDN", population = 8_819_500, dT0 = T_pred[-1], Rt0 = Rt_pred[-1], upper_CI = T_CI_upper[-1], lower_CI = T_CI_lower[-1], mobility = 0, random_seed = 0, I0 = I0)\ .run(prediction_period) plt.daily_cases(dates, T_pred, T_CI_upper, T_CI_lower, new_cases_ts, anomaly_dates, anomalies, CI,
# Normalise the age structure so the ratios sum to one.
age_structure_norm = sum(IN_age_structure.values())
IN_age_ratios = np.array(
    [v / age_structure_norm for v in IN_age_structure.values()])
# Split a quantity across age groups in proportion to the age structure
# (truncated to integers).
split_by_age = lambda v: (v * IN_age_ratios).astype(int)

# get age-specific prevalence from KA sero
KA = pd.read_stata("data/ka_cases_deaths_time_newagecat.dta")
KA.agecat = KA.agecat.where(
    KA.agecat != 85,
    75)  # we don't have econ data for 85+ so combine 75+ and 85+ categories

# Case counts per (age category, date); unstack/fillna/stack fills every
# missing (age category, date) combination with 0.
KA_agecases = KA.groupby(["agecat", "date"])["patientcode"]\
    .count().sort_index().rename("cases")\
    .unstack().fillna(0).stack()

# Total per date across age categories. `groupby(level=...).sum()` replaces
# `Series.sum(level=...)`, which was removed in pandas 2.0.
KA_ts = KA_agecases.groupby(level=1).sum()

(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower,
 T_pred, T_CI_upper, T_CI_lower,
 total_cases, new_cases_ts, anomalies,
 anomaly_dates) = analytical_MPVS(KA_ts, notched_smoothing(5))

# Share of all cases that falls in each age category.
COVID_age_ratios = (KA_agecases.groupby(level=0).sum() / KA_agecases.sum()).values
# Split by the case shares computed above. This previously multiplied by
# IN_age_ratios, which made it an exact duplicate of split_by_age.
split_by_prevalence = lambda v: (v * COVID_age_ratios).astype(int)

for seed in range(10):
    model = AgeStructured("KA", 6.11e7, 857, 915345, 1.826,
                          diag(u) @ C, IN_age_ratios, COVID_age_ratios, seed)
    # Step the model until its latest dT sums to zero, counting the steps.
    counter = 0
    while model.dT[-1].sum() > 0:
        model.forward_epi_step()
        counter += 1
    print(seed, counter, model.dT)
if vax_pct_annual_goal == 0 and vax_effectiveness != 1.00: continue # grab time series D, R = ts.loc[district][["dD", "dR"]].sum() dT_conf_district = ts.loc[district].dT dT_conf_district = dT_conf_district.reindex(pd.date_range( dT_conf_district.index.min(), dT_conf_district.index.max()), fill_value=0) dT_conf_district_smooth = pd.Series( smooth(dT_conf_district), index=dT_conf_district.index).clip(0).astype(int) # run Rt estimation on scaled timeseries (Rt_dates, Rt_est, *_) = analytical_MPVS(T_ratio * dT_conf_district_smooth, CI=CI, smoothing=lambda _: _, totals=False) Rt = dict(zip(Rt_dates, Rt_est)) daily_rate = vax_pct_annual_goal / 365 daily_vax_doses = int(vax_effectiveness * daily_rate * N_district) T_scaled = dT_conf_district_smooth.cumsum()[simulation_start] * T_ratio model = SIR( name=state, population=N_district, dT0=np.ones(num_sims) * (dT_conf_district_smooth[simulation_start] * T_ratio).astype(int), Rt0=Rt[simulation_start] * N_district / (N_district - T_scaled), I0=np.ones(num_sims) * (T_scaled - R - D),
.query("age.str.strip() != ''", engine = "python") parse_datetimes(cases.loc[:, "confirmed"]) cases.regency = cases.regency.str.title().map( lambda s: regency_names.get(s, s)) cases.age = cases.age.apply(parse_age) cases = cases.dropna(subset=["age"]) cases["age_bin"] = pd.cut(cases.age, bins=[0] + list(range(20, 80, 10)) + [100]) age_ts = cases[["age_bin", "confirmed"]].groupby(["age_bin", "confirmed"]).size().sort_index() ss_max_rts = {} fig, axs = plt.subplots(4, 2, True, True) (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\ = analytical_MPVS(age_ts.sum(level = 1), CI = CI, smoothing = notched_smoothing(window = 5), totals = False) plt.sca(axs.flat[0]) plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI).annotate(f"all ages").adjust(left=0.04, right=0.96, top=0.95, bottom=0.05, hspace=0.3, wspace=0.15) r = pd.Series(Rt_pred, index=dates) ss_max_rts["all"] = r[r.index.month_name() == "April"].max() for (age_bin, ax) in zip(age_ts.index.get_level_values(0).categories, axs.flat[1:]): print(age_bin) (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
gamma = 0.2 window = 10 CI = 0.95 state_cases = pd.read_csv(data/"Bihar_cases_data_Oct03.csv", parse_dates=["date_reported", "date_status_change"], dayfirst=True) state_cases["geo_reported"] = state_cases.geo_reported.str.strip() state_cases = state_cases[state_cases.date_reported <= "2020-09-30"] state_ts = state_cases["date_reported"].value_counts().sort_index() district_ts = state_cases.groupby(["geo_reported", "date_reported"])["date_reported"].count().sort_index() districts, pops, migrations = etl.district_migration_matrix(data/"Migration Matrix - District.csv") districts = sorted([etl.replacements.get(dn, dn) for dn in districts]) R_mandatory = dict() for district in districts:#district_ts.index.get_level_values(0).unique(): try: (_, Rt, *_) = analytical_MPVS(district_ts.loc[district], CI = CI, smoothing = notched_smoothing(window = 10), totals = False) Rm = np.mean(Rt) except ValueError as v: Rm = 1.5 R_mandatory[district] = Rm R_voluntary = {district: 1.2*R for (district, R) in R_mandatory.items()} si, sf = 0, 10 simulation_results = [ run_policies(state_cases, pops, districts, migrations, gamma, R_mandatory, R_voluntary, lockdown_period = lockdown_period, total = total_time, seed = seed) for seed in tqdm(range(si, sf)) ] plt.simulations(
ts_full = get_time_series(df, "detected_state")
# Estimation uses only the rows up to the cutoff date.
ts = ts_full.query("status_change_date <= 'October 14, 2020'")

# Per-state settings; the trailing [-1:] keeps only the last entry of each list.
states    = ["Bihar", "Maharashtra", "Punjab", "Tamil Nadu"][-1:]
codes     = ["BR", "MH", "PN", "TN"][-1:]
pops      = [99.02e6, 114.2e6, 27.98e6, 67.86e6][-1:]
Rt_ranges = [(0.9, 2.4), (0.55, 2.06), (0.55, 2.22), (0.84, 1.06)][-1:]
windows   = [7, 14, 7, 10][-1:]

for (state, code, pop, Rt_range, smoothing) in zip(states, codes, pops, Rt_ranges, windows):
    print(state)
    print(" + running estimation...")

    # Smoothed copy of the full (untruncated) series for this state.
    state_ts_full = pd.Series(
        data  = notched_smoothing(window = smoothing)(ts_full.loc[state].Hospitalized),
        index = ts_full.loc[state].Hospitalized.index)

    # The estimator is given the truncated series with an identity smoother.
    (dates, Rt_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
        = analytical_MPVS(ts.loc[state].Hospitalized, CI = CI, smoothing = lambda x:x, totals = False)

    Rt = pd.DataFrame({"Rt": Rt_pred}, index = dates)
    # Mean Rt inside / before the date window. The "Rt" column is averaged
    # directly: np.mean(frame)[0] fails on pandas >= 2.0, where the reduction
    # with axis=None yields a scalar, and relied on positional indexing.
    Rt_m = Rt.loc[(Rt.index >= "31 March, 2020") & (Rt.index <= "17 May, 2020"), "Rt"].mean()
    Rt_v = Rt.loc[(Rt.index < "31 March, 2020"), "Rt"].mean()

    print(" + Rt today:", Rt_pred[-1])
    print(" + Rt_m :", Rt_m)
    print(" + Rt_v :", Rt_v)

    historical = pd.DataFrame({"smoothed": new_cases_ts}, index = dates)

    # Bounds are passed upper-then-lower, matching the argument order of the
    # other plt.Rt call sites.
    plt.Rt(dates, Rt_pred, RR_CI_upper, RR_CI_lower, CI)\
        .ylabel("$R_t$")\
        .xlabel("date")\
        .title(f"\n{state}: Reproductive Number Estimate")\
        .annotate(f"public data from {str(dates[0]).split()[0]} to {str(dates[-1]).split()[0]}")\
        .show()