def plot_mobility(series, label, stringency = None, until = None, annotation = "Google Mobility Data; baseline mobility measured from Jan 3 - Feb 6"):
    plt.plot(series.date, smoothed(series.retail_and_recreation_percent_change_from_baseline), label = "Retail/Recreation")
    plt.plot(series.date, smoothed(series.grocery_and_pharmacy_percent_change_from_baseline),  label = "Grocery/Pharmacy")
    plt.plot(series.date, smoothed(series.parks_percent_change_from_baseline),                 label = "Parks")
    plt.plot(series.date, smoothed(series.transit_stations_percent_change_from_baseline),      label = "Transit Stations")
    plt.plot(series.date, smoothed(series.workplaces_percent_change_from_baseline),            label = "Workplaces")
    plt.plot(series.date, smoothed(series.residential_percent_change_from_baseline),           label = "Residential")
    # pick the right edge of the x-axis: explicit cutoff, else stringency data, else mobility data
    if until:
        right = pd.Timestamp(until)
    elif stringency is not None:
        right = stringency.Date.max()
    else:
        right = series.date.iloc[-1]
    lax = plt.gca()
    if stringency is not None:
        # overlay OxCGRT stringency indices on a secondary y-axis
        plt.sca(lax.twinx())
        stringency_IN = stringency.query("CountryName == 'India'")
        stringency_US = stringency.query("(CountryName == 'United States') & (RegionName.isnull())", engine = "python")
        plt.plot(stringency_IN.Date, stringency_IN.StringencyIndex, 'k--', alpha = 0.6, label = "IN Measure Stringency")
        plt.plot(stringency_US.Date, stringency_US.StringencyIndex, 'k.',  alpha = 0.6, label = "US Measure Stringency")
        plt.PlotDevice().ylabel("lockdown stringency index", rotation = -90, labelpad = 50)
        plt.legend()
        plt.sca(lax)
    plt.legend(loc = "lower right")
    plt.fill_betweenx((-100, 60), pd.to_datetime("March 24, 2020"), pd.to_datetime("June 1, 2020"), color = "black", alpha = 0.05, zorder = -1)
    plt.text(s = "national lockdown", x = pd.to_datetime("April 27, 2020"), y = -90, fontdict = plt.theme.note, ha = "center", va = "top")
    plt.PlotDevice()\
        .xlabel("\ndate")\
        .ylabel("% change in mobility\n")
        # .title(f"\n{label}: Mobility & Lockdown Trends")
        # .annotate(annotation)
    plt.ylim(-100, 60)
    plt.xlim(left = series.date.iloc[0], right = right)
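# A minimal usage sketch (hypothetical, for illustration): this assumes `smoothed`
# is a convolution smoother such as `notched_smoothing(window = 7)`, `mobility` is
# the Google Mobility CSV loaded as a DataFrame with a parsed `date` column, and
# `stringency` is the OxCGRT timeseries with a parsed `Date` column:
#
#   series = mobility[mobility.sub_region_1.isna()]  # country-level rows only
#   plot_mobility(series, "India", stringency = stringency)
#   plt.show()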
"dates": dth_dates, "Rt_pred": dth_Rt_pred, "Rt_CI_upper": dth_Rt_CI_upper, "Rt_CI_lower": dth_Rt_CI_lower, "T_pred": dth_T_pred, "T_CI_upper": dth_T_CI_upper, "T_CI_lower": dth_T_CI_lower, "total_cases": dth_total_cases[2:], "new_cases_ts": dth_new_cases_ts, }) dth_estimates["anomaly"] = dth_estimates["dates"].isin( set(dth_anomaly_dates)) print(" + Rt (dth) today:", inf_Rt_pred[-1]) fig, axs = plt.subplots(1, 2, sharey=True) plt.sca(axs[0]) plt.Rt(inf_dates, inf_Rt_pred, inf_Rt_CI_lower, inf_Rt_CI_upper, CI)\ .axis_labels("date", "$R_t$") plt.title("estimated from infections", loc="left", fontdict=plt.theme.label) # fig, axs = plt.subplots(3, 1, sharex = True) # plt.sca(axs[0]) # plt.plot(dth_dates, delhi_dD_smoothed[2:], color = "orange") # plt.title("d$D$/d$t$", loc = "left", fontdict = plt.theme.label) # plt.sca(axs[1]) # plt.plot(dth_dates, np.diff(delhi_dD_smoothed)[1:], color = "red") # plt.title("d$^2D$/d$t^2$", loc = "left", fontdict = plt.theme.label)
def generate_report(state_code: str):
    print(f"Received request for {state_code}.")
    state = state_code_lookup[state_code]
    normalized_state = state.replace(" and ", " And ").replace(" & ", " And ")
    blobs = {
        f"pipeline/est/{state_code}_state_Rt.csv":    f"/tmp/state_Rt_{state_code}.csv",
        f"pipeline/est/{state_code}_district_Rt.csv": f"/tmp/district_Rt_{state_code}.csv",
        f"pipeline/commons/maps/{state_code}.json":   f"/tmp/state_{state_code}.geojson",
    } if normalized_state not in dissolved_states else {
        f"pipeline/est/{state_code}_state_Rt.csv": f"/tmp/state_Rt_{state_code}.csv",
    }
    for (blob_name, filename) in blobs.items():
        bucket.blob(blob_name).download_to_filename(filename)
    print(f"Downloaded estimates for {state_code}.")

    state_Rt = pd.read_csv(f"/tmp/state_Rt_{state_code}.csv", parse_dates = ["dates"], index_col = 0)

    plt.close("all")
    dates = [pd.Timestamp(date).to_pydatetime() for date in state_Rt.dates]
    plt.Rt(dates, state_Rt.Rt_pred, state_Rt.Rt_CI_lower, state_Rt.Rt_CI_upper, CI)\
        .axis_labels("date", "$R_t$")\
        .title(f"{state}: $R_t$ over time", ha = "center", x = 0.5)\
        .adjust(left = 0.11, bottom = 0.16)
    plt.gcf().set_size_inches(3840 / 300, 1986 / 300)
    plt.savefig(f"/tmp/{state_code}_Rt_timeseries.png")
    plt.close()
    print(f"Generated timeseries plot for {state_code}.")

    # check output is at least 50 KB
    timeseries_size_kb = os.stat(f"/tmp/{state_code}_Rt_timeseries.png").st_size / 1000
    print(f"Timeseries artifact size: {timeseries_size_kb} kb")
    assert timeseries_size_kb > 50
    bucket.blob(f"pipeline/rpt/{state_code}_Rt_timeseries.png").upload_from_filename(
        f"/tmp/{state_code}_Rt_timeseries.png", content_type = "image/png")

    # district-level estimates exist for all non-dissolved states; compute the
    # latest Rt per district here so that both the choropleth and the top-10
    # table (which also runs for island states) can use it
    if normalized_state not in dissolved_states:
        district_Rt = pd.read_csv(f"/tmp/district_Rt_{state_code}.csv", parse_dates = ["dates"], index_col = 0)
        latest_Rt = district_Rt[district_Rt.dates == district_Rt.dates.max()]\
            .set_index("district")["Rt_pred"].to_dict()
        top10 = [(k, "> 3.0" if v > 3 else f"{v:.2f}") for (k, v) in
                 sorted(latest_Rt.items(), key = lambda t: t[1], reverse = True)[:10]]

    if normalized_state not in (island_states + dissolved_states):
        gdf = gpd.read_file(f"/tmp/state_{state_code}.geojson")
        gdf["Rt"] = gdf.district.map(latest_Rt)
        fig, ax = plt.subplots()
        fig.set_size_inches(3840 / 300, 1986 / 300)
        plt.choropleth(gdf, title = None, mappable = plt.get_cmap(0.75, 2.5), fig = fig, ax = ax)\
            .adjust(left = 0)
        plt.sca(fig.get_axes()[0])
        plt.PlotDevice(fig).title(f"{state}: $R_t$ by district", ha = "center", x = 0.5)
        plt.axis("off")
        plt.savefig(f"/tmp/{state_code}_Rt_choropleth.png", dpi = 300)
        plt.close()
        print(f"Generated choropleth for {state_code}.")

        # check output is at least 100 KB
        choropleth_size_kb = os.stat(f"/tmp/{state_code}_Rt_choropleth.png").st_size / 1000
        print(f"Choropleth artifact size: {choropleth_size_kb} kb")
        assert choropleth_size_kb > 100
        bucket.blob(f"pipeline/rpt/{state_code}_Rt_choropleth.png").upload_from_filename(
            f"/tmp/{state_code}_Rt_choropleth.png", content_type = "image/png")
    else:
        print(f"Skipped choropleth for {state_code}.")

    if normalized_state not in dissolved_states:
        fig, ax = plt.subplots(1, 1)
        ax.axis("tight")
        ax.axis("off")
        table = ax.table(cellText = top10, colLabels = ["district", "$R_t$"], loc = "center", cellLoc = "center")
        table.scale(1, 2)
        for ((row, col), cell) in table.get_celld().items():
            cell.set_text_props(
                fontfamily = plt.theme.label["family"],
                fontsize   = plt.theme.label["size"],
                fontweight = "semibold" if row == 0 else "light")
        plt.PlotDevice().title(f"{state}: top districts by $R_t$", ha = "center", x = 0.5)
        plt.savefig(f"/tmp/{state_code}_Rt_top10.png", dpi = 600)
        plt.close()
        print(f"Generated top 10 district listing for {state_code}.")

        # check output is at least 50 KB
        top10_size_kb = os.stat(f"/tmp/{state_code}_Rt_top10.png").st_size / 1000
        print(f"Top 10 listing artifact size: {top10_size_kb} kb")
        assert top10_size_kb > 50
        bucket.blob(f"pipeline/rpt/{state_code}_Rt_top10.png").upload_from_filename(
            f"/tmp/{state_code}_Rt_top10.png", content_type = "image/png")
    else:
        print(f"Skipped top 10 district listing for {state_code}.")

    # sleep for 15 seconds to ensure the images finish saving
    time.sleep(15)
    print(f"Uploaded artifacts for {state_code}.")
    return "OK!"
# data prep
with (data / 'timeseries.json').open("rb") as fp:
    df = flat_table.normalize(pd.read_json(fp)).fillna(0)
df.columns = df.columns.str.split('.', expand = True)
dates = np.squeeze(df["index"][None].values)
# index rows by date, pivot the (metric type, metric) column levels into the row
# index, and drop the unassigned-state ("UN") column
df = df.drop(columns = "index").set_index(dates).stack([1, 2]).drop("UN", axis = 1)

series = mobility[mobility.sub_region_1.isna()]
plt.plot(series.date, smoothed(series.retail_and_recreation_percent_change_from_baseline), label = "Retail/Recreation")
plt.fill_betweenx((-100, 60), pd.to_datetime("March 24, 2020"), pd.to_datetime("June 1, 2020"), color = "black", alpha = 0.05, zorder = -1)
plt.text(s = "national lockdown", x = pd.to_datetime("April 27, 2020"), y = -20, fontdict = plt.note_font, ha = "center", va = "top")
plt.ylim(-100, 10)
plt.xlim(series.date.min(), series.date.max())
plt.legend(loc = 'upper right')
lax = plt.gca()
# overlay the national daily case counts on a secondary y-axis
plt.sca(lax.twinx())
plt.plot(df["TT"][:, "delta", "confirmed"].index, smoothed(df["TT"][:, "delta", "confirmed"].values), label = "Daily Cases", color = plt.PRED_PURPLE)
plt.legend(loc = 'lower right')
plt.PlotDevice().ylabel("new cases", rotation = -90, labelpad = 50)
plt.sca(lax)
plt.PlotDevice().title("\nIndia Mobility and Case Count Trends")\
    .annotate("Google Mobility Data + Covid19India.org")\
    .xlabel("\ndate")\
    .ylabel("% change in mobility\n")
plt.show()
xticks = {
    "Surat", "Narmada", "Mumbai", "Thane", "Pune",
    "Aurangabad", "Parbhani", "Nanded", "Yavatmal", "Chennai",
}

pop_density = pd.read_csv(data / "popdensity.csv").set_index(["state", "district"])

fig, ax_nest = plt.subplots(ncols = ncols, nrows = nrows)
# blank out grid cells that do not correspond to a district
for (j, i) in product(range(nrows), range(ncols)):
    if (i + 1, j + 1) in coords.values():
        continue
    ax_nest[j, i].axis("off")

for ((state, district), (x, y)) in coords.items():
    plt.sca(ax_nest[y - 1, x - 1])
    urban_share = int((1 - serodist.loc[state, ("New " if district == "Delhi" else "") + district]["rural_share"].mean()) * 100)
    density = pop_density.loc[state, district].density
    rt_data = district_estimates.loc[state, district].set_index("dates")["Feb 1, 2021":]
    plt.Rt(rt_data.index, rt_data.Rt_pred, rt_data.RR_CI_upper, rt_data.RR_CI_lower, 0.95,
        yaxis_colors = False, ymin = 0.5, ymax = 2.0)
    if (x, y) != (4, 1):
        (regency, Rt_pred[-1], Rt_CI_lower[-1], Rt_CI_upper[-1], linear_projection(dates, Rt_pred, 7)))

estimates = pd.DataFrame(estimates)
estimates.columns = ["regency", "Rt", "Rt_CI_lower", "Rt_CI_upper", "Rt_proj"]
estimates.set_index("regency", inplace = True)
estimates.to_csv("data/SULSEL_Rt_projections.csv")
print(estimates)

gdf = gpd.read_file("data/gadm36_IDN_shp/gadm36_IDN_2.shp")\
    .query("NAME_1 == 'Sulawesi Selatan'")\
    .merge(estimates, left_on = "NAME_2", right_on = "regency")

choro = plt.choropleth(gdf, mappable = plt.get_cmap(0.4, 1.4, "viridis"))
for ax in choro.figure.axes[:-1]:
    plt.sca(ax)
    plt.xlim(left = 119, right = 122)
    plt.ylim(bottom = -7.56, top = -1.86)
plt.show()

logger.info("adaptive control")
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
    = analytical_MPVS(new_cases, CI = CI, smoothing = smoothing, totals = False)

Rt = pd.DataFrame(data = {"Rt": Rt_pred[1:]}, index = dates)
Rt_current = Rt_pred[-1]
Rt_m = np.mean(Rt[(Rt.index >= "April 21, 2020") & (Rt.index <= "May 22, 2020")])[0]
Rt_v = np.mean(Rt[(Rt.index <= "April 14, 2020")])[0]
Rt_m_scaled = Rt_current + 0.75 * (Rt_m - Rt_current)
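# Worked example of the scaling above (numbers hypothetical): if the current
# estimate is Rt_current = 1.0 and the mean over the April 21 - May 22, 2020
# window is Rt_m = 1.8, then Rt_m_scaled = 1.0 + 0.75 * (1.8 - 1.0) = 1.6,
# i.e. the scenario moves Rt three-quarters of the way from the current value
# toward the historical-window mean.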
plt.Rt(list(state_Rt.dates), state_Rt.Rt_pred, state_Rt.Rt_CI_lower, state_Rt.Rt_CI_upper, CI)\
    .axis_labels("date", "$R_t$")\
    .title("Maharashtra: $R_t$ over time", ha = "center", x = 0.5)\
    .adjust(left = 0.11, bottom = 0.16)
plt.gcf().set_size_inches(3840 / 300, 1986 / 300)
plt.savefig("./MH_Rt_timeseries.png")
plt.clf()

gdf = gpd.read_file("data/maharashtra.json")
gdf["Rt"] = gdf.district.map(latest_Rt)

fig, ax = plt.subplots()
fig.set_size_inches(3840 / 300, 1986 / 300)
plt.choropleth(gdf, title = None, mappable = plt.get_cmap(0.75, 2.5), fig = fig, ax = ax)\
    .adjust(left = 0)
plt.sca(fig.get_axes()[0])
plt.PlotDevice(fig).title(f"{state}: $R_t$ by district", ha = "center", x = 0.5)
plt.axis('off')
plt.savefig(f"./{state_code}_Rt_choropleth.png", dpi = 300)
plt.clf()

top10 = [
    (k, "> 3.0" if v > 3 else f"{v:.2f}", v)
    for (k, v) in sorted(latest_Rt.items(), key = lambda t: t[1], reverse = True)[:10]
]

fig, ax = plt.subplots(1, 1)
ax.axis('tight')
ax.axis('off')
table = ax.table(cellText = [(k, l) for (k, l, v) in top10],
.axis_labels(x = "age group", y = "CFR (log-scaled)")\ .l_title("CFR in India (adjusted for reporting)")\ .r_title("source:\nICMR")\ .adjust(left = 0.11, bottom = 0.15, right = 0.95) plt.semilogy() plt.show() # fig 3 india_data = pd.read_csv(results / "india_data.csv", parse_dates = ["dt"])\ .query("State == 'TT'")\ .set_index("dt")\ .sort_index() fig, axs = plt.subplots(2, 2, sharex=True, sharey=True) plt.sca(axs[0, 0]) plt.scatter(india_data.index, india_data["cfr_2week"], color="black", s=2) plt.title("2-week lag", loc="left", fontdict=plt.theme.label) plt.sca(axs[0, 1]) plt.scatter(india_data.index, india_data["cfr_maxcor"], color="black", s=2) plt.title("10-day lag", loc="left", fontdict=plt.theme.label) plt.sca(axs[1, 0]) plt.scatter(india_data.index, india_data["cfr_1week"], color="black", s=2) plt.title("1-week lag", loc="left", fontdict=plt.theme.label) plt.sca(axs[1, 1]) plt.scatter(india_data.index, india_data["cfr_same"], color="black", s=2) plt.title("no lag", loc="left", fontdict=plt.theme.label) plt.gca().xaxis.set_major_formatter(formatter)
parse_datetimes(cases.loc[:, "confirmed"])
cases.regency = cases.regency.str.title().map(lambda s: regency_names.get(s, s))
cases.age = cases.age.apply(parse_age)
cases = cases.dropna(subset = ["age"])
cases["age_bin"] = pd.cut(cases.age, bins = [0] + list(range(20, 80, 10)) + [100])
age_ts = cases[["age_bin", "confirmed"]].groupby(["age_bin", "confirmed"]).size().sort_index()
ss_max_rts = {}

fig, axs = plt.subplots(4, 2, sharex = True, sharey = True)
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
    = analytical_MPVS(age_ts.sum(level = 1), CI = CI, smoothing = notched_smoothing(window = 5), totals = False)
plt.sca(axs.flat[0])
plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI)\
    .annotate("all ages")\
    .adjust(left = 0.04, right = 0.96, top = 0.95, bottom = 0.05, hspace = 0.3, wspace = 0.15)
r = pd.Series(Rt_pred, index = dates)
ss_max_rts["all"] = r[r.index.month_name() == "April"].max()

for (age_bin, ax) in zip(age_ts.index.get_level_values(0).categories, axs.flat[1:]):
    print(age_bin)
    (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
        = analytical_MPVS(age_ts.loc[age_bin], CI = CI, smoothing = smoothing, totals = False)