def estimate(time_series: pd.Series) -> pd.DataFrame:
    # run the Rt estimator on a cumulative (totals=True) case series and
    # unpack its outputs by position below
    estimates = analytical_MPVS(time_series,
                                CI=CI,
                                smoothing=smooth,
                                totals=True)
    return pd.DataFrame(
        data={
            "date": estimates[0],
            "Rt": estimates[1],
            "Rt_upper": estimates[2],
            "Rt_lower": estimates[3],
            "total_cases": estimates[-4][2:],
            "new_cases": estimates[-3],
        })
Example #2
def run_adaptive_model(df: pd.DataFrame, CI: float, smoothing: Callable,
                       filepath: Path) -> None:
    '''
    Runs the adaptive-control model of Rt and smoothed case counts based on what is currently in the
    analytical_MPVS module. Takes in a dataframe of cases and saves a dataframe of results to CSV.
    '''
    # Initialize results df
    res_full = pd.DataFrame()

    # Loop through each state
    print("Estimating state Rt values...")
    for state in tqdm(df['state'].unique()):

        # Calculate Rt for that state
        state_df = df[df['state'] == state].set_index('date')
        (dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper,
         T_CI_lower, total_cases, new_cases_ts, _,
         anomaly_dates) = analytical_MPVS(
             state_df[state_df['positive'] > 0]['positive'],
             CI=CI,
             smoothing=smoothing)
        assert (len(dates) == len(RR_pred))

        # Save results
        res = pd.DataFrame({
            'state': state,
            'date': dates,
            'RR_pred': RR_pred,
            'RR_CI_upper': RR_CI_upper,
            'RR_CI_lower': RR_CI_lower,
            'T_pred': T_pred,
            'T_CI_upper': T_CI_upper,
            'T_CI_lower': T_CI_lower,
            'new_cases_ts': new_cases_ts,
            'total_cases': total_cases[2:],
            'anomaly': dates.isin(set(anomaly_dates))
        })
        res_full = pd.concat([res_full, res], axis=0)

    # Merge results back onto the input df and save to CSV
    merged_df = df.merge(res_full, how='outer', on=['state', 'date'])
    merged_df.to_csv(filepath / "adaptive_estimates.csv")
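
# A minimal usage sketch for run_adaptive_model. The input file name, smoothing
# window, and output directory are illustrative assumptions; the dataframe needs
# the 'state', 'date', and 'positive' columns used inside the function, and
# notched_smoothing is one of the smoothers used elsewhere in these examples.
cases = pd.read_csv("state_cases.csv", parse_dates=["date"])
run_adaptive_model(cases,
                   CI=0.95,
                   smoothing=notched_smoothing(window=7),
                   filepath=Path("estimates"))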
Example #3
smoothing = 10
CI = 0.95

state_cases = pd.read_csv(data / "Bihar_cases_data_Oct03.csv",
                          parse_dates=["date_reported"],
                          dayfirst=True)
state_ts = state_cases["date_reported"].value_counts().sort_index()
district_names, population_counts, _ = etl.district_migration_matrix(
    data / "Migration Matrix - District.csv")
populations = dict(zip(district_names, population_counts))

# first, look at state level predictions
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower,
 total_cases, new_cases_ts, anomalies, anomaly_dates) = analytical_MPVS(
     state_ts,
     CI=CI,
     smoothing=notched_smoothing(window=smoothing),
     totals=False)

plt.Rt(dates, Rt_pred[1:], Rt_CI_upper[1:], Rt_CI_lower[1:], CI, ymin=0, ymax=4)\
    .title("\nBihar: Reproductive Number Estimate")\
    .annotate(f"data from {str(dates[0]).split()[0]} to {str(dates[-1]).split()[0]}")\
    .xlabel("date")\
    .ylabel("$R_t$", rotation=0, labelpad=20)\
    .show()

np.random.seed(33)
Bihar = SIR("Bihar",
            99_000_000,
            dT0=T_pred[-1],
            Rt0=Rt_pred[-1],
Example #4
# model details
CI = 0.95
smoothing = 30
alpha = 3.8
beta = 2.25
vs = 0.999

true_Rt = pd.read_table("./true_Rt.txt", dtype="float", squeeze=True)
obs_cases = pd.read_table("./obs_cases.txt", dtype="float", squeeze=True)

(dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower,
 total_cases, new_cases_ts, anomalies, anomaly_dates) = analytical_MPVS(
     obs_cases,
     CI=CI,
     alpha=alpha,
     beta=beta,
     variance_shift=vs,
     smoothing=lambda ts: box_filter(ts, smoothing, smoothing // 2))

print("Rt today:", RR_pred[-1])
print("a, b, vs, MSE:", alpha, beta, vs,
      ((true_Rt.loc[len(true_Rt) - len(RR_pred):] - RR_pred)**2).sum())

plot_RR_est(dates, RR_pred, RR_CI_lower, RR_CI_upper, CI)\
    .ylabel("Estimated $R_t$")\
    .title("Synthetic Data Estimation")\
    .size(11, 8)
plt.plot(true_Rt.index, true_Rt.values, 'k--', label="True $R_t$")
plt.xlim(0, 150)
plt.ylim(0, 2.5)
         label="state test-scaled")
plt.plot(df[state][:, "delta", "confirmed"], label="confirmed")
plt.legend()
plt.PlotDevice().title(f"\n{state} / case scaling comparison").xlabel(
    "\ndate").ylabel("cases\n")
plt.show()

# I vs D estimators
gamma = 0.2
window = 7 * days
CI = 0.95
smooth = notched_smoothing(window)

(dates_I, Rt_I, Rtu_I, Rtl_I, *_) = analytical_MPVS(df[state][:, "delta",
                                                              "confirmed"],
                                                    CI=CI,
                                                    smoothing=smooth,
                                                    totals=False)
(dates_D, Rt_D, Rtu_D, Rtl_D, *_) = analytical_MPVS(df[state][:, "delta",
                                                              "deceased"],
                                                    CI=CI,
                                                    smoothing=smooth,
                                                    totals=False)

plt.Rt(dates_I, Rt_I, Rtu_I, Rtl_I, CI)\
    .title(f"{state} - $R_t(I)$ estimator")
plt.figure()
plt.Rt(dates_D, Rt_D, Rtu_D, Rtl_D, CI)\
    .title(f"{state} - $R_t(D)$ estimator")

plt.show()
Example #6
        return None
    
cases = pd.read_csv("data/1 Nop 2020 Data collection template update South Sulawesi_update (01112020) (2).csv", usecols = schema.keys())\
        .rename(columns = schema)\
        .dropna(how = 'all')
parse_datetimes(cases.loc[:, "confirmed"])
cases.regency = cases.regency.str.title().map(lambda s: regency_names.get(s, s))
cases.age     = cases.age.apply(parse_age)
cases = cases.query("regency == 'Makassar'").dropna(subset = ["age"])
cases["age_bin"] = pd.cut(cases.age, [0, 20, 100], labels = ["school", "nonschool"])
cases = cases[cases.confirmed <= "Oct 25, 2020"]

age_ts = cases[["age_bin", "confirmed"]].groupby(["age_bin", "confirmed"]).size().sort_index()

(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
    = analytical_MPVS(age_ts.loc["school"], CI = CI, smoothing = smoothing, totals = False)

school_Rt = np.mean(Rt_pred[-14:])
school_T_lb = T_CI_lower[-1]
school_T_ub = T_CI_upper[-1]

plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI)\
    .title("\nMakassar: Reproductive Number Estimate: school-age population")\
    .xlabel("\ndate")\
    .ylabel("$R_t$\n", rotation=0, labelpad=30)\
    .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")\
    .show()

(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
    = analytical_MPVS(age_ts.loc["nonschool"], CI = CI, smoothing = smoothing, totals = False)
run_date     = str(pd.Timestamp.now()).split()[0]

ts = get_time_series(df, "detected_state")

states = ["Maharashtra", "Bihar", "Delhi", "Andhra Pradesh", "Telangana", "Tamil Nadu", "Madhya Pradesh"]

for state in states: 
    print(state)
    print("  + running estimation...")
    (
        dates,
        Rt_pred, RR_CI_upper, RR_CI_lower,
        T_pred, T_CI_upper, T_CI_lower,
        total_cases, new_cases_ts,
        anomalies, anomaly_dates
    ) = analytical_MPVS(ts.loc[state].Hospitalized, CI = CI, smoothing = notched_smoothing(window = smoothing), totals = False)
    estimates = pd.DataFrame(data = {
        "dates": dates,
        "Rt_pred": Rt_pred,
        "RR_CI_upper": RR_CI_upper,
        "RR_CI_lower": RR_CI_lower,
        "T_pred": T_pred,
        "T_CI_upper": T_CI_upper,
        "T_CI_lower": T_CI_lower,
        "total_cases": total_cases[2:],
        "new_cases_ts": new_cases_ts,
    })
    print("  + Rt today:", Rt_pred[-1])

    # plt.Rt(dates, Rt_pred, RR_CI_lower, RR_CI_upper, CI)\
    #     .ylabel("Estimated $R_t$")\
figs = root / "figs/comparison/kaggle"

states = [
    "Maharashtra"
]  #, "Bihar", "Delhi", "Andhra Pradesh", "Telangana", "Tamil Nadu", "Madhya Pradesh"]

kaggle = pd.read_csv(data / "covid_19_india.csv",
                     parse_dates=[1],
                     dayfirst=True).set_index("Date")

for state in states:
    print(state)
    print("  + running estimation...")
    (dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower,
     total_cases, new_cases_ts, anomalies, anomaly_dates) = analytical_MPVS(
         kaggle[kaggle["State/UnionTerritory"] == state].Confirmed,
         CI=CI,
         smoothing=lambda ts: box_filter(ts, smoothing, 3))

    estimates = pd.DataFrame(
        data={
            "dates": dates,
            "RR_pred": RR_pred,
            "RR_CI_upper": RR_CI_upper,
            "RR_CI_lower": RR_CI_lower,
            "T_pred": T_pred,
            "T_CI_upper": T_CI_upper,
            "T_CI_lower": T_CI_lower,
            "total_cases": total_cases[2:],
            "new_cases_ts": new_cases_ts,
        })
    print("  + Rt today:", RR_pred[-1])
Example #9
(data, figs) = setup(level = "INFO")
# for province in provinces:
#     logger.info("downloading data for %s", province)
#     download_data(data, filename(province), base_url = "https://data.covid19.go.id/public/api/")

province_cases = {province: load_province_timeseries(data, province) for province in provinces}
bgn = min(cases.index.min() for cases in province_cases.values())
end = max(cases.index.max() for cases in province_cases.values())
idx = pd.date_range(bgn, end)
province_cases = {province: cases.reindex(idx, method = "pad").fillna(0) for (province, cases) in province_cases.items()}
natl_cases = sum(province_cases.values())


logger.info("running national-level Rt estimate")
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
     = analytical_MPVS(natl_cases, CI = CI, smoothing = smoothing) 

plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI, ymin=0, ymax=4)\
    .title("\nIndonesia: Reproductive Number Estimate")\
    .xlabel("\ndate")\
    .ylabel("$R_t$", rotation=0, labelpad=30)\
    .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")\
    .show()

logger.info("running case-forward prediction")
IDN = SIR("IDN", 267.7e6, dT0 = T_pred[-1], Rt0 = Rt_pred[-1], mobility = 0, random_seed = 0).run(14)


logger.info("province-level projections")
migration = np.zeros((len(provinces), len(provinces)))
estimates = []
province_cases = {
    province: load_province_timeseries(data, province, "Apr 1, 2020")
    for province in provinces
}
bgn = min(cases.index.min() for cases in province_cases.values())
end = max(cases.index.max() for cases in province_cases.values())
idx = pd.date_range(bgn, end)
province_cases = {
    province: cases.reindex(idx, method="pad").fillna(0)
    for (province, cases) in province_cases.items()
}

prediction_period = 14 * days
for province in provinces:
    title = province.title().replace("Dki", "DKI")
    logger.info(title)
    (dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
         = analytical_MPVS(province_cases[province], CI = CI, smoothing = smoothing)
    IDN = Model.single_unit(name = province, population = priority_pops[province], I0 = T_pred[-1], RR0 = RR_pred[-1], upper_CI = T_CI_upper[-1], lower_CI = T_CI_lower[-1], mobility = 0, random_seed = 0)\
            .run(prediction_period)

    plt.Rt(dates, RR_pred, RR_CI_upper, RR_CI_lower, CI, ymin=0.2, ymax=4.5)\
        .title(f"{title}")\
        .xlabel("\ndate")\
        .ylabel("$R_t$", rotation=0, labelpad=30)\
        .show()

    # plt.daily_cases(dates, T_pred, T_CI_upper, T_CI_lower, new_cases_ts, anomaly_dates, anomalies, CI, IDN[0].delta_T[:-1], IDN[0].lower_CI[1:], IDN[0].upper_CI[1:])\
    #     .title(f"\n{title}")\
    #     .xlabel("\ndate")\
    #     .ylabel("cases")\
    #     .show()
dkij = dkij\
    .set_axis(dkij.columns.str.lower(), 1)\
    .assign(
        district    = dkij.district.str.title(),
        subdistrict = dkij.subdistrict.apply(lambda name: next((k for (k, v) in replacements.items() if name in v), name)), 
    )

district_cases = dkij.groupby(["district", "date_positiveresult"])["id"].count().sort_index()
districts = sorted(dkij.district.unique())
migration = np.zeros((len(districts), len(districts)))
R_mandatory = dict()
R_voluntary = dict() 
max_len = 1 + max(map(len, districts))
with tqdm(districts) as progress:
    for district in districts:
        progress.set_description(f"{district :<{max_len}}")
        (dates, RR_pred, *_) = analytical_MPVS(district_cases.loc[district], CI = CI, smoothing = notched_smoothing(window = window), totals=False)
        Rt = pd.DataFrame(data = {"Rt": RR_pred[1:]}, index = dates)
        R_mandatory[district] = np.mean(Rt[(Rt.index > "April 1, 2020") & (Rt.index < "June 1, 2020")])[0]
        R_voluntary[district] = np.mean(Rt[(Rt.index < "April 1, 2020")])[0]

pops = [
    2_430_410,
    910_381,
    2_164_070,
    2_817_994,
    1_729_444,
    23_011
]

gdf = gpd.read_file("data/gadm36_IDN_shp/gadm36_IDN_2.shp")\
         .query("NAME_1 == 'Jakarta Raya'")\
Example #12
cases = pd.read_stata(data/"coviddkijakarta_290920.dta")\
        .query("province == 'DKI JAKARTA'")\
        .drop(columns = dkij_drop_cols + ["province"])
cases = cases\
    .set_axis(cases.columns.str.lower(), 1)\
    .assign(
        district    = cases.district.str.title(),
        subdistrict = cases.subdistrict.apply(lambda name: next((k for (k, v) in replacements.items() if name in v), name)),
    )

cases["age_bin"] = pd.cut(cases.age,
                          bins=[0] + list(range(20, 80, 10)) + [100])
age_ts = cases[["age_bin", "date_positiveresult"
                ]].groupby(["age_bin",
                            "date_positiveresult"]).size().sort_index()
dkij_max_rts = {}

(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
    = analytical_MPVS(age_ts.sum(level = 1), CI = CI, smoothing = smoothing, totals = False)
r = pd.Series(Rt_pred, index=dates)
dkij_max_rts["all"] = r[r.index.month_name() == "April"].max()

for age_bin in age_ts.index.get_level_values(0).categories:
    print(age_bin)
    (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
        = analytical_MPVS(age_ts.loc[age_bin], CI = CI, smoothing = smoothing, totals = False)
    r = pd.Series(Rt_pred, index=dates)
    dkij_max_rts[age_bin] = r[r.index.month_name() == "April"].max()

print(dkij_max_rts)
def estimate(ts, smoothing):
    (state_dates, R, *_) = analytical_MPVS(ts.Hospitalized, smoothing = smoothing)
    dates = [sd[1] if isinstance(sd, tuple) else sd for sd in state_dates]
    return pd.DataFrame({"date": dates, "R": R}).set_index("date")
Example #14
    df = load_all_data(
        v3_paths = [data/filepath for filepath in paths['v3']], 
        v4_paths = [data/filepath for filepath in paths['v4']]
    )
    data_recency = str(df["date_announced"].max()).split()[0]
    run_date     = str(pd.Timestamp.now()).split()[0]

    ts = get_time_series(df[df.detected_state == "Delhi"])

    (
        dates,
        RR_pred, RR_CI_upper, RR_CI_lower,
        T_pred, T_CI_upper, T_CI_lower,
        total_cases, new_cases_ts,
        anomalies, anomaly_dates
    ) = analytical_MPVS(ts.delta[ts.delta > 0], CI = CI, smoothing = convolution(window = smoothing)) 
    #= analytical_MPVS(ts.Hospitalized[ts.Hospitalized > 0], CI = CI, smoothing = lambda ts: box_filter(ts, smoothing, 10))

    np.random.seed(33)
    delhi = Model([ModelUnit("Delhi", 18_000_000, I0 = T_pred[-1], RR0 = RR_pred[-1], mobility = 0)])
    delhi.run(14, np.zeros((1,1)))

    t_pred = [dates[-1] + pd.Timedelta(days = i) for i in range(len(delhi[0].delta_T))]

    plot_RR_est(dates, RR_pred, RR_CI_upper, RR_CI_lower, CI)
    PlotDevice().title("Delhi: Reproductive Number Estimate").xlabel("Date").ylabel("Rt", rotation=0, labelpad=20)
    plt.show()
    
    delhi[0].lower_CI[0] = T_CI_lower[-1]
    delhi[0].upper_CI[0] = T_CI_upper[-1]
    print(delhi[0].delta_T)
Example #15
    #     plt.daily_cases(dates, T_pred, T_CI_upper, T_CI_lower, new_cases_ts, anomaly_dates, anomalies, CI,
    #         prediction_ts = [
    #             (current[0].delta_T[1:], current[0].lower_CI[1:], current[0].upper_CI[1:], "orange", r"projection with current $R_t$"),
    #             (target[0].delta_T[1:],  target[0].lower_CI[1:],  target[0].upper_CI[1:],  "green",  r"projection with $R_t \rightarrow 0.9$")
    #         ])\
    #         .adjust(left = 0.10, right = 0.95, bottom = 0.15, top = 0.95)\
    #         .xlabel("date")\
    #         .ylabel("cases")\
    #         .show()

    # run Indian states
    for (state, pop) in [("Maharashtra", 112374333), ("Gujarat", 60439692),
                         ("Bihar", 104099452)]:
        print(state)
        (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates) \
            = analytical_MPVS(india[india.state == state][["date", "confirmed"]].set_index("date")  , CI = CI, smoothing = smoothing, totals=True)
        dates = [pd.Timestamp(_).to_pydatetime().date() for _ in dates]
        anomaly_dates = [
            pd.Timestamp(_).to_pydatetime().date() for _ in anomaly_dates
        ]
        model = lambda: Model.single_unit(name=state,
                                          RR0=Rt_pred[-1],
                                          population=pop,
                                          infectious_period=infectious_period,
                                          I0=T_pred[-1],
                                          lower_CI=T_CI_lower[-1],
                                          upper_CI=T_CI_upper[-1],
                                          random_seed=33)
        forward_pred_period = 9
        t_pred = [
            dates[-1] + pd.Timedelta(days=i)
Example #16
time_series["delta_I"] = time_series.groupby(level=0)['Hospitalized'].diff()
time_series["dow"] = time_series.index.get_level_values(1).dayofweek
plot_average_change(time_series,
                    "(All India)",
                    filename=figs / "avg_delta_I_DoW_India.png")
for state in tqdm(time_series.index.get_level_values(0).unique()):
    plot_average_change(time_series.loc[state],
                        f"({state})",
                        filename=figs / f"avg_delta_I_DoW_{state}.png")

# are anomalies falling on certain days?
print("checking anomalies...")
smoothing = 5
(*_,
 anomaly_dates) = analytical_MPVS(natl_time_series["Hospitalized"].iloc[:-1],
                                  CI=0.95,
                                  smoothing=convolution(window=smoothing))
anomaly_histogram(anomaly_dates,
                  "(All India)",
                  filename=figs / "anomaly_DoW_hist_India.png")
for state in tqdm(time_series.index.get_level_values(0).unique()):
    (*_, anomaly_dates) = analytical_MPVS(
        time_series.loc[state]["Hospitalized"].iloc[:-1],
        CI=0.95,
        smoothing=convolution(window=smoothing))
    anomaly_histogram(anomaly_dates,
                      f"({state})",
                      filename=figs / f"anomaly_DoW_hist_{state}.png")

print("estimating spectral densities...")
# what does the aggregate spectral density look like?
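# A minimal sketch of one way to inspect the aggregate spectral density asked
# about above, using scipy.signal.periodogram (an assumption; the original,
# truncated here, may have used a different estimator):
from scipy import signal

freqs, power = signal.periodogram(natl_time_series["Hospitalized"].iloc[:-1].values)
plt.plot(freqs, power)
plt.PlotDevice().title("\naggregate spectral density (All India)").xlabel("\nfrequency (1/day)").ylabel("power\n")
plt.show()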
Example #17
                       for _ in top_level[timeseries]],
                      columns=["date", "total_cases"])
    df["date"] = (date_scale * df["date"]).apply(pd.Timestamp)
    return df.set_index("date")


logger.info("district-level projections")

pops = [sum([2_430_410, 910_381, 2_164_070, 2_817_994, 1_729_444, 23_011])]
dkij = load_province_timeseries(data, district)
R_mandatory = dict()
R_voluntary = dict()

(dates, Rt_pred,
 *_) = analytical_MPVS(dkij,
                       CI=CI,
                       smoothing=notched_smoothing(window=window),
                       totals=True)
Rt = pd.DataFrame(data={"Rt": Rt_pred}, index=dates)
R_mandatory[district] = np.mean(Rt[(Rt.index >= "Sept 21, 2020")])[0]
R_voluntary[district] = np.mean(Rt[(Rt.index < "April 1, 2020")])[0]

si, sf = 0, 10

simulation_results = [
    run_policies([dkij.iloc[-1][0] - dkij.iloc[-2][0]],
                 pops,
                 districts,
                 np.zeros((1, 1)),
                 gamma,
                 R_mandatory,
                 R_voluntary,
Example #18
CI = 0.95

# private data
state_cases = pd.read_csv(data / "Bihar_cases_data_Jul23.csv",
                          parse_dates=["date_reported"],
                          dayfirst=True)
state_ts = state_cases["date_reported"].value_counts().sort_index()
district_names, population_counts, _ = etl.district_migration_matrix(
    data / "Migration Matrix - District.csv")
populations = dict(zip(district_names, population_counts))

# first, look at state level predictions
(dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower,
 total_cases, new_cases_ts, anomalies,
 anomaly_dates) = analytical_MPVS(state_ts,
                                  CI=CI,
                                  smoothing=convolution(window=smoothing))

plot_RR_est(dates, RR_pred, RR_CI_upper, RR_CI_lower, CI, ymin=0, ymax=4)\
    .title("Bihar: Reproductive Number Estimate Comparisons")\
    .xlabel("Date")\
    .ylabel("Rt", rotation=0, labelpad=20)
plt.ylim(0, 4)

# public data
paths = {
    "v3": [data_path(_) for _ in (1, 2)],
    "v4": [data_path(_) for _ in range(3, 13)]
}

for target in paths['v3'] + paths['v4']:
Example #19
         .query("province == 'DKI JAKARTA'")\
         .drop(columns=dkij_drop_cols + ["province"])
dkij["district"] = dkij.district.str.title()

gdf = gpd.read_file("data/gadm36_IDN_shp/gadm36_IDN_2.shp")\
         .query("NAME_1 == 'Jakarta Raya'")\
         .drop(columns=shp_drop_cols)
bbox = shapely.geometry.box(minx = 106.65, maxx = 107.00, miny = -6.40, maxy=-6.05)
gdf = gdf[gdf.intersects(bbox)]

jakarta_districts = dkij.district.str.title().unique()
jakarta_cases = dkij.groupby("date_positiveresult")["id"].count().rename("cases")

logger.info("running province-level Rt estimate")
(dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
    = analytical_MPVS(jakarta_cases, CI = CI, smoothing = smoothing, totals=False) 

plt.Rt(dates, RR_pred[1:], RR_CI_upper[1:], RR_CI_lower[1:], CI)\
    .title("\nDKI Jakarta: Reproductive Number Estimate")\
    .xlabel("\ndate")\
    .ylabel("$R_t$\n", rotation=0, labelpad=30)\
    .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")\
    .show()


logger.info("running case-forward prediction")
prediction_period = 14*days
IDN = SIR(name = "IDN", population = 267.7e6, dT0 = T_pred[-1], Rt0 = RR_pred[-1], upper_CI = T_CI_upper[-1], lower_CI = T_CI_lower[-1], mobility = 0, random_seed = 0)\
           .run(prediction_period)
 
plt.daily_cases(dates, T_pred[1:], T_CI_upper[1:], T_CI_lower[1:], new_cases_ts[1:], anomaly_dates, anomalies, CI, 
Example #20
plt.plot(sir_model.dT)
plt.show()
plt.plot(R0_timeseries, "-", color="black", label="$R_0$")
plt.plot(sir_model.Rt, "-", color="dodgerblue", label="$R_t$")
plt.legend(framealpha=1, handlelength=1, loc="best")
plt.PlotDevice().xlabel("time").ylabel("reproductive rate").adjust(left=0.10,
                                                                   bottom=0.15,
                                                                   right=0.99,
                                                                   top=0.99)
plt.ylim(0.5, 1.5)
plt.show()

# 1: parametric scheme:
dates, Rt, Rt_lb, Rt_ub, *_, anomalies, anomaly_dates = analytical_MPVS(
    pd.DataFrame(sir_model.dT),
    smoothing=convolution("uniform", 2),
    CI=0.99,
    totals=False)
rtplot = plt.Rt(dates, Rt, Rt_ub, Rt_lb, ymin = 0.5, ymax = 2.5, CI = 0.99, yaxis_colors = False, format_dates = False, critical_threshold = False)\
    .xlabel("time")\
    .ylabel("reproductive rate")\
    .adjust(left = 0.11, bottom = 0.15, right = 0.98, top = 0.98)
plt.plot(sir_model.Rt, "-", color="white", linewidth=3, zorder=10)
sim_rt, = plt.plot(sir_model.Rt,
                   "-",
                   color="dodgerblue",
                   linewidth=2,
                   zorder=11)
anoms = plt.vlines(anomaly_dates, 0, 4, colors="red", linewidth=2, alpha=0.5)
plt.legend([rtplot.markers["Rt"], sim_rt, anoms],
           ["Estimated $R_t$ (99% CI)", "simulated $R_t$", "anomalies"],
Example #21
gamma = 0.2
smoothing = 10
CI = 0.95

download_data(data, 'state_wise_daily.csv')

state_df = load_statewise_data(data / "state_wise_daily.csv")
country_time_series = get_time_series(state_df)

estimates = []
timeseries = []

# country level
(dates, RR_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower,
 total_cases, new_cases_ts, anomalies, anomaly_dates) = analytical_MPVS(
     country_time_series["Hospitalized"].iloc[:-1],
     CI=CI,
     smoothing=notched_smoothing(window=smoothing))

country_code = state_name_lookup["India"]
for row in zip(dates, RR_pred, RR_CI_upper, RR_CI_lower):
    timeseries.append((country_code, *row))

# state level rt estimates
state_time_series = get_time_series(state_df, 'state')
state_names = list(state_time_series.index.get_level_values(level=0).unique())
max_len = 1 + max(map(len, state_names))
with tqdm(state_names) as states:
    for state in states:
        state_code = state_name_lookup[state]
        states.set_description(f"{state :<{max_len}}")
        try:
Example #22
#     .apply(get_generation_interval, axis = 1)\
#     .dropna()\
#     .value_counts()\
#     .sort_index()
# generation_interval =  generation_interval[(generation_interval.index >= 0) & (generation_interval.index <= 60)]
# generation_interval /= generation_interval.sum()

new_cases = cases.confirmed.value_counts().sort_index()
new_cases_smoothed = smoothing(new_cases)
plt.plot(new_cases, '.', color = "blue")
plt.plot(new_cases.index, new_cases_smoothed, '-', color = "black")
plt.show()

logger.info("running province-level Rt estimate")
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
    = analytical_MPVS(new_cases, CI = CI, smoothing = smoothing, totals = False)

plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, CI)\
    .title("\nSouth Sulawesi: Reproductive Number Estimate")\
    .xlabel("\ndate")\
    .ylabel("$R_t$\n", rotation=0, labelpad=30)\
    .annotate(f"\n{window}-day smoothing window, gamma-prior Bayesian estimation method")\
    .show()

logger.info("running case-forward prediction")
prediction_period = 14*days
I0 = (~cases.confirmed.isna()).sum() - (~cases.recovered.isna()).sum() - (~cases.died.isna()).sum()
IDN = SIR(name = "IDN", population = 8_819_500, dT0 = T_pred[-1], Rt0 = Rt_pred[-1], upper_CI = T_CI_upper[-1], lower_CI = T_CI_lower[-1], mobility = 0, random_seed = 0, I0 = I0)\
           .run(prediction_period)

plt.daily_cases(dates, T_pred, T_CI_upper, T_CI_lower, new_cases_ts, anomaly_dates, anomalies, CI, 
Example #23
age_structure_norm = sum(IN_age_structure.values())
IN_age_ratios = np.array(
    [v / age_structure_norm for (k, v) in IN_age_structure.items()])
split_by_age = lambda v: (v * IN_age_ratios).astype(int)

# get age-specific prevalence from KA sero
KA = pd.read_stata("data/ka_cases_deaths_time_newagecat.dta")

KA.agecat = KA.agecat.where(
    KA.agecat != 85,
    75)  # we don't have econ data for 85+ so combine 75+ and 85+ categories
KA_agecases = KA.groupby(["agecat", "date"])["patientcode"]\
    .count().sort_index().rename("cases")\
    .unstack().fillna(0).stack()
KA_ts = KA_agecases.sum(level=1)
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower,
 total_cases, new_cases_ts, anomalies,
 anomaly_dates) = analytical_MPVS(KA_ts, notched_smoothing(5))

COVID_age_ratios = (KA_agecases.sum(level=0) / KA_agecases.sum()).values
split_by_prevalence = lambda v: (v * COVID_age_ratios).astype(int)

for seed in range(10):
    model = AgeStructured("KA", 6.11e7, 857, 915345, 1.826,
                          diag(u) @ C, IN_age_ratios, COVID_age_ratios, seed)
    counter = 0
    while model.dT[-1].sum() > 0:
        model.forward_epi_step()
        counter += 1
    print(seed, counter, model.dT)
    if vax_pct_annual_goal == 0 and vax_effectiveness != 1.00:
        continue
    # grab time series
    D, R = ts.loc[district][["dD", "dR"]].sum()

    dT_conf_district = ts.loc[district].dT
    dT_conf_district = dT_conf_district.reindex(pd.date_range(
        dT_conf_district.index.min(), dT_conf_district.index.max()),
                                                fill_value=0)
    dT_conf_district_smooth = pd.Series(
        smooth(dT_conf_district),
        index=dT_conf_district.index).clip(0).astype(int)

    # run Rt estimation on scaled timeseries
    (Rt_dates, Rt_est, *_) = analytical_MPVS(T_ratio * dT_conf_district_smooth,
                                             CI=CI,
                                             smoothing=lambda _: _,
                                             totals=False)
    Rt = dict(zip(Rt_dates, Rt_est))

    daily_rate = vax_pct_annual_goal / 365
    daily_vax_doses = int(vax_effectiveness * daily_rate * N_district)

    T_scaled = dT_conf_district_smooth.cumsum()[simulation_start] * T_ratio

    model = SIR(
        name=state,
        population=N_district,
        dT0=np.ones(num_sims) *
        (dT_conf_district_smooth[simulation_start] * T_ratio).astype(int),
        Rt0=Rt[simulation_start] * N_district / (N_district - T_scaled),
        I0=np.ones(num_sims) * (T_scaled - R - D),
        .query("age.str.strip() != ''", engine = "python")
parse_datetimes(cases.loc[:, "confirmed"])
cases.regency = cases.regency.str.title().map(
    lambda s: regency_names.get(s, s))
cases.age = cases.age.apply(parse_age)
cases = cases.dropna(subset=["age"])
cases["age_bin"] = pd.cut(cases.age,
                          bins=[0] + list(range(20, 80, 10)) + [100])
age_ts = cases[["age_bin",
                "confirmed"]].groupby(["age_bin",
                                       "confirmed"]).size().sort_index()
ss_max_rts = {}

fig, axs = plt.subplots(4, 2, True, True)
(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
    = analytical_MPVS(age_ts.sum(level = 1), CI = CI, smoothing = notched_smoothing(window = 5), totals = False)
plt.sca(axs.flat[0])
plt.Rt(dates, Rt_pred, Rt_CI_upper, Rt_CI_lower,
       CI).annotate(f"all ages").adjust(left=0.04,
                                        right=0.96,
                                        top=0.95,
                                        bottom=0.05,
                                        hspace=0.3,
                                        wspace=0.15)
r = pd.Series(Rt_pred, index=dates)
ss_max_rts["all"] = r[r.index.month_name() == "April"].max()

for (age_bin,
     ax) in zip(age_ts.index.get_level_values(0).categories, axs.flat[1:]):
    print(age_bin)
    (dates, Rt_pred, Rt_CI_upper, Rt_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
        = analytical_MPVS(age_ts.loc[age_bin], CI = CI, smoothing = notched_smoothing(window = 5), totals = False)  # continuation assumed, mirroring the aggregate call above
    gamma  = 0.2
    window = 10
    CI = 0.95

    state_cases = pd.read_csv(data/"Bihar_cases_data_Oct03.csv", parse_dates=["date_reported", "date_status_change"], dayfirst=True)
    state_cases["geo_reported"] = state_cases.geo_reported.str.strip()
    state_cases = state_cases[state_cases.date_reported <= "2020-09-30"]
    state_ts = state_cases["date_reported"].value_counts().sort_index()
    district_ts = state_cases.groupby(["geo_reported", "date_reported"])["date_reported"].count().sort_index()
    districts, pops, migrations = etl.district_migration_matrix(data/"Migration Matrix - District.csv")
    districts = sorted([etl.replacements.get(dn, dn) for dn in districts])
    
    R_mandatory = dict()
    for district in districts:#district_ts.index.get_level_values(0).unique():
        try: 
            (_, Rt, *_) = analytical_MPVS(district_ts.loc[district], CI = CI, smoothing = notched_smoothing(window = 10), totals = False)
            Rm = np.mean(Rt)
        except ValueError as v:
            Rm = 1.5
        R_mandatory[district] = Rm
    
    R_voluntary = {district: 1.2*R for (district, R) in R_mandatory.items()}

    si, sf = 0, 10

    simulation_results = [ 
        run_policies(state_cases, pops, districts, migrations, gamma, R_mandatory, R_voluntary, lockdown_period = lockdown_period, total = total_time, seed = seed)
        for seed in tqdm(range(si, sf))
    ]

    plt.simulations(
Example #27
ts_full = get_time_series(df, "detected_state")
ts = ts_full.query("status_change_date <= 'October 14, 2020'")

states    = ["Bihar", "Maharashtra", "Punjab", "Tamil Nadu"][-1:]
codes     = ["BR",    "MH",          "PN",     "TN"][-1:]
pops      = [99.02e6, 114.2e6,       27.98e6,  67.86e6][-1:]
Rt_ranges = [(0.9, 2.4), (0.55, 2.06), (0.55, 2.22), (0.84, 1.06)][-1:]
windows   = [7, 14, 7, 10][-1:]


for (state, code, pop, Rt_range, smoothing) in zip(states, codes, pops, Rt_ranges, windows): 
    print(state)
    print("  + running estimation...")
    state_ts_full = pd.Series(data = notched_smoothing(window = smoothing)(ts_full.loc[state].Hospitalized), index = ts_full.loc[state].Hospitalized.index)
    (dates, Rt_pred, RR_CI_upper, RR_CI_lower, T_pred, T_CI_upper, T_CI_lower, total_cases, new_cases_ts, anomalies, anomaly_dates)\
        = analytical_MPVS(ts.loc[state].Hospitalized, CI = CI, smoothing = lambda x:x, totals = False)
    Rt = pd.DataFrame({"Rt": Rt_pred}, index = dates)
    Rt_m = np.mean(Rt[(Rt.index >= "31 March, 2020") & (Rt.index <= "17 May, 2020")])[0]
    Rt_v = np.mean(Rt[(Rt.index <  "31 March, 2020")])[0]
    print("  + Rt today:", Rt_pred[-1])
    print("  + Rt_m    :", Rt_m)
    print("  + Rt_v    :", Rt_v)
    historical = pd.DataFrame({"smoothed": new_cases_ts}, index = dates)

    plt.Rt(dates, Rt_pred, RR_CI_lower, RR_CI_upper, CI)\
        .ylabel("$R_t$")\
        .xlabel("date")\
        .title(f"\n{state}: Reproductive Number Estimate")\
        .annotate(f"public data from {str(dates[0]).split()[0]} to {str(dates[-1]).split()[0]}")\
        .show()