def run_Rt_estimation(grp_numbers, smoothing_window, r_window_size):
    """Estimate Rt per region from the NU case file and write the results.

    Code following online example:
    https://github.com/lo-hfk/epyestim/blob/main/notebooks/covid_tutorial.ipynb

    smoothing_window of 28 days was found to be most comparable to EpiEstim
    in this case. r_window_size default is 3 if not specified (presumably the
    epyestim default; this function requires the value explicitly).
    Increasing r_window_size narrows the uncertainty bounds.

    Reads the module-level names ``exp_name``, ``exp_dir`` and
    ``last_plot_day``.

    Side effects:
        * writes ``rtNU.csv`` into ``exp_dir``
        * rewrites ``nu_<simdate>.csv`` in ``exp_dir`` with the Rt columns
          merged in (existing Rt columns are replaced)
        * calls ``rt_plot`` twice (last 60 days and full range)

    Args:
        grp_numbers: iterable of region numbers; 0 maps to "illinois", any
            other value ``n`` maps to "covidregion_<n>". Regions that are not
            present in the file are skipped.
        smoothing_window: forwarded to ``covid19.r_covid``.
        r_window_size: forwarded to ``covid19.r_covid``.

    Returns:
        The Rt DataFrame of the last processed region (kept as in the
        original implementation; the combined table is written to rtNU.csv).

    Raises:
        ValueError: if none of the requested regions is present in the file.
    """
    simdate = exp_name.split("_")[0]
    nu_file = os.path.join(exp_dir, f'nu_{simdate}.csv')

    df = pd.read_csv(nu_file)
    df['date'] = pd.to_datetime(df['date'])
    df = df[(df['date'] > pd.Timestamp('2020-03-01'))]

    # Compute the set of available regions once instead of on every iteration.
    available_regions = set(df["geography_modeled"].unique())
    merge_keys = ['date', 'geography_modeled']

    # Collect per-region frames and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and copied the accumulated frame on every call.
    rt_frames = []
    for ems_nr in grp_numbers:
        if ems_nr == 0:
            region_suffix = "illinois"
        else:
            region_suffix = f'covidregion_{str(ems_nr)}'
        if region_suffix not in available_regions:
            continue

        mdf = df[df['geography_modeled'] == region_suffix]
        mdf = mdf.set_index('date')['cases_new_median']

        # Use default distributions (for covid-19).
        # NOTE(review): the returned distributions are not passed to r_covid;
        # the call is kept in case get_distributions has side effects.
        get_distributions(show_plot=False)

        # mdf[:-1] leaves out the last row of the series.
        df_rt = covid19.r_covid(mdf[:-1],
                                smoothing_window=smoothing_window,
                                r_window_size=r_window_size)
        df_rt['geography_modeled'] = region_suffix
        df_rt.reset_index(inplace=True)
        df_rt = df_rt.rename(columns={'index': 'date',
                                      'Q0.5': 'rt_median',
                                      'Q0.025': 'rt_lower',
                                      'Q0.975': 'rt_upper'})
        df_rt['model_date'] = pd.Timestamp(simdate)
        df_rt = df_rt[['model_date', 'date', 'geography_modeled',
                       'rt_median', 'rt_lower', 'rt_upper']]
        rt_frames.append(df_rt)

    if not rt_frames:
        # Fail early with a clear message instead of writing an empty
        # rtNU.csv and crashing later in the merge.
        raise ValueError(
            "None of the requested regions were found in "
            f"'geography_modeled' of {nu_file}")

    df_rt_all = pd.concat(rt_frames)
    df_rt_all.to_csv(os.path.join(exp_dir, 'rtNU.csv'), index=False)

    rt_plot(df=df_rt_all,
            first_day=last_plot_day - pd.Timedelta(60, 'days'),
            last_day=last_plot_day,
            plotname='rt_by_covidregion_truncated')

    if 'rt_median' in df.columns:
        print("Warning: Overwriting already present Rt estimates")
        df = df.drop(['rt_median', 'rt_lower', 'rt_upper'], axis=1)

    # Single merge/write path; previously duplicated in both branches.
    df_with_rt = pd.merge(how='left', left=df, right=df_rt_all,
                          left_on=merge_keys, right_on=merge_keys)
    df_with_rt.to_csv(nu_file, index=False)

    rt_plot(df=df_rt_all, plotname='estimated_rt_by_covidregion_full')
    return rt_frames[-1]
def run_Rt_estimation(smoothing_window=14, r_window_size=7):
    """Estimate Rt for covid regions 1-11 from the aggregated line-list data.

    Code following online example:
    https://github.com/lo-hfk/epyestim/blob/main/notebooks/covid_tutorial.ipynb

    Reads the module-level names ``datapath``, ``plot_path``,
    ``first_plot_day`` and ``last_plot_day``.

    Side effects:
        * writes ``rt_from_LLdata.csv`` into ``plot_path``
        * calls ``rt_plot`` twice (full range, then the configured window)

    Args:
        smoothing_window: forwarded to ``covid19.r_covid``; also stored in
            the output table.
        r_window_size: forwarded to ``covid19.r_covid``; also stored in the
            output table.

    Returns:
        The Rt DataFrame of the last processed region (kept as in the
        original implementation; the combined table is written to CSV).
    """
    ll_dir = os.path.join(datapath, 'covid_IDPH', 'Cleaned Data')
    LL_file_date = get_latest_LLfiledate(file_path=ll_dir)
    df = pd.read_csv(
        os.path.join(ll_dir, f'{LL_file_date}_jg_aggregated_covidregion.csv'))
    df['date'] = pd.to_datetime(df['date'])
    df = df[df['date'].between(pd.Timestamp('2020-04-01'),
                               pd.Timestamp.today())]

    # Collect per-region frames and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and copied the accumulated frame on every call.
    rt_frames = []
    for ems_nr in range(1, 12):
        mdf = df.loc[df['covid_region'] == ems_nr]
        # Cast on the extracted Series rather than assigning back onto a
        # slice of df (which triggers SettingWithCopyWarning).
        mdf = mdf.set_index('date')['cases'].astype(int)

        # Use default distributions (for covid-19).
        # NOTE(review): the returned distributions are not passed to r_covid;
        # the call is kept in case get_distributions has side effects.
        get_distributions(show_plot=False)

        # mdf[:-1] leaves out the last row of the series.
        df_rt = covid19.r_covid(mdf[:-1],
                                smoothing_window=smoothing_window,
                                r_window_size=r_window_size)
        df_rt['geography_modeled'] = f'covidregion_{ems_nr}'
        df_rt.reset_index(inplace=True)
        df_rt = df_rt.rename(columns={'index': 'date',
                                      'Q0.5': 'rt_median',
                                      'Q0.025': 'rt_lower',
                                      'Q0.975': 'rt_upper'})
        # Explicit copy so the column assignments below do not write to a
        # view of the un-subsetted frame.
        df_rt = df_rt[['date', 'geography_modeled',
                       'rt_median', 'rt_lower', 'rt_upper']].copy()
        df_rt['smoothing_window'] = smoothing_window
        df_rt['r_window_size'] = r_window_size
        rt_frames.append(df_rt)

    df_rt_all = pd.concat(rt_frames)
    df_rt_all.to_csv(os.path.join(plot_path, 'rt_from_LLdata.csv'),
                     index=False)

    rt_plot(df=df_rt_all, first_day=None, last_day=None)
    rt_plot(df=df_rt_all, first_day=first_plot_day, last_day=last_plot_day)
    return rt_frames[-1]
def run_Rt_estimation(grp_numbers, smoothing_window, r_window_size):
    """Estimate Rt per region and scenario from simulation trajectories.

    Code following online example:
    https://github.com/lo-hfk/epyestim/blob/main/notebooks/covid_tutorial.ipynb

    smoothing_window of 28 days was found to be most comparable to EpiEstim
    in this case. r_window_size default is 3 if not specified (presumably the
    epyestim default; this function requires the value explicitly).
    Increasing r_window_size narrows the uncertainty bounds.

    Reads the module-level names ``exp_name`` and ``exp_dir``. For every
    region one CSV named ``rt_trajectories<region_label>.csv`` is written
    into ``exp_dir``, holding the Rt estimates of all scenarios.

    Args:
        grp_numbers: iterable of region numbers; 0 loads the '_All'
            trajectories (labelled "illinois"), any other value ``n`` loads
            '_EMS-<n>' (labelled "covidregion_<n>").
        smoothing_window: forwarded to ``covid19.r_covid``.
        r_window_size: forwarded to ``covid19.r_covid``.

    Returns:
        None.
    """
    simdate = exp_name.split("_")[0]

    for ems_nr in grp_numbers:
        if ems_nr == 0:
            region_suffix = '_All'
            region_label = "illinois"
        else:
            region_suffix = f'_EMS-{ems_nr}'
            region_label = f'covidregion_{str(ems_nr)}'
        print(region_suffix)

        df = load_sim_data(exp_name, region_suffix=region_suffix)
        df['date'] = pd.to_datetime(df['date'])

        # Use default distributions (for covid-19).
        # NOTE(review): the returned distributions are not passed to r_covid;
        # the call is kept in case get_distributions has side effects.
        get_distributions(show_plot=False)

        # Collect per-scenario frames and concatenate once: DataFrame.append
        # was removed in pandas 2.0.
        scen_frames = []
        for s, scen in enumerate(df.scen_num.unique()):
            print(scen)
            mdf = df[df['scen_num'] == scen]
            mdf = mdf.set_index('date')['new_infected']

            # mdf[:-1] leaves out the last row of the series.
            df_rt = covid19.r_covid(mdf[:-1],
                                    smoothing_window=smoothing_window,
                                    r_window_size=r_window_size)
            df_rt.reset_index(inplace=True)
            df_rt = df_rt.rename(columns={'index': 'date',
                                          'Q0.5': 'rt_median',
                                          'Q0.025': 'rt_lower',
                                          'Q0.975': 'rt_upper'})
            df_rt['model_date'] = pd.Timestamp(simdate)
            df_rt['geography_modeled'] = region_label
            df_rt['scen_num'] = scen
            df_rt['scen_enumerator'] = s
            scen_frames.append(df_rt)

        # pd.concat rejects an empty list; fall back to an empty frame so a
        # region without scenarios still produces an (empty) output file.
        df_rt_scen = pd.concat(scen_frames) if scen_frames else pd.DataFrame()
        df_rt_scen.to_csv(
            os.path.join(exp_dir, 'rt_trajectories' + region_label + '.csv'),
            index=False)
def test_r_covid_cutoff(self):
    """r_covid median estimates for a case series that starts with a long run of zeros.

    The series covers 293 days from 2020-01-22; its first 34 entries are
    zero. The 'Q0.5' column of the result is compared with reference values
    to one decimal place.
    """
    # Fixed seed so the estimate is reproducible between runs.
    np.random.seed(5764)

    daily_counts = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0,
        1, 0, 7, 0, 10, 9, 15, 14, 34, 24, 100, 54, 69, 37, 117, 161, 0, 487,
        220, 841, 0, 500, 328, 1047, 1219, 1281, 899, 1321, 1082, 1020, 914,
        1117, 1148, 753, 1093, 683, 1163, 1059, 779, 899, 595, 557, 596, 1027,
        771, 500, 556, 308, 273, 248, 400, 396, 346, 326, 336, 204, 119, 205,
        228, 181, 217, 167, 103, 100, 143, 179, 119, 112, 88, 76, 28, 51, 66,
        81, 44, 54, 39, 36, 33, 50, 51, 58, 15, 10, 21, 40, 36, 13, 18, 11, 10,
        15, 15, 20, 32, 17, 17, 9, 3, 19, 20, 23, 20, 9, 7, 16, 23, 33, 19, 31,
        23, 14, 23, 33, 13, 35, 8, 49, 18, 22, 44, 52, 58, 69, 62, 35, 62, 137,
        116, 134, 97, 70, 47, 54, 129, 88, 104, 127, 66, 63, 70, 132, 142, 92,
        110, 99, 43, 108, 141, 117, 154, 148, 110, 65, 132, 193, 220, 210, 180,
        138, 66, 130, 181, 181, 161, 182, 152, 105, 187, 274, 234, 268, 253,
        200, 128, 197, 311, 266, 306, 295, 276, 157, 202, 383, 361, 340, 376,
        292, 163, 216, 370, 364, 405, 425, 444, 191, 245, 469, 405, 528, 465,
        475, 257, 315, 514, 530, 488, 0, 0, 1095, 286, 437, 391, 372, 0, 0,
        782, 225, 411, 550, 552, 0, 0, 1548, 700, 1077, 1172, 1487, 0, 0, 4068,
        1445, 2823, 2613, 3105, 0, 0, 8737, 3008, 5596, 5256, 6634, 0, 0,
        17440, 5949, 8616, 9386, 9207, 0, 0, 21926, 6126, 10073, 10128, 9409,
        0, 0, 17309,
    ]
    case_series = pd.Series(
        daily_counts, index=pd.date_range('2020-01-22', periods=293))

    reporting_delay = generate_onset_to_reporting_distribution_singapore()
    result = r_covid(confirmed_cases=case_series,
                     delay_distribution=reporting_delay,
                     quantiles=[0.5])

    expected_median = [
        2.59960214, 2.42713596, 2.36801365, 2.44343708, 2.54371424,
        2.59963764, 2.53764468, 2.39790211, 2.25661617, 2.1689964,
        2.13205504, 2.10722693, 2.05398289, 1.96185502, 1.82583515,
        1.67238124, 1.52266781, 1.39083715, 1.28338702, 1.19770325,
        1.12809483, 1.06886891, 1.01726886, 0.96890564, 0.92362263,
        0.88377431, 0.85152062, 0.82804287, 0.81421856, 0.80843819,
        0.80883319, 0.80933912, 0.80606134, 0.79563434, 0.77692084,
        0.7529111, 0.72841375, 0.70834919, 0.69675028, 0.69548764,
        0.69848636, 0.70033662, 0.6964799, 0.68619966, 0.67762849,
        0.67691045, 0.69190406, 0.71940879, 0.75094328, 0.77253703,
        0.77903016, 0.77054792, 0.75845293, 0.74712806, 0.74087147,
        0.73948702, 0.74120346, 0.73805086, 0.72855332, 0.71533214,
        0.70149376, 0.6932034, 0.68991684, 0.68851331, 0.68835197,
        0.68483663, 0.68632654, 0.70099351, 0.72582877, 0.76086942,
        0.79663442, 0.81960689, 0.82568108, 0.82178255, 0.80762545,
        0.79448565, 0.78327463, 0.77209426, 0.76407495, 0.76222464,
        0.7704921, 0.79284348, 0.82959292, 0.86446548, 0.89962207,
        0.9311943, 0.9579237, 0.98441243, 1.0352803, 1.08957905,
        1.11678579, 1.12860319, 1.12464211, 1.11692886, 1.12750097,
        1.15296689, 1.1838994, 1.22107088, 1.23894021, 1.2328738,
        1.21126435, 1.19421602, 1.18040038, 1.17694589, 1.18363591,
        1.19113706, 1.19995364, 1.22028044, 1.26160559, 1.32272207,
        1.38064407, 1.44175695, 1.48599467, 1.52381945, 1.55364961,
        1.57350822, 1.55720066, 1.49823225, 1.40417142, 1.3034397,
        1.21622314, 1.14951094, 1.10627253, 1.07663047, 1.05578566,
        1.03038797, 1.00369567, 0.98196295, 0.97145419, 0.97741591,
        0.99761393, 1.02539234, 1.05030807, 1.06660386, 1.07399873,
        1.08003171, 1.08947449, 1.10413773, 1.11814489, 1.12648959,
        1.13177592, 1.13838639, 1.15194605, 1.17238448, 1.19432446,
        1.20100826, 1.18565284, 1.15206383, 1.11238385, 1.07845761,
        1.05709792, 1.04774574, 1.04638855, 1.04756145, 1.04612918,
        1.0469924, 1.05948666, 1.07980256, 1.11200442, 1.14287345,
        1.16462608, 1.17054932, 1.16668208, 1.15999564, 1.15751063,
        1.15835069, 1.15497534, 1.14132904, 1.11762787, 1.0956717,
        1.08353587, 1.08382907, 1.09196015, 1.10182822, 1.10281747,
        1.091496, 1.07380952, 1.06056518, 1.05967571, 1.06934734,
        1.08446026, 1.09413929, 1.09177153, 1.08346686, 1.07614132,
        1.0780672, 1.08780339, 1.10104586, 1.11195392, 1.10750554,
        1.09311454, 1.07002757, 1.03841093, 1.00912397, 0.98801527,
        0.97950988, 0.97873168, 0.97970125, 0.97381753, 0.95784123,
        0.93552611, 0.91356957, 0.90312199, 0.90995052, 0.93441949,
        0.97278021, 1.01274102, 1.05082948, 1.09286146, 1.14838029,
        1.2297979, 1.34413185, 1.47177478, 1.57404505, 1.62573053,
        1.64020524, 1.65240012, 1.68002175, 1.71706891, 1.7367423,
        1.70830268, 1.64205551, 1.56982027, 1.52576074, 1.5187258,
        1.54104773, 1.5622353, 1.55376551, 1.51205394, 1.45996614,
        1.42411088, 1.4130697, 1.42072672, 1.42542989, 1.40307067,
        1.351099, 1.28347502, 1.22232452, 1.18062028, 1.15925023,
        1.14822265, 1.12199702, 1.07887335, 1.02846493, 0.99358597,
        0.97897602, 0.98055132, 0.99108749,
    ]
    median_estimate = result['Q0.5']
    assert_array_almost_equal(expected_median, median_estimate, decimal=1)
def test_r_covid(self):
    """r_covid median estimates and result index for a daily case series.

    The input covers 2020-02-25 through 2020-11-09. The result index is
    expected to span 2020-03-01 through 2020-11-03, and the 'Q0.5' column is
    compared with reference values to two decimal places.
    """
    # Fixed seed so the estimate is reproducible between runs.
    np.random.seed(5764)

    daily_counts = [
        1, 0, 7, 0, 10, 9, 15, 14, 34, 24, 100, 54, 69, 37, 117, 161, 0, 487,
        220, 841, 0, 500, 328, 1047, 1219, 1281, 899, 1321, 1082, 1020, 914,
        1117, 1148, 753, 1093, 683, 1163, 1059, 779, 899, 595, 557, 596, 1027,
        771, 500, 556, 308, 273, 248, 400, 396, 346, 326, 336, 204, 119, 205,
        228, 181, 217, 167, 103, 100, 143, 179, 119, 112, 88, 76, 28, 51, 66,
        81, 44, 54, 39, 36, 33, 50, 51, 58, 15, 10, 21, 40, 36, 13, 18, 11, 10,
        15, 15, 20, 32, 17, 17, 9, 3, 19, 20, 23, 20, 9, 7, 16, 23, 33, 19, 31,
        23, 14, 23, 33, 13, 35, 8, 49, 18, 22, 44, 52, 58, 69, 62, 35, 62, 137,
        116, 134, 97, 70, 47, 54, 129, 88, 104, 127, 66, 63, 70, 132, 142, 92,
        110, 99, 43, 108, 141, 117, 154, 148, 110, 65, 132, 193, 220, 210, 180,
        138, 66, 130, 181, 181, 161, 182, 152, 105, 187, 274, 234, 268, 253,
        200, 128, 197, 311, 266, 306, 295, 276, 157, 202, 383, 361, 340, 376,
        292, 163, 216, 370, 364, 405, 425, 444, 191, 245, 469, 405, 528, 465,
        475, 257, 315, 514, 530, 488, 0, 0, 1095, 286, 437, 391, 372, 0, 0,
        782, 225, 411, 550, 552, 0, 0, 1548, 700, 1077, 1172, 1487, 0, 0, 4068,
        1445, 2823, 2613, 3105, 0, 0, 8737, 3008, 5596, 5256, 6634, 0, 0,
        17440, 5949, 8616, 9386, 9207, 0, 0, 21926, 6126, 10073, 10128, 9409,
        0, 0, 17309,
    ]
    case_series = pd.Series(
        daily_counts, index=pd.date_range('2020-02-25', '2020-11-09'))

    reporting_delay = generate_onset_to_reporting_distribution_singapore()
    result = r_covid(confirmed_cases=case_series,
                     delay_distribution=reporting_delay,
                     quantiles=[0.5])

    expected_index = pd.date_range('2020-03-01', '2020-11-03')
    self.assertTrue(result.index.equals(expected_index))

    expected_median = [
        4.58, 3.39, 2.63, 2.31, 2.25, 2.33, 2.36, 2.31, 2.23, 2.17,
        2.14, 2.12, 2.07, 1.97, 1.83, 1.67, 1.52, 1.39, 1.28, 1.20,
        1.13, 1.07, 1.02, 0.97, 0.92, 0.88, 0.85, 0.83, 0.82, 0.81,
        0.81, 0.81, 0.81, 0.80, 0.78, 0.75, 0.73, 0.71, 0.70, 0.70,
        0.70, 0.70, 0.70, 0.69, 0.68, 0.68, 0.69, 0.72, 0.75, 0.77,
        0.78, 0.77, 0.75, 0.74, 0.74, 0.74, 0.74, 0.74, 0.73, 0.71,
        0.70, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.70, 0.73, 0.76,
        0.79, 0.82, 0.83, 0.82, 0.81, 0.80, 0.78, 0.77, 0.76, 0.76,
        0.76, 0.79, 0.82, 0.87, 0.91, 0.93, 0.96, 1.00, 1.04, 1.09,
        1.11, 1.11, 1.12, 1.12, 1.14, 1.16, 1.19, 1.22, 1.24, 1.23,
        1.22, 1.19, 1.18, 1.18, 1.18, 1.19, 1.19, 1.22, 1.26, 1.32,
        1.38, 1.44, 1.49, 1.53, 1.57, 1.58, 1.56, 1.50, 1.41, 1.31,
        1.22, 1.15, 1.11, 1.08, 1.06, 1.03, 1.00, 0.98, 0.97, 0.98,
        1.00, 1.03, 1.05, 1.06, 1.07, 1.07, 1.09, 1.10, 1.11, 1.12,
        1.13, 1.14, 1.15, 1.17, 1.19, 1.20, 1.19, 1.15, 1.11, 1.07,
        1.05, 1.05, 1.04, 1.04, 1.04, 1.04, 1.06, 1.08, 1.11, 1.15,
        1.17, 1.17, 1.17, 1.16, 1.16, 1.16, 1.15, 1.14, 1.12, 1.10,
        1.08, 1.08, 1.09, 1.10, 1.10, 1.09, 1.08, 1.06, 1.06, 1.07,
        1.09, 1.10, 1.09, 1.08, 1.08, 1.07, 1.09, 1.10, 1.11, 1.11,
        1.09, 1.07, 1.04, 1.01, 0.99, 0.98, 0.98, 0.98, 0.97, 0.96,
        0.94, 0.91, 0.91, 0.91, 0.94, 0.97, 1.01, 1.05, 1.09, 1.15,
        1.23, 1.34, 1.47, 1.57, 1.62, 1.64, 1.65, 1.68, 1.72, 1.74,
        1.71, 1.64, 1.57, 1.52, 1.52, 1.54, 1.56, 1.55, 1.51, 1.46,
        1.42, 1.41, 1.42, 1.43, 1.40, 1.35, 1.28, 1.22, 1.18, 1.16,
        1.15, 1.12, 1.08, 1.03, 0.99, 0.98, 0.98, 0.99,
    ]
    median_estimate = result['Q0.5']
    assert_array_almost_equal(expected_median, median_estimate, decimal=2)