def load_observations(fips=None, ref_date=REF_DATE):
    """
    Load observations (new cases, new deaths and hospitalizations) for
    given fips code.

    Parameters
    ----------
    fips: str
        FIPS code. A 5-character code loads county-level data and a
        2-character code loads state-level data.
    ref_date: Datetime
        Reference start date.

    Returns
    -------
    observations: pd.DataFrame
        Contains observations for given fips codes, with columns:
        - times: observation times, in days since ref_date
        - new_cases: float, observed new cases
        - new_deaths: float, observed new deaths
        - hospitalizations: float, observed hospitalizations. When the
          source reports cumulative hospitalizations the day-over-day
          increments are stored; when it reports current hospitalizations
          the reported values are stored unchanged.
        and dates of observation as index. Columns without any data are
        dropped from the result.

    Raises
    ------
    ValueError
        If fips is None or is not a 2- or 5-character code.
    """
    if fips is None or len(fips) not in (2, 5):
        raise ValueError(
            f'fips must be a 2-character state code or a 5-character '
            f'county code, got {fips!r}')

    observations = {}
    if len(fips) == 5:
        times, observations['new_cases'], observations['new_deaths'] = \
            load_data.load_new_case_data_by_fips(fips, ref_date)
        hospital_times, hospitalizations, hospitalization_data_type = \
            load_data.load_hospitalization_data(fips, t0=ref_date)
        observations['times'] = times.values
    else:
        state_obj = us.states.lookup(fips)
        observations['times'], observations['new_cases'], observations['new_deaths'] = \
            load_data.load_new_case_data_by_state(state_obj.name, ref_date)
        hospital_times, hospitalizations, hospitalization_data_type = \
            load_data.load_hospitalization_data_by_state(state_obj.abbr, t0=ref_date)

    observations['times'] = np.array(observations['times'])

    # Start from an all-NaN column and fill in only the days for which
    # hospitalization data exists.
    observations['hospitalizations'] = np.full(
        observations['times'].shape[0], np.nan)
    if hospitalization_data_type is HospitalizationDataType.CUMULATIVE_HOSPITALIZATIONS:
        # np.diff returns one value fewer than its input, so the first
        # hospital time has no increment; each increment is attributed to
        # the later of the two days it spans.
        day_index = hospital_times[1:] - observations['times'].min()
        observations['hospitalizations'][day_index] = np.diff(hospitalizations)
    elif hospitalization_data_type is HospitalizationDataType.CURRENT_HOSPITALIZATIONS:
        day_index = hospital_times - observations['times'].min()
        observations['hospitalizations'][day_index] = hospitalizations

    observation_dates = [
        ref_date + timedelta(days=int(t)) for t in observations['times']
    ]
    # Drop columns that are entirely NaN (e.g. hospitalizations when no
    # hospitalization data is available).
    observations = pd.DataFrame(
        observations,
        index=pd.DatetimeIndex(observation_dates)).dropna(axis=1, how='all')
    return observations
def __init__(self,
             fips,
             ref_date=datetime(year=2020, month=1, day=1),
             min_deaths=2,
             n_years=1,
             cases_to_deaths_err_factor=.5,
             hospital_to_deaths_err_factor=.5,
             percent_error_on_max_observation=0.5,
             with_age_structure=False):
    """
    Store the fit configuration and load the observed case, death and
    hospitalization series for one state or county.

    Parameters
    ----------
    fips: str
        FIPS code. A 2-character code is treated as a state; any other
        code is looked up as a county in the county metadata table.
    ref_date: datetime
        Reference date handed to the data loaders.
    min_deaths: int
        Stored on the instance as given.
    n_years: int
        Length of the time grid: `t_list` runs from 0 to
        int(365 * n_years) with one point per day.
    cases_to_deaths_err_factor: float
    hospital_to_deaths_err_factor: float
    percent_error_on_max_observation: float
        Stored on the instance as given (presumably consumed by
        calculate_observation_errors -- confirm there).
    with_age_structure: bool
        Stored on the instance as given.
    """
    # Seed the random state. It is unclear whether this propagates to the
    # Minuit optimizer.
    np.random.seed(seed=42)

    n_days = int(365 * n_years)

    self.fips = fips
    self.ref_date = ref_date
    self.min_deaths = min_deaths
    self.t_list = np.linspace(0, n_days, n_days + 1)
    self.cases_to_deaths_err_factor = cases_to_deaths_err_factor
    self.hospital_to_deaths_err_factor = hospital_to_deaths_err_factor
    self.percent_error_on_max_observation = percent_error_on_max_observation
    self.t0_guess = 60
    self.with_age_structure = with_age_structure

    if len(fips) == 2:
        # State FIPS are 2 digits
        self.agg_level = AggregationLevel.STATE
        state_obj = us.states.lookup(fips)
        state_name = state_obj.name
        self.state_obj = state_obj
        self.state = state_name

        case_series = load_data.load_new_case_data_by_state(state_name, ref_date)
        self.times, self.observed_new_cases, self.observed_new_deaths = case_series

        hospital_series = load_data.load_hospitalization_data_by_state(
            state_obj.abbr, t0=ref_date)
        (self.hospital_times,
         self.hospitalizations,
         self.hospitalization_data_type) = hospital_series

        self.display_name = state_name
    else:
        self.agg_level = AggregationLevel.COUNTY
        county_row = load_data.load_county_metadata().set_index(
            'fips').loc[fips].to_dict()
        state = county_row['state']
        self.state_obj = us.states.lookup(state)
        county = county_row['county']
        # Fall back to the bare state name when no county name is present.
        self.display_name = county + ', ' + state if county else state

        # TODO Swap for new data source.
        case_series = load_data.load_new_case_data_by_fips(fips, t0=ref_date)
        self.times, self.observed_new_cases, self.observed_new_deaths = case_series

        hospital_series = load_data.load_hospitalization_data(fips, t0=ref_date)
        (self.hospital_times,
         self.hospitalizations,
         self.hospitalization_data_type) = hospital_series

    observation_errors = self.calculate_observation_errors()
    self.cases_stdev, self.hosp_stdev, self.deaths_stdev = observation_errors

    self.set_inference_parameters()

    self.model_fit_keys = ['R0', 'eps', 't_break', 'log10_I_initial']
    self.SEIR_kwargs = self.get_average_seir_parameters()

    # Fit outputs; nothing has been computed yet at construction time.
    self.fit_results = None
    self.mle_model = None
    self.chi2_deaths = self.chi2_cases = self.chi2_hosp = None
    self.dof_deaths = self.dof_cases = self.dof_hosp = None
def __init__(
    self,
    fips,
    window_size=InferRtConstants.COUNT_SMOOTHING_WINDOW_SIZE,
    kernel_std=5,
    r_list=np.linspace(0, 10, 501),
    process_sigma=0.05,
    ref_date=datetime(year=2020, month=1, day=1),
    confidence_intervals=(0.68, 0.95),
    min_cases=5,
    min_deaths=5,
    include_testing_correction=True,
):
    """
    Store the inference settings and load the case, death and
    hospitalization series for one state or county.

    Parameters
    ----------
    fips: str
        FIPS code. A 2-character code is treated as a state; any other
        code is looked up as a county in the county metadata table.
    window_size, kernel_std, r_list, process_sigma, confidence_intervals
        Stored on the instance unchanged under the same names.
    ref_date: datetime
        Reference date handed to the data loaders; loaded times are
        converted to dates as ref_date + that many days.
    min_cases: int
        Capped at InferRtConstants.MIN_COUNTS_TO_INFER.
    min_deaths: int
        Capped at InferRtConstants.MIN_COUNTS_TO_INFER unless
        InferRtConstants.DISABLE_DEATHS is set.
    include_testing_correction: bool
        Forwarded to the new-case loader for the main case series. A
        second, raw series is always loaded with the correction off.
    """
    # Param Generation used for Xcor in align_time_series, has some stochastic FFT elements.
    np.random.seed(InferRtConstants.RNG_SEED)

    self.fips = fips
    self.r_list = r_list
    self.window_size = window_size
    self.kernel_std = kernel_std
    self.process_sigma = process_sigma
    self.ref_date = ref_date
    self.confidence_intervals = confidence_intervals
    self.include_testing_correction = include_testing_correction

    # Because rounding is disabled we don't need high min_deaths, min_cases anymore
    self.min_cases = min(InferRtConstants.MIN_COUNTS_TO_INFER, min_cases)
    self.min_deaths = min_deaths
    if not InferRtConstants.DISABLE_DEATHS:
        self.min_deaths = min(InferRtConstants.MIN_COUNTS_TO_INFER, min_deaths)

    if len(fips) == 2:
        # State FIPS are 2 digits
        self.agg_level = AggregationLevel.STATE
        state_obj = us.states.lookup(fips)
        state_name = state_obj.name
        self.state_obj = state_obj
        self.state = state_name

        corrected_series = load_data.load_new_case_data_by_state(
            state_name,
            ref_date,
            include_testing_correction=include_testing_correction,
        )
        self.times, self.observed_new_cases, self.observed_new_deaths = corrected_series

        raw_series = load_data.load_new_case_data_by_state(
            state_name, ref_date, include_testing_correction=False
        )
        self.times_raw_new_cases, self.raw_new_cases, _ = raw_series

        hospital_series = load_data.load_hospitalization_data_by_state(
            state=state_obj.abbr, t0=ref_date
        )
        (
            self.hospital_times,
            self.hospitalizations,
            self.hospitalization_data_type,
        ) = hospital_series

        self.display_name = state_name
    else:
        self.agg_level = AggregationLevel.COUNTY
        geo_metadata = load_data.load_county_metadata().set_index("fips").loc[fips].to_dict()
        self.geo_metadata = geo_metadata
        state_name = geo_metadata["state"]
        self.state = state_name
        self.state_obj = us.states.lookup(state_name)
        county_name = geo_metadata["county"]
        self.county = county_name
        # Fall back to the bare state name when no county name is present.
        self.display_name = county_name + ", " + state_name if county_name else state_name

        corrected_series = load_data.load_new_case_data_by_fips(
            fips,
            t0=ref_date,
            include_testing_correction=include_testing_correction,
        )
        self.times, self.observed_new_cases, self.observed_new_deaths = corrected_series

        raw_series = load_data.load_new_case_data_by_fips(
            fips, t0=ref_date, include_testing_correction=False,
        )
        self.times_raw_new_cases, self.raw_new_cases, _ = raw_series

        hospital_series = load_data.load_hospitalization_data(fips, t0=ref_date)
        (
            self.hospital_times,
            self.hospitalizations,
            self.hospitalization_data_type,
        ) = hospital_series

    def as_dates(day_offsets):
        # Convert day offsets from ref_date into calendar datetimes.
        return [ref_date + timedelta(days=int(t)) for t in day_offsets]

    self.case_dates = as_dates(self.times)
    self.raw_new_case_dates = as_dates(self.times_raw_new_cases)
    # hospital_dates is only defined when a hospitalization data type was
    # reported by the loader.
    if self.hospitalization_data_type:
        self.hospital_dates = as_dates(self.hospital_times)

    parameter_generator = ParameterEnsembleGenerator(
        fips=fips, N_samples=500, t_list=np.linspace(0, 365, 366)
    )
    seir_params = parameter_generator.get_average_seir_parameters()
    self.default_parameters = seir_params

    # Serial period = Incubation + 0.5 * Infections
    self.serial_period = 1 / seir_params["sigma"] + 0.5 * 1 / seir_params["delta"]

    # If we only receive current hospitalizations, we need to account for
    # the outflow to reconstruct new admissions.
    current_only = (
        self.hospitalization_data_type
        is load_data.HospitalizationDataType.CURRENT_HOSPITALIZATIONS
    )
    if current_only:
        los_general = seir_params["hospitalization_length_of_stay_general"]
        los_icu = seir_params["hospitalization_length_of_stay_icu"]
        hosp_rate_general = seir_params["hospitalization_rate_general"]
        hosp_rate_icu = seir_params["hospitalization_rate_icu"]
        icu_rate = hosp_rate_icu / hosp_rate_general
        # Per-day outflow rate: non-ICU and ICU shares, each divided by
        # its own length of stay.
        outflow_rate = (1 - icu_rate) / los_general + icu_rate / los_icu
        flow_out_of_hosp = self.hospitalizations[:-1] * outflow_rate

        # Day-over-day change in the census plus the estimated outflow.
        # The differenced series is one element shorter, so the first
        # date/time entry is dropped to keep the arrays aligned.
        self.hospitalizations = np.diff(self.hospitalizations) + flow_out_of_hosp
        self.hospital_dates = self.hospital_dates[1:]
        self.hospital_times = self.hospital_times[1:]

    self.log_likelihood = None

    self.log = structlog.getLogger(Rt_Inference_Target=self.display_name)
    self.log.info(event="Running:")
def __init__(self,
             fips,
             window_size=7,
             kernel_std=2,
             r_list=np.linspace(0, 10, 501),
             process_sigma=0.15,
             ref_date=datetime(year=2020, month=1, day=1),
             confidence_intervals=(0.68, 0.75, 0.90)):
    """
    Store the inference settings and load the case, death and
    hospitalization series for one state or county.

    Parameters
    ----------
    fips: str
        FIPS code. A 2-character code is treated as a state; any other
        code is looked up as a county in the county metadata table.
    window_size, kernel_std, r_list, process_sigma, confidence_intervals
        Stored on the instance unchanged under the same names.
    ref_date: datetime
        Reference date handed to the data loaders; loaded times are
        converted to dates as ref_date + that many days.
    """
    self.fips = fips
    self.r_list = r_list
    self.window_size = window_size
    self.kernel_std = kernel_std
    self.process_sigma = process_sigma
    self.ref_date = ref_date
    self.confidence_intervals = confidence_intervals

    if len(fips) == 2:
        # State FIPS are 2 digits
        self.agg_level = AggregationLevel.STATE
        state_obj = us.states.lookup(fips)
        state_name = state_obj.name
        self.state_obj = state_obj
        self.state = state_name
        state_metadata = load_data.load_county_metadata_by_state(state_name)
        self.geo_metadata = state_metadata.loc[state_name].to_dict()

        case_series = load_data.load_new_case_data_by_state(state_name, ref_date)
        self.times, self.observed_new_cases, self.observed_new_deaths = case_series

        hospital_series = load_data.load_hospitalization_data_by_state(
            state_obj.abbr, t0=ref_date)
        (self.hospital_times,
         self.hospitalizations,
         self.hospitalization_data_type) = hospital_series

        self.display_name = state_name
    else:
        self.agg_level = AggregationLevel.COUNTY
        geo_metadata = load_data.load_county_metadata().set_index('fips').loc[fips].to_dict()
        self.geo_metadata = geo_metadata
        state_name = geo_metadata['state']
        self.state = state_name
        self.state_obj = us.states.lookup(state_name)
        county_name = geo_metadata['county']
        self.county = county_name
        # Fall back to the bare state name when no county name is present.
        self.display_name = county_name + ', ' + state_name if county_name else state_name

        # TODO Swap for new data source.
        case_series = load_data.load_new_case_data_by_fips(fips, t0=ref_date)
        self.times, self.observed_new_cases, self.observed_new_deaths = case_series

        hospital_series = load_data.load_hospitalization_data(fips, t0=ref_date)
        (self.hospital_times,
         self.hospitalizations,
         self.hospitalization_data_type) = hospital_series

    logging.info(f'Running Rt Inference for {self.display_name}')

    def as_dates(day_offsets):
        # Convert day offsets from ref_date into calendar datetimes.
        return [ref_date + timedelta(days=int(t)) for t in day_offsets]

    self.case_dates = as_dates(self.times)
    # hospital_dates is only defined when a hospitalization data type was
    # reported by the loader.
    if self.hospitalization_data_type:
        self.hospital_dates = as_dates(self.hospital_times)

    parameter_generator = ParameterEnsembleGenerator(
        fips=fips, N_samples=500, t_list=np.linspace(0, 365, 366))
    seir_params = parameter_generator.get_average_seir_parameters()
    self.default_parameters = seir_params

    # Serial period = Incubation + 0.5 * Infections
    self.serial_period = 1 / seir_params['sigma'] + 0.5 * 1 / seir_params['delta']

    # If we only receive current hospitalizations, we need to account for
    # the outflow to reconstruct new admissions.
    current_only = (
        self.hospitalization_data_type
        is load_data.HospitalizationDataType.CURRENT_HOSPITALIZATIONS)
    if current_only:
        los_general = seir_params['hospitalization_length_of_stay_general']
        los_icu = seir_params['hospitalization_length_of_stay_icu']
        hosp_rate_general = seir_params['hospitalization_rate_general']
        hosp_rate_icu = seir_params['hospitalization_rate_icu']
        icu_rate = hosp_rate_icu / hosp_rate_general
        # Per-day outflow rate: non-ICU and ICU shares, each divided by
        # its own length of stay.
        outflow_rate = (1 - icu_rate) / los_general + icu_rate / los_icu
        flow_out_of_hosp = self.hospitalizations[:-1] * outflow_rate

        # Day-over-day change in the census plus the estimated outflow.
        # The differenced series is one element shorter, so the first
        # date/time entry is dropped to keep the arrays aligned.
        self.hospitalizations = np.diff(self.hospitalizations) + flow_out_of_hosp
        self.hospital_dates = self.hospital_dates[1:]
        self.hospital_times = self.hospital_times[1:]

    self.log_likelihood = None
def __init__(
    self,
    fips,
    ref_date=datetime(year=2020, month=1, day=1),
    min_deaths=2,
    n_years=1,
    cases_to_deaths_err_factor=0.5,
    hospital_to_deaths_err_factor=0.5,
    percent_error_on_max_observation=0.5,
    with_age_structure=False,
):
    """
    Store the fit configuration and load the observed case, death,
    hospitalization and ICU series for one state or county.

    Parameters
    ----------
    fips: str
        FIPS code. A 2-character code is treated as a state; any other
        code is looked up as a county in the county metadata table.
    ref_date: datetime
        Reference date handed to the data loaders.
    min_deaths: int
        Stored on the instance as given.
    n_years: int
        Length of the time grid: `t_list` runs from 0 to
        int(365 * n_years) with one point per day.
    cases_to_deaths_err_factor: float
    hospital_to_deaths_err_factor: float
    percent_error_on_max_observation: float
        Stored on the instance as given (presumably consumed by
        calculate_observation_errors -- confirm there).
    with_age_structure: bool
        Stored on the instance as given.
    """
    # Seed the random state. It is unclear whether this propagates to the
    # Minuit optimizer.
    np.random.seed(seed=42)

    n_days = int(365 * n_years)

    self.fips = fips
    self.ref_date = ref_date

    # Whole days from ref_date up to one week before today.
    elapsed = dt.date.today() - ref_date.date() - timedelta(days=7)
    self.days_since_ref_date = elapsed.days
    # ndays end of 2nd ramp may extend past days_since_ref_date w/o penalty on chi2 score
    self.days_allowed_beyond_ref = 0

    self.min_deaths = min_deaths
    self.t_list = np.linspace(0, n_days, n_days + 1)
    self.cases_to_deaths_err_factor = cases_to_deaths_err_factor
    self.hospital_to_deaths_err_factor = hospital_to_deaths_err_factor
    self.percent_error_on_max_observation = percent_error_on_max_observation
    self.t0_guess = 60
    self.with_age_structure = with_age_structure

    if len(fips) == 2:
        # State FIPS are 2 digits
        self.agg_level = AggregationLevel.STATE
        state_obj = us.states.lookup(fips)
        state_name = state_obj.name
        self.state_obj = state_obj
        self.state = state_name

        case_series = load_data.load_new_case_data_by_state(state_name, ref_date)
        self.times, self.observed_new_cases, self.observed_new_deaths = case_series

        hospital_series = load_data.load_hospitalization_data_by_state(
            state_obj.abbr, t0=ref_date)
        (
            self.hospital_times,
            self.hospitalizations,
            self.hospitalization_data_type,
        ) = hospital_series

        icu_series = load_data.load_hospitalization_data_by_state(
            state_obj.abbr, t0=ref_date, category=HospitalizationCategory.ICU)
        self.icu_times, self.icu, self.icu_data_type = icu_series

        self.display_name = state_name
    else:
        self.agg_level = AggregationLevel.COUNTY
        county_row = load_data.load_county_metadata().set_index(
            "fips").loc[fips].to_dict()
        state = county_row["state"]
        self.state_obj = us.states.lookup(state)
        county = county_row["county"]
        # Fall back to the bare state name when no county name is present.
        self.display_name = county + ", " + state if county else state

        # TODO Swap for new data source.
        case_series = load_data.load_new_case_data_by_fips(fips, t0=ref_date)
        self.times, self.observed_new_cases, self.observed_new_deaths = case_series

        hospital_series = load_data.load_hospitalization_data(fips, t0=ref_date)
        (
            self.hospital_times,
            self.hospitalizations,
            self.hospitalization_data_type,
        ) = hospital_series

        icu_series = load_data.load_hospitalization_data(
            fips, t0=ref_date, category=HospitalizationCategory.ICU)
        self.icu_times, self.icu, self.icu_data_type = icu_series

    observation_errors = self.calculate_observation_errors()
    self.cases_stdev, self.hosp_stdev, self.deaths_stdev = observation_errors

    self.set_inference_parameters()

    self.model_fit_keys = [
        "R0",
        "eps",
        "t_break",
        "eps2",
        "t_delta_phases",
        "log10_I_initial",
    ]
    self.SEIR_kwargs = self.get_average_seir_parameters()

    # Fit outputs; nothing has been computed yet at construction time.
    self.fit_results = None
    self.mle_model = None
    self.chi2_deaths = self.chi2_cases = self.chi2_hosp = None
    self.dof_deaths = self.dof_cases = self.dof_hosp = None