def test_fips_metadata(nyc_fips):
    """Verify the metadata exposed by an InitialConditionsFitter built for the NYC fips fixture."""
    # The combined timeseries is built before the fitter is constructed.
    combined_datasets.build_us_timeseries_with_all_fields()
    nyc_fitter = initial_conditions_fitter.InitialConditionsFitter(nyc_fips)

    expected_start = pd.Timestamp("2020-03-01")

    assert nyc_fitter.state == "NY"
    assert nyc_fitter.county == "New York County"
    assert nyc_fitter.data_start_date == expected_start
    # ``y`` must be a numpy array rather than a pandas DataFrame.
    assert isinstance(nyc_fitter.y, numpy.ndarray)
def _cache_global_datasets():
    """Warm the combined-dataset cache and load the module-level CDS and NYT datasets.

    Caching pre-fork makes sure the cache is populated for subprocesses. The
    return values of the combined-dataset builders are not needed; the only
    goal of calling them is to populate the cache.
    """
    global nyt_dataset, cds_dataset

    combined_datasets.build_us_latest_with_all_fields()
    combined_datasets.build_us_timeseries_with_all_fields()

    # Each local dataset is loaded only when the module global is still None.
    if cds_dataset is None:
        cds_dataset = CDSDataset.local()
    if nyt_dataset is None:
        nyt_dataset = NYTimesDataset.local()
def get_current_hospitalized(fips, t0, category: HospitalizationCategory):
    """
    Return the current estimate for the number of people in the given category
    for a given fips.

    A fips of length 2 is looked up as a state; any other length is looked up
    as a county.

    Parameters
    ----------
    fips: str
        US fips to lookup.
    t0: datetime
        Datetime to offset by.
    category: HospitalizationCategory
        'icu' for just ICU or 'hospitalized' for all ICU + Acute.

    Returns
    -------
    The result of ``_get_current_hospitalized`` for the selected data. Per the
    original documentation this is:

    time: float
        Days since t0 for the hospitalization data.
    current estimate: float
        The most recent provided value for the current occupied in the
        requested category.
    """
    if len(fips) == 2:
        level = AggregationLevel.STATE
        location = {"state": us.states.lookup(fips).abbr}
    else:
        level = AggregationLevel.COUNTY
        location = {"fips": fips}

    us_timeseries = combined_datasets.build_us_timeseries_with_all_fields()
    df = us_timeseries.get_data(aggregation_level=level, country="USA", **location)
    return _get_current_hospitalized(df, t0, category)
def generate_api(input_dir, output, summary_output, aggregation_level, state, fips):
    """The entry function for invocation.

    Subsets the combined latest and timeseries datasets by the given
    aggregation level, state and fips, then, for every intervention, runs the
    API pipeline and deploys the county-level outputs followed by the
    state-level outputs.
    """
    state_abbrevs = [us_state.abbr for us_state in us.STATES]
    subset_filters = dict(state=state, fips=fips, states=state_abbrevs)

    us_latest = combined_datasets.build_us_latest_with_all_fields().get_subset(
        aggregation_level, **subset_filters
    )
    us_timeseries = combined_datasets.build_us_timeseries_with_all_fields().get_subset(
        aggregation_level, **subset_filters
    )

    for intervention in list(Intervention):
        _logger.info(f"Running intervention {intervention.name}")
        all_timeseries = api_pipeline.run_on_all_fips_for_intervention(
            us_latest, us_timeseries, intervention, input_dir
        )
        # Counties are deployed first, then states.
        for level in (AggregationLevel.COUNTY, AggregationLevel.STATE):
            level_timeseries = [
                area for area in all_timeseries if area.aggregate_level is level
            ]
            api_pipeline.deploy_single_level(
                intervention, level_timeseries, summary_output, output
            )
def generate_top_counties(disable_validation, input_dir, output, state, fips):
    """The entry function for invocation.

    Runs the API pipeline for the selected intervention over county-level data,
    limited to 100 results keyed by negated peak hospital-bed shortfall, and
    writes them as ``counties_top_100.json``.

    ``disable_validation`` is accepted but not used by this function.
    """
    intervention = Intervention.SELECTED_INTERVENTION

    def sort_func(area: CovidActNowAreaTimeseries):
        # Negated so that larger shortfalls get smaller sort keys.
        return -area.projections.totalHospitalBeds.peakShortfall

    state_abbrevs = [us_state.abbr for us_state in us.STATES]
    county_filters = dict(states=state_abbrevs, state=state, fips=fips)
    us_latest = combined_datasets.build_us_latest_with_all_fields().get_subset(
        AggregationLevel.COUNTY, **county_filters
    )
    us_timeseries = combined_datasets.build_us_timeseries_with_all_fields().get_subset(
        AggregationLevel.COUNTY, **county_filters
    )

    top_timeseries = api_pipeline.run_on_all_fips_for_intervention(
        us_latest,
        us_timeseries,
        Intervention.SELECTED_INTERVENTION,
        input_dir,
        sort_func=sort_func,
        limit=100,
    )
    bulk_timeseries = CovidActNowBulkTimeseries(__root__=top_timeseries)
    api_pipeline.deploy_json_api_output(
        intervention,
        bulk_timeseries,
        output,
        filename_override="counties_top_100.json",
    )
def test_build_api_output_for_intervention(nyc_fips, nyc_model_output_path, tmp_path):
    """Deploying the NYC strong-intervention output writes exactly the expected files."""
    county_output = tmp_path / "county"
    strong = Intervention.STRONG_INTERVENTION

    us_latest = combined_datasets.build_us_latest_with_all_fields()
    us_timeseries = combined_datasets.build_us_timeseries_with_all_fields()
    nyc_latest = us_latest.get_subset(None, fips=nyc_fips)
    nyc_timeseries = us_timeseries.get_subset(None, fips=nyc_fips)

    all_timeseries_api = api_pipeline.run_on_all_fips_for_intervention(
        nyc_latest, nyc_timeseries, strong, nyc_model_output_path.parent
    )
    api_pipeline.deploy_single_level(strong, all_timeseries_api, tmp_path, county_output)

    expected_outputs = sorted(
        [
            "counties.STRONG_INTERVENTION.timeseries.json",
            "counties.STRONG_INTERVENTION.csv",
            "counties.STRONG_INTERVENTION.timeseries.csv",
            "counties.STRONG_INTERVENTION.json",
            "county/36061.STRONG_INTERVENTION.json",
            "county/36061.STRONG_INTERVENTION.timeseries.json",
        ]
    )
    # Collect every written file (directories excluded) relative to tmp_path.
    written_files = sorted(
        str(entry.relative_to(tmp_path))
        for entry in tmp_path.glob("**/*")
        if not entry.is_dir()
    )
    assert written_files == expected_outputs
def update_data_public_head(
    data_directory: pathlib.Path,
    latest_dataset: latest_values_dataset.LatestValuesDataset = None,
    timeseries_dataset: timeseries.TimeseriesDataset = None,
) -> Tuple[DatasetPointer, DatasetPointer]:
    """Persists US latest and timeseries dataset and saves dataset pointers for Latest tag.

    Args:
        data_directory: Directory passed to ``persist_dataset`` for both
            datasets (where the dataset and pointer are saved).
        latest_dataset: Optionally specify a LatestValuesDataset to persist instead of
            building from head. Generally used in testing to sidestep building
            entire dataset.
        timeseries_dataset: Optionally specify a TimeseriesDataset to persist instead of
            building from head. Generally used in testing to sidestep building
            entire dataset.

    Returns: Tuple of DatasetPointers to latest and timeseries datasets.
    """
    # Compare against None explicitly: a caller-supplied dataset must be
    # persisted as given, regardless of how the dataset type defines truthiness.
    if latest_dataset is None:
        latest_dataset = combined_datasets.build_us_latest_with_all_fields(skip_cache=True)
    latest_pointer = persist_dataset(latest_dataset, data_directory)

    if timeseries_dataset is None:
        timeseries_dataset = combined_datasets.build_us_timeseries_with_all_fields(
            skip_cache=True
        )
    timeseries_pointer = persist_dataset(timeseries_dataset, data_directory)

    return latest_pointer, timeseries_pointer
def save_combined_csv(csv_path_format, output_dir):
    """Save the combined datasets DataFrame, cleaned up for easier comparisons.

    The destination is derived from ``csv_path_format`` and ``output_dir`` via
    ``form_path_name``; writing is delegated to ``common_df.write_csv``.
    """
    destination = form_path_name(csv_path_format, output_dir)
    combined_frame = combined_datasets.build_us_timeseries_with_all_fields().data
    common_df.write_csv(combined_frame, destination, structlog.get_logger())
def get_hospitalization_data():
    """Return a TimeseriesDataset holding only rows that carry hospitalization data.

    A row is kept when either its current or its cumulative hospitalized value
    is non-null.
    """
    fields = TimeseriesDataset.Fields
    data = combined_datasets.build_us_timeseries_with_all_fields().data

    # Since we're using this data for hospitalized data only, only returning
    # values with hospitalization data. I think as the use cases of this data
    # source expand, we may not want to drop. For context, as of 4/8 607/1821
    # rows contained hospitalization data.
    has_hospital_data = (
        data[fields.CURRENT_HOSPITALIZED].notnull()
        | data[fields.CUMULATIVE_HOSPITALIZED].notnull()
    )
    return TimeseriesDataset(data[has_hospital_data])
def test_combined_county_has_some_timeseries_data(fips):
    """The combined county timeseries has positive values on 2020-05-01 for key fields."""
    county_timeseries = combined_datasets.build_us_timeseries_with_all_fields().get_subset(
        AggregationLevel.COUNTY, fips=fips
    )
    df = county_timeseries.data.set_index(CommonFields.DATE)
    check_date = "2020-05-01"

    for field in (CommonFields.CASES, CommonFields.DEATHS):
        assert df.loc[check_date, field] > 0

    # TODO(tom): Remove this condition when we have county data in TX too.
    if fips.startswith("06"):
        for field in (
            CommonFields.POSITIVE_TESTS,
            CommonFields.NEGATIVE_TESTS,
            CommonFields.CURRENT_ICU,
        ):
            assert df.loc[check_date, field] > 0
def get_testing_timeseries_by_fips(fips):
    """Called by generate_api.

    Returns the testing columns for ``fips`` with a total-tested column added,
    the negative-tests column dropped, columns renamed to the CDS names, dates
    formatted as '%m/%d/%y' strings, and the frame indexed by CDS fips and date.
    """
    us_timeseries = build_us_timeseries_with_all_fields()
    testing_df = us_timeseries.get_data(
        None, fips=fips, columns_slice=CDSDataset.COMMON_TEST_FIELDS
    )

    total_tested = (
        testing_df[CommonFields.NEGATIVE_TESTS] + testing_df[CommonFields.POSITIVE_TESTS]
    )
    testing_df[CDSDataset.Fields.TESTED] = total_tested
    testing_df.drop(columns=[CommonFields.NEGATIVE_TESTS], inplace=True)

    cds_column_names = dict(**CDSDataset.INDEX_FIELD_MAP, **CDSDataset.COMMON_FIELD_MAP)
    testing_df.rename(columns=cds_column_names, inplace=True)

    def _as_short_date(day):
        return day.strftime("%m/%d/%y")

    testing_df["date"] = testing_df.date.apply(_as_short_date)

    index_columns = [CDSDataset.Fields.FIPS, CDSDataset.Fields.DATE]
    testing_df.set_index(index_columns, inplace=True)
    return testing_df
def get_testing_timeseries_by_state(state):
    """Return negative tests, positive tests and date for ``state``.

    The test columns are renamed to the CovidTrackingDataSource field names and
    the date column is formatted as '%m/%d/%y' strings.
    """
    state_data = build_us_timeseries_with_all_fields().get_data(
        aggregation_level=AggregationLevel.STATE, state=state
    )
    wanted_columns = (
        CommonFields.NEGATIVE_TESTS,
        CommonFields.POSITIVE_TESTS,
        CommonFields.DATE,
    )
    testing_df = state_data.loc[:, wanted_columns]

    tracking_fields = CovidTrackingDataSource.Fields
    tracking_names = {
        CommonFields.POSITIVE_TESTS: tracking_fields.POSITIVE_TESTS,
        CommonFields.NEGATIVE_TESTS: tracking_fields.NEGATIVE_TESTS,
    }
    testing_df.rename(columns=tracking_names, inplace=True)

    def _as_short_date(day):
        return day.strftime("%m/%d/%y")

    testing_df["date"] = testing_df.date.apply(_as_short_date)
    return testing_df
def generate_state_timeseries(
    projection_row, intervention, input_dir
) -> CovidActNowStateTimeseries:
    """Build the CovidActNowStateTimeseries for the state described by ``projection_row``.

    Raises:
        Exception: if no timeseries rows result for the intervention.
    """
    state = US_STATE_ABBREV[projection_row[rc.STATE_FULL_NAME]]
    fips = projection_row[rc.FIPS]
    raw_dataseries = get_can_projection.get_can_raw_data(
        input_dir, state, fips, AggregationLevel.STATE, intervention
    )

    # Join state testing data onto the projection rows by their
    # '%m/%d/%y'-formatted date. A left join gracefully handles missing state
    # testing data (i.e. NE).
    testing_df = get_testing_timeseries_by_state(state)
    merged = pd.DataFrame(raw_dataseries).merge(testing_df, on="date", how="left")
    timeseries = [
        _generate_state_timeseries_row(record)
        for record in merged.to_dict(orient="records")
    ]

    projections = _generate_api_for_projections(projection_row)
    if not timeseries:
        raise Exception(f"State time series empty for {intervention.name}")

    state_intervention = get_can_projection.get_intervention_for_state(state)
    actuals_ts = combined_datasets.build_us_timeseries_with_all_fields()
    actual_latest = combined_datasets.build_us_latest_with_all_fields()
    state_latest = actual_latest.get_record_for_state(state)

    return CovidActNowStateTimeseries(
        population=state_latest[CommonFields.POPULATION],
        lat=projection_row[rc.LATITUDE],
        long=projection_row[rc.LONGITUDE],
        actuals=_generate_actuals(state_latest, state_intervention),
        stateName=projection_row[rc.STATE_FULL_NAME],
        fips=projection_row[rc.FIPS],
        lastUpdatedDate=_format_date(projection_row[rc.LAST_UPDATED]),
        projections=projections,
        timeseries=timeseries,
        actuals_timeseries=_generate_actuals_timeseries(
            actuals_ts.get_records_for_state(state), state_intervention
        ),
    )
def generate_county_timeseries(projection_row, intervention, input_dir):
    """Build the CovidActNowCountyTimeseries for the county described by ``projection_row``.

    Raises:
        Exception: if no timeseries rows result for the intervention.
    """
    state_abbrev = US_STATE_ABBREV[projection_row[rc.STATE_FULL_NAME]]
    fips = projection_row[rc.FIPS]
    raw_dataseries = get_can_projection.get_can_raw_data(
        input_dir, state_abbrev, fips, AggregationLevel.COUNTY, intervention
    )

    # Left-join county testing data onto the projection rows by date.
    testing_df = get_testing_timeseries_by_fips(fips)
    merged = pd.DataFrame(raw_dataseries).merge(testing_df, on="date", how="left")
    timeseries = [
        _generate_county_timeseries_row(record)
        for record in merged.to_dict(orient="records")
    ]
    if not timeseries:
        raise Exception(f"County time series empty for {intervention.name}")

    projections = _generate_api_for_projections(projection_row)
    state_intervention = get_can_projection.get_intervention_for_state(state_abbrev)
    actuals_ts = combined_datasets.build_us_timeseries_with_all_fields()
    actual_latest = combined_datasets.build_us_latest_with_all_fields()
    fips_latest = actual_latest.get_record_for_fips(fips)

    return CovidActNowCountyTimeseries(
        population=fips_latest[CommonFields.POPULATION],
        lat=projection_row[rc.LATITUDE],
        long=projection_row[rc.LONGITUDE],
        actuals=_generate_actuals(fips_latest, state_intervention),
        stateName=projection_row[rc.STATE_FULL_NAME],
        countyName=projection_row[rc.COUNTY],
        fips=projection_row[rc.FIPS],
        lastUpdatedDate=_format_date(projection_row[rc.LAST_UPDATED]),
        projections=projections,
        timeseries=timeseries,
        actuals_timeseries=_generate_actuals_timeseries(
            actuals_ts.get_records_for_fips(fips), state_intervention
        ),
    )
def test_generate_timeseries_for_fips(include_projections, nyc_model_output_path, nyc_fips):
    """The area timeseries embeds the same summary that generate_area_summary produces."""
    us_latest = combined_datasets.build_us_latest_with_all_fields()
    us_timeseries = combined_datasets.build_us_timeseries_with_all_fields()
    nyc_latest = us_latest.get_record_for_fips(nyc_fips)
    nyc_timeseries = us_timeseries.get_subset(None, fips=nyc_fips)
    intervention = Intervention.OBSERVED_INTERVENTION
    model_output = CANPyseirLocationOutput.load_from_path(nyc_model_output_path)

    area_summary = generate_api.generate_area_summary(nyc_latest, model_output)
    area_timeseries = generate_api.generate_area_timeseries(
        area_summary, nyc_timeseries, model_output
    )

    # A freshly generated summary must match the one carried by the timeseries.
    fresh_summary = generate_api.generate_area_summary(nyc_latest, model_output)
    assert fresh_summary.dict() == area_timeseries.area_summary.dict()

    # Double checking that serialized json does not contain NaNs, all values
    # should be serialized using the simplejson wrapper.
    serialized = area_timeseries.json()
    assert "NaN" not in serialized
def test_build_timeseries_and_summary_outputs(nyc_model_output_path, nyc_fips, intervention):
    """Check which outputs build_timeseries_for_fips produces for each intervention."""
    us_latest = combined_datasets.build_us_latest_with_all_fields()
    us_timeseries = combined_datasets.build_us_timeseries_with_all_fields()
    model_dir = nyc_model_output_path.parent

    area_output = api_pipeline.build_timeseries_for_fips(
        intervention, us_latest, us_timeseries, model_dir, nyc_fips
    )

    if intervention is Intervention.NO_INTERVENTION:
        # Test data does not contain no intervention model, should not output
        # any results.
        assert not area_output
        return

    assert area_output

    if intervention is Intervention.STRONG_INTERVENTION:
        assert area_output.projections
        assert area_output.timeseries
    elif intervention is Intervention.OBSERVED_INTERVENTION:
        assert not area_output.projections
        assert not area_output.timeseries
def test_unique_index_values_us_timeseries():
    """The combined US timeseries has no duplicated INDEX_FIELDS combinations."""
    us_timeseries = combined_datasets.build_us_timeseries_with_all_fields()
    indexed = us_timeseries.data.set_index(us_timeseries.INDEX_FIELDS)
    duplicated_rows = indexed.index.duplicated()
    assert not duplicated_rows.any()
def load_new_test_data_by_fips(fips, t0, smoothing_tau=5, correction_threshold=5):
    """
    Return a timeseries of new tests for a geography. Note that due to reporting
    discrepancies county to county, and state-to-state, these often do not go
    back as far as case data.

    Parameters
    ----------
    fips: str
        State or county fips code. A length-2 code is looked up as a state;
        anything else is looked up as a county.
    t0: datetime
        Reference datetime to use.
    smoothing_tau: int
        expected_positives_from_test_increase is smoothed based on an
        exponentially weighted moving average of decay factor specified here.
    correction_threshold: int
        Do not apply a correction if the incident cases per day is lower than
        this value. There can be instability if case counts are very low.

    Returns
    -------
    df: pd.DataFrame
        DataFrame containing columns:
            - 'date',
            - 'new_tests': Number of total tests performed that day
            - 'increase_in_new_tests': Increase in tests performed that day vs
              previous day
            - 'positivity_rate': Test positivity rate
            - 'expected_positives_from_test_increase': Number of positive
              detections expected just from increased test capacity.
            - 'new_positive': Day-over-day increase in positive tests.
            - 'times': days since t0 for this observation.
    """
    us_timeseries = combined_datasets.build_us_timeseries_with_all_fields()
    if len(fips) == 2:
        df = us_timeseries.get_data(AggregationLevel.STATE, state=us.states.lookup(fips).abbr)
    else:
        df = us_timeseries.get_data(AggregationLevel.COUNTY, fips=fips)

    # Keep only rows where both test counts are present and their total is positive.
    has_tests = (
        (df[CommonFields.POSITIVE_TESTS].notnull())
        & (df[CommonFields.NEGATIVE_TESTS].notnull())
        & ((df[CommonFields.POSITIVE_TESTS] + df[CommonFields.NEGATIVE_TESTS]) > 0)
    )
    # Copy so the column assignments below write to an independent frame rather
    # than to a slice of the source data.
    df = df[has_tests].copy()

    df["positivity_rate"] = df[CommonFields.POSITIVE_TESTS] / (
        df[CommonFields.POSITIVE_TESTS] + df[CommonFields.NEGATIVE_TESTS]
    )
    df["new_positive"] = np.append([0], np.diff(df[CommonFields.POSITIVE_TESTS]))

    # The first derivative gets us new instead of cumulative tests while the
    # second derivative gives us the change in new test rate.
    df["new_tests"] = np.append(
        [0], np.diff(df[CommonFields.POSITIVE_TESTS] + df[CommonFields.NEGATIVE_TESTS])
    )
    df["increase_in_new_tests"] = np.append([0], np.diff(df["new_tests"]))

    # dPositive / dTotal = 0.65 * positivity_rate was empirically determined by
    # looking at the increase in positives day-over-day relative to the
    # increase in total tests across all 50 states.
    df["expected_positives_from_test_increase"] = (
        df["increase_in_new_tests"] * 0.65 * df["positivity_rate"]
    )

    df = df[
        [
            "date",
            "new_tests",
            "increase_in_new_tests",
            "positivity_rate",
            "expected_positives_from_test_increase",
            "new_positive",
        ]
    ]
    df = df[df.increase_in_new_tests.notnull() & df.positivity_rate.notnull()].copy()

    df["expected_positives_from_test_increase"] = ewma_smoothing(
        df["expected_positives_from_test_increase"], smoothing_tau
    )
    # Skip the correction where daily new positives fall below the threshold.
    # A single .loc assignment is used because chained indexing
    # (df[col][mask] = ...) may write to a temporary and leave df unchanged.
    df.loc[
        df["new_positive"] < correction_threshold, "expected_positives_from_test_increase"
    ] = 0

    df["times"] = [
        int((date - t0).days) for date in pd.to_datetime(df["date"].values).to_pydatetime()
    ]
    return df
def _cache_global_datasets():
    """Populate the cache for the combined latest and timeseries datasets.

    Caching pre-fork makes sure the cache is populated for subprocesses. The
    builders' return values are not needed; they are called only to populate
    the cache.
    """
    builders = (
        combined_datasets.build_us_latest_with_all_fields,
        combined_datasets.build_us_timeseries_with_all_fields,
    )
    for build_dataset in builders:
        build_dataset()
def load_hospitalization_data_by_state(
    state: str,
    t0: datetime,
    category: HospitalizationCategory = HospitalizationCategory.HOSPITALIZED,
):
    """
    Obtain hospitalization data. We clip because there are sometimes negatives
    either due to data reporting or corrections in case count. These are always
    tiny so we just make downstream easier to work with by clipping.

    Current hospitalizations are preferred; cumulative hospitalizations are
    used only when no positive current values exist.

    Parameters
    ----------
    state: str
        State to lookup.
    t0: datetime
        Datetime to offset by.
    category: HospitalizationCategory
        'icu' for just ICU or 'hospitalized' for all ICU + Acute.

    Returns
    -------
    times: array(float) or NoneType
        Days since t0 for each hospitalization observation.
    observed_hospitalizations: array(int) or NoneType
        Hospitalization values clipped at zero. For cumulative data the
        glitch-corrected series is returned.
    type: HospitalizationDataType
        Specifies cumulative or current hospitalizations. All three values are
        None when no data or no positive values are available.
    """
    abbr = us.states.lookup(state).abbr
    hospitalization_data = combined_datasets.build_us_timeseries_with_all_fields().get_data(
        AggregationLevel.STATE, country="USA", state=abbr
    )
    if len(hospitalization_data) == 0:
        return None, None, None

    # NOTE(review): assumes formatting `category` in an f-string yields the
    # column suffix ('icu' / 'hospitalized') — confirm against the enum definition.
    current_column = f"current_{category}"
    cumulative_column = f"cumulative_{category}"

    if (hospitalization_data[current_column] > 0).any():
        hospitalization_data = hospitalization_data[
            hospitalization_data[current_column].notnull()
        ]
        times_new = (hospitalization_data["date"].dt.date - t0.date()).dt.days.values
        return (
            times_new,
            hospitalization_data[current_column].values.clip(min=0),
            HospitalizationDataType.CURRENT_HOSPITALIZATIONS,
        )

    if (hospitalization_data[cumulative_column] > 0).any():
        hospitalization_data = hospitalization_data[
            hospitalization_data[cumulative_column].notnull()
        ]
        times_new = (hospitalization_data["date"].dt.date - t0.date()).dt.days.values
        cumulative = hospitalization_data[cumulative_column].values.clip(min=0)

        # Some minor glitches for a few states: where a value exceeds its
        # successor, lower it to the successor's value.
        for i in range(len(cumulative) - 1):
            if cumulative[i] > cumulative[i + 1]:
                cumulative[i] = cumulative[i + 1]

        # Return the corrected array. Re-reading and re-clipping the column
        # here would silently discard the glitch correction above.
        return (
            times_new,
            cumulative,
            HospitalizationDataType.CUMULATIVE_HOSPITALIZATIONS,
        )

    return None, None, None
def get_usa_by_states_df(input_dir: str, intervention: Intervention):
    """Assemble the per-state results DataFrame for ``intervention``.

    County rows are aggregated by state, then joined with the per-state maximum
    positive/negative test counts (left), the interventions (inner) and the
    state projections (left). Columns are remapped to the result names, the
    frame is restricted to RESULT_DATA_COLUMNS_STATES, nulls are replaced with
    NULL_VALUE, and the "Combined Key" and fips columns are filled from the
    full state name.

    Asserts that every "Combined Key" occurs exactly once.
    """
    us_only = _get_usa_by_county_df()
    interventions_df = _get_interventions_df()
    projections_df = get_state_projections_df(input_dir, intervention.value, interventions_df)

    tracking_fields = CovidTrackingDataSource.Fields
    test_columns = [CommonFields.POSITIVE_TESTS, CommonFields.NEGATIVE_TESTS]

    testing_df = build_us_timeseries_with_all_fields().get_data(
        aggregation_level=AggregationLevel.STATE,
        columns_slice=[
            CommonFields.STATE,
            CommonFields.POSITIVE_TESTS,
            CommonFields.NEGATIVE_TESTS,
        ],
    )
    # Per-state maximum of each test column.
    max_tests_by_state = testing_df.groupby(CommonFields.STATE, as_index=False)[
        test_columns
    ].max()
    test_max_df = max_tests_by_state.rename(
        columns={
            CommonFields.POSITIVE_TESTS: tracking_fields.POSITIVE_TESTS,
            CommonFields.NEGATIVE_TESTS: tracking_fields.NEGATIVE_TESTS,
        }
    )

    state_aggregations = {
        "Last Update": "max",
        "Confirmed": "sum",
        "Recovered": "sum",
        "Deaths": "sum",
        "Active": "sum",
        "Country/Region": "first",
        "Latitude": "first",
        "Longitude": "first"
        # People tested is currently null
        #'People Tested': 'sum'
    }
    states_agg = us_only.groupby([CommonFields.STATE]).aggregate(state_aggregations)

    with_tests = states_agg.merge(test_max_df, on=CommonFields.STATE, how="left")
    with_interventions = with_tests.merge(
        interventions_df,
        on=CommonFields.STATE,
        how="inner",
        suffixes=["", "_dropcol"],
    )
    states_abbrev = with_interventions.merge(
        projections_df, on=CommonFields.STATE, how="left"
    )

    state_cols_remap = {
        tracking_fields.POSITIVE_TESTS: CUMULATIVE_POSITIVE_TESTS,
        tracking_fields.NEGATIVE_TESTS: CUMULATIVE_NEGATIVE_TESTS,
        **OUTPUT_COLUMN_REMAP_TO_RESULT_DATA,
    }
    states_remapped = states_abbrev.rename(columns=state_cols_remap)
    states_remapped[CommonFields.STATE_FULL_NAME] = states_remapped[CommonFields.STATE].map(
        abbrev_us_state
    )

    states_final = pd.DataFrame(states_remapped, columns=RESULT_DATA_COLUMNS_STATES)
    # Keep nulls as nulls
    states_final = states_final.fillna(NULL_VALUE)

    full_names = states_final[CommonFields.STATE_FULL_NAME]
    states_final["Combined Key"] = full_names
    states_final[CommonFields.FIPS] = states_final[CommonFields.STATE_FULL_NAME].map(us_fips)
    states_final.index.name = "OBJECTID"

    assert states_final["Combined Key"].value_counts().max() == 1
    return states_final
def load_hospitalization_data_by_state(state, t0, convert_cumulative_to_current=False, category="hospitalized"):
    """
    Obtain hospitalization data. We clip because there are sometimes negatives
    either due to data reporting or corrections in case count. These are always
    tiny so we just make downstream easier to work with by clipping.

    Parameters
    ----------
    state: str
        State to lookup.
    t0: datetime
        Datetime to offset by.
    convert_cumulative_to_current: bool
        If True, and only cumulative hospitalizations are available, convert
        the cumulative hospitalizations to an estimate of current
        hospitalizations.
    category: str
        'icu' for just ICU or 'hospitalized' for all ICU + Acute.

    Returns
    -------
    times: array(float) or NoneType
        Days since t0 for each hospitalization observation.
    observed_hospitalizations: array(int), list or NoneType
        Hospitalization values. The converted current estimate is returned as
        a Python list.
    type: HospitalizationDataType
        Specifies cumulative or current hospitalizations.

    Raises
    ------
    ValueError
        If ``category`` is neither 'icu' nor 'hospitalized'.
    """
    abbr = us.states.lookup(state).abbr
    state_subset = combined_datasets.build_us_timeseries_with_all_fields().get_subset(
        AggregationLevel.STATE, country="USA", state=abbr
    )
    hospitalization_data = state_subset.get_data(country="USA", state=abbr)

    categories = ["icu", "hospitalized"]
    if category not in categories:
        raise ValueError(f"Hospitalization category {category} is not in {categories}")

    if len(hospitalization_data) == 0:
        return None, None, None

    current_column = f"current_{category}"
    cumulative_column = f"cumulative_{category}"

    # Current hospitalizations are preferred whenever any positive value exists.
    if (hospitalization_data[current_column] > 0).any():
        hospitalization_data = hospitalization_data[
            hospitalization_data[current_column].notnull()
        ]
        times_new = (hospitalization_data["date"].dt.date - t0.date()).dt.days.values
        return (
            times_new,
            hospitalization_data[current_column].values.clip(min=0),
            HospitalizationDataType.CURRENT_HOSPITALIZATIONS,
        )

    if not (hospitalization_data[cumulative_column] > 0).any():
        return None, None, None

    hospitalization_data = hospitalization_data[
        hospitalization_data[cumulative_column].notnull()
    ]
    times_new = (hospitalization_data["date"].dt.date - t0.date()).dt.days.values
    cumulative = hospitalization_data[cumulative_column].values.clip(min=0)

    # Some minor glitches for a few states: where a value exceeds its
    # successor, lower it to the successor's value.
    for i in range(len(cumulative) - 1):
        if cumulative[i] > cumulative[i + 1]:
            cumulative[i] = cumulative[i + 1]

    if not convert_cumulative_to_current:
        return times_new, cumulative, HospitalizationDataType.CUMULATIVE_HOSPITALIZATIONS

    # Must be here to avoid circular import. This is required to convert
    # cumulative hosps to current hosps. We also just use a dummy fips and t_list.
    from pyseir.parameters.parameter_ensemble_generator import ParameterEnsembleGenerator

    params = ParameterEnsembleGenerator(
        fips="06", t_list=[], N_samples=1
    ).get_average_seir_parameters()

    ventilator_fraction = params["fraction_icu_requiring_ventilator"]
    stay_icu = params["hospitalization_length_of_stay_icu"]
    stay_icu_ventilator = params["hospitalization_length_of_stay_icu_and_ventilator"]

    if category == "hospitalized":
        rate_general = params["hospitalization_rate_general"]
        rate_icu = params["hospitalization_rate_icu"]
        stay_general = params["hospitalization_length_of_stay_general"]
        # Length of stay weighted by the general and ICU hospitalization rates.
        average_length_of_stay = (
            rate_general * stay_general
            + rate_icu * (1 - ventilator_fraction) * stay_icu
            + rate_icu * ventilator_fraction * stay_icu_ventilator
        ) / (rate_general + rate_icu)
    else:
        average_length_of_stay = (
            (1 - ventilator_fraction) * stay_icu
            + ventilator_fraction * stay_icu_ventilator
        )

    # Now compute a cumulative sum, but at each day, subtract the discharges
    # from the previous count.
    new_hospitalizations = np.append([0], np.diff(cumulative))
    current = [0]
    for admitted in new_hospitalizations[1:]:
        occupied = current[-1]
        current.append(occupied + admitted - occupied / average_length_of_stay)

    return times_new, current, HospitalizationDataType.CURRENT_HOSPITALIZATIONS