def __init__(
    self,
    state,
    output_interval_days=4,
    run_mode="can-before",
    output_dir=None,
    jhu_dataset=None,
    cds_dataset=None,
    include_imputed=False,
):
    self.output_interval_days = output_interval_days
    self.state = state
    self.run_mode = RunMode(run_mode)
    self.include_imputed = include_imputed
    self.state_abbreviation = us.states.lookup(state).abbr
    self.population_data = FIPSPopulation.local().population()
    self.output_dir = output_dir

    self.jhu_local = jhu_dataset or JHUDataset.local()
    self.cds_dataset = cds_dataset or CDSDataset.local()
    self.county_timeseries = build_aggregate_county_data_frame(
        self.jhu_local, self.cds_dataset
    )
    self.county_timeseries["date"] = self.county_timeseries["date"].dt.normalize()

    self.state_timeseries = self.jhu_local.timeseries().state_data
    self.state_timeseries["date"] = self.state_timeseries["date"].dt.normalize()

    self.df_whitelist = load_data.load_whitelist()
    self.df_whitelist = self.df_whitelist[self.df_whitelist["inference_ok"] == True]
def build_county_summary(min_date, country="USA", state=None, output_dir=OUTPUT_DIR):
    """Builds county summary json files."""
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(
        AggregationLevel.COUNTY, after=min_date, country=country, state=state
    )

    output_dir = pathlib.Path(output_dir) / "county_summaries"
    _logger.info(f"Outputting to {output_dir}")
    if not output_dir.exists():
        _logger.info(f"{output_dir} does not exist, creating")
        output_dir.mkdir(parents=True)

    counties_by_state = defaultdict(list)
    for country, state, county, fips in timeseries.county_keys():
        counties_by_state[state].append((county, fips))

    for state, counties in counties_by_state.items():
        data = {"counties_with_data": []}
        for county, fips in counties:
            cases = timeseries.get_data(state=state, country=country, fips=fips)
            beds = beds_data.get_county_level(state, fips=fips)
            population = population_data.get_county_level(country, state, fips=fips)
            if population and beds and sum(cases.cases):
                data["counties_with_data"].append(fips)

        output_path = output_dir / f"{state}.summary.json"
        output_path.write_text(json.dumps(data, indent=2))
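# Usage sketch (not from the original source): builds the per-state summary
# files using the module defaults. Assumes a top-level `datetime` import and
# that the JHU/DH/FIPS data caches are available locally.
#
#   import datetime
#   build_county_summary(min_date=datetime.datetime(2020, 3, 1), state="CA")
#
# This writes county_summaries/CA.summary.json listing the FIPS codes of
# counties that have nonzero case counts plus bed and population data.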
def run_state_level_forecast(
    min_date, max_date, country="USA", state=None, output_dir=OUTPUT_DIR
):
    # DH Beds dataset does not have all counties, so using the legacy state
    # level bed data.
    legacy_dataset = LegacyJHUDataset(min_date)
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(
        AggregationLevel.STATE, after=min_date, country=country, state=state
    )

    # Use the output_dir argument rather than the module-level OUTPUT_DIR so
    # callers can redirect output.
    output_dir = pathlib.Path(output_dir)
    if output_dir.exists() and not state:
        # Back up existing output before a full (all-states) run.
        backup = output_dir.name + "." + str(int(time.time()))
        output_dir.rename(output_dir.parent / backup)
    output_dir.mkdir(parents=True, exist_ok=True)

    pool = get_pool()
    for state in timeseries.states:
        args = (
            country,
            state,
            timeseries,
            legacy_dataset,
            population_data,
            min_date,
            max_date,
            output_dir,
        )
        pool.apply_async(forecast_each_state, args=args)

    pool.close()
    pool.join()
def __init__(
    self,
    state,
    output_interval_days=4,
    run_mode="can-before",
    output_dir=None,
    jhu_dataset=None,
    cds_dataset=None,
    include_imputed=False,
):
    self.output_interval_days = output_interval_days
    self.state = state
    self.run_mode = RunMode(run_mode)
    self.include_imputed = include_imputed
    self.state_abbreviation = us.states.lookup(state).abbr
    self.population_data = FIPSPopulation.local().population()
    self.output_dir = output_dir

    self.jhu_local = jhu_dataset or JHUDataset.local()
    self.cds_dataset = cds_dataset or CDSDataset.local()
    self.county_timeseries = build_aggregate_county_data_frame(
        self.jhu_local, self.cds_dataset
    )
    self.county_timeseries["date"] = self.county_timeseries["date"].dt.normalize()

    state_timeseries = self.jhu_local.timeseries().get_subset(AggregationLevel.STATE)
    # Keep the full state-level frame and normalize only its date column,
    # rather than overwriting the frame with the normalized date series.
    self.state_timeseries = state_timeseries.data
    self.state_timeseries["date"] = self.state_timeseries["date"].dt.normalize()
def run_latest(version: data_version.DataVersion, output: pathlib.Path):
    """Get latest case values from JHU dataset."""
    output.mkdir(exist_ok=True)
    timeseries = JHUDataset.local().timeseries()
    state_summaries = dataset_export.latest_case_summaries_by_state(timeseries)

    for state, state_summary in state_summaries:
        output_file = output / f"{state}.summary.json"
        with output_file.open("w") as f:
            _logger.info(f"Writing latest data for {state}")
            json.dump(state_summary, f)

    version.write_file("case_summary", output)
def run_latest(version: data_version.DataVersion, output: pathlib.Path):
    """Get latest case values from JHU dataset."""
    output.mkdir(exist_ok=True)
    timeseries = JHUDataset.local().timeseries()
    state_summaries = dataset_export.latest_case_summaries_by_state(timeseries)

    for state_summary in state_summaries:
        state = state_summary.state
        output_file = output / f"{state}.summary.json"
        _logger.info(f"Writing latest data for {state} to {output_file}")
        output_file.write_text(state_summary.json(indent=2))

    version.write_file("case_summary", output)
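# Usage sketch (illustrative only): how this entry point might be invoked.
# The DataVersion constructor arguments are not shown in this module, so the
# construction below is hypothetical.
#
#   version = data_version.DataVersion(...)  # hypothetical construction
#   run_latest(version, pathlib.Path("results/case_summary"))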
def run_county_level_forecast(
    min_date: datetime.datetime,
    max_date: datetime.datetime,
    output_dir: pathlib.Path,
    country: str = "USA",
    state: str = None,
):
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(
        AggregationLevel.COUNTY, after=min_date, country=country, state=state
    )

    _logger.info(f"Outputting to {output_dir}")
    output_dir.mkdir(parents=True, exist_ok=True)

    counties_by_state = defaultdict(list)
    county_keys = timeseries.county_keys()
    for country, state, county, fips in county_keys:
        counties_by_state[state].append((county, fips))

    pool = get_pool()
    for state, counties in counties_by_state.items():
        _logger.info(f"Running county models for {state}")
        for county, fips in counties:
            args = (
                min_date,
                max_date,
                country,
                state,
                county,
                fips,
                timeseries,
                beds_data,
                population_data,
                output_dir,
            )
            pool.apply_async(
                forecast_each_county,
                args,
                callback=_result_callback_wrapper(f"{county}, {state}: {fips}"),
            )

    pool.close()
    pool.join()
def run_county_level_forecast(
    min_date, max_date, country="USA", state=None, output_dir=OUTPUT_DIR
):
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(
        AggregationLevel.COUNTY, after=min_date, country=country, state=state
    )

    output_dir = pathlib.Path(output_dir) / "county"
    _logger.info(f"Outputting to {output_dir}")
    # Back up existing output only on a full run; a single-state run leaves
    # prior output in place rather than replacing it.
    if output_dir.exists() and not state:
        backup = output_dir.name + "." + str(int(time.time()))
        output_dir.rename(output_dir.parent / backup)
    output_dir.mkdir(parents=True, exist_ok=True)

    counties_by_state = defaultdict(list)
    county_keys = timeseries.county_keys()
    for country, state, county, fips in county_keys:
        counties_by_state[state].append((county, fips))

    pool = get_pool()
    for state, counties in counties_by_state.items():
        _logger.info(f"Running county models for {state}")
        for county, fips in counties:
            args = (
                min_date,
                max_date,
                country,
                state,
                county,
                fips,
                timeseries,
                beds_data,
                population_data,
                output_dir,
            )
            # Run serially for debugging: forecast_each_county(*args)
            pool.apply_async(forecast_each_county, args=args)

    pool.close()
    pool.join()
def run_latest(deploy=False):
    """Get latest case values from JHU dataset."""
    output_dir = pathlib.Path(build_params.OUTPUT_DIR)
    if deploy:
        output_dir = WEB_DEPLOY_PATH
    output_folder = output_dir / "case_summary"
    output_folder.mkdir(exist_ok=True)

    timeseries = JHUDataset.local().timeseries()
    state_summaries = dataset_export.latest_case_summaries_by_state(timeseries)

    for state, state_summary in state_summaries:
        output_file = output_folder / f"{state}.summary.json"
        with output_file.open("w") as f:
            _logger.info(f"Writing latest data for {state}")
            json.dump(state_summary, f)
def run_state_level_forecast(
    min_date, max_date, output_dir, country="USA", state=None
):
    # DH Beds dataset does not have all counties, so using the legacy state
    # level bed data.
    beds_data = DHBeds.local().beds()
    population_data = FIPSPopulation.local().population()
    timeseries = JHUDataset.local().timeseries()
    timeseries = timeseries.get_subset(
        AggregationLevel.STATE, after=min_date, country=country, state=state
    )

    output_dir = pathlib.Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    pool = get_pool()
    for state in timeseries.states:
        args = (
            country,
            state,
            timeseries,
            beds_data,
            population_data,
            min_date,
            max_date,
            output_dir,
        )
        pool.apply_async(
            forecast_each_state,
            args,
            callback=_result_callback_wrapper(f"{state}, {country}"),
        )

    pool.close()
    pool.join()
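# Hypothetical driver (a sketch, not part of the original source): sequences
# the county- and state-level entry points defined above. The dates and the
# "results" directory are illustrative; `datetime` and `pathlib` imports are
# assumed at module top.
#
#   min_date = datetime.datetime(2020, 3, 7)
#   max_date = datetime.datetime(2020, 7, 5)
#   output = pathlib.Path("results")
#   run_county_level_forecast(min_date, max_date, output / "county")
#   run_state_level_forecast(min_date, max_date, output / "state")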
def __init__(self,
             fips,
             n_years=0.5,
             n_samples=250,
             suppression_policy=(0.35, 0.5, 0.75, 1),
             skip_plots=False,
             output_percentiles=(5, 25, 32, 50, 75, 68, 95),
             generate_report=True,
             run_mode=RunMode.DEFAULT,
             min_hospitalization_threshold=5,
             hospitalization_to_confirmed_case_ratio=1 / 4,
             covid_timeseries=None):

    self.fips = fips
    # A 5-digit FIPS code identifies a county; a 2-digit code is a state.
    self.agg_level = AggregationLevel.COUNTY if len(fips) == 5 else AggregationLevel.STATE

    self.t_list = np.linspace(0, int(365 * n_years), int(365 * n_years) + 1)
    self.skip_plots = skip_plots
    self.run_mode = RunMode(run_mode)
    self.hospitalizations_for_state = None
    self.min_hospitalization_threshold = min_hospitalization_threshold
    self.hospitalization_to_confirmed_case_ratio = hospitalization_to_confirmed_case_ratio

    if self.agg_level is AggregationLevel.COUNTY:
        self.county_metadata = load_data.load_county_metadata_by_fips(fips)
        self.state_abbr = us.states.lookup(self.county_metadata['state']).abbr
        self.state_name = us.states.lookup(self.county_metadata['state']).name
        self.output_file_report = get_run_artifact_path(self.fips, RunArtifact.ENSEMBLE_REPORT)
        self.output_file_data = get_run_artifact_path(self.fips, RunArtifact.ENSEMBLE_RESULT)
    else:
        self.state_abbr = us.states.lookup(self.fips).abbr
        self.state_name = us.states.lookup(self.fips).name
        self.output_file_report = None
        self.output_file_data = get_run_artifact_path(self.fips, RunArtifact.ENSEMBLE_RESULT)

    county_fips = None if self.agg_level is AggregationLevel.STATE else self.fips

    if not covid_timeseries:
        covid_timeseries = JHUDataset.local().timeseries()
    else:
        covid_timeseries = covid_timeseries.timeseries()

    self.covid_data = (
        covid_timeseries
        .get_subset(self.agg_level, country='USA', state=self.state_abbr)
        .get_data(country='USA', state=self.state_abbr, fips=county_fips)
        .sort_values('date')
    )

    os.makedirs(os.path.dirname(self.output_file_data), exist_ok=True)
    if self.output_file_report:
        os.makedirs(os.path.dirname(self.output_file_report), exist_ok=True)

    self.output_percentiles = output_percentiles
    self.n_samples = n_samples
    self.n_years = n_years
    # TODO: Will soon be replaced with loaders for all the inferred params.
    # self.t0 = fit_results.load_t0(fips)
    self.date_generated = datetime.datetime.utcnow().isoformat()
    self.suppression_policy = suppression_policy
    self.summary = copy.deepcopy(self.__dict__)
    self.summary.pop('t_list')

    self.generate_report = generate_report
    self.suppression_policies = None
    self.override_params = dict()
    self.init_run_mode()

    self.all_outputs = {}
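# Instantiation sketch: the enclosing class name is not shown above, so
# `EnsembleRunner` is assumed here. "06" is California's state FIPS code, so
# this configures a state-level run; a 5-digit code such as "06037"
# (Los Angeles County) would select a county-level run instead.
#
#   runner = EnsembleRunner(fips="06", n_samples=250, run_mode=RunMode.DEFAULT)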