def _map_outputs(
    state=None,
    output_interval_days=1,
    states_only=False,
    output_dir=None,
    run_mode="default",
):
    output_interval_days = int(output_interval_days)
    _cache_global_datasets()
    if state:
        web_ui_mapper = WebUIDataAdaptorV1(
            state,
            output_interval_days=output_interval_days,
            run_mode=run_mode,
            jhu_dataset=nyt_dataset,
            cds_dataset=cds_dataset,
            output_dir=output_dir,
        )
        web_ui_mapper.generate_state(states_only=states_only)
    else:
        for state_name in ALL_STATES:
            _map_outputs(
                state_name,
                output_interval_days,
                states_only=states_only,
                run_mode=run_mode,
                output_dir=output_dir,
            )
def _map_outputs(states, output_interval_days=1, states_only=False, output_dir=None, run_mode="default"):
    for state in states:
        web_ui_mapper = WebUIDataAdaptorV1(
            state,
            output_interval_days=output_interval_days,
            run_mode=run_mode,
            output_dir=output_dir,
        )
        web_ui_mapper.generate_state(whitelisted_county_fips=[], states_only=states_only)
def _write_pipeline_output(
    pipelines: List[Union[SubStatePipeline, StatePipeline]],
    output_dir: str,
    output_interval_days: int = 4,
    write_webui_output: bool = False,
):
    infection_rate_metric_df = pd.concat((p.infer_df for p in pipelines), ignore_index=True)
    # TODO: use constructors in MultiRegionTimeseriesDataset
    timeseries_dataset = TimeseriesDataset(infection_rate_metric_df)
    latest = timeseries_dataset.latest_values_object()
    multiregion_rt = MultiRegionTimeseriesDataset.from_timeseries_and_latest(
        timeseries_dataset, latest
    )
    output_path = pathlib.Path(output_dir) / pyseir.utils.SummaryArtifact.RT_METRIC_COMBINED.value
    multiregion_rt.to_csv(output_path)
    root.info(f"Saving Rt results to {output_path}")

    icu_df = pd.concat((p.icu_data.data for p in pipelines if p.icu_data), ignore_index=True)
    timeseries_dataset = TimeseriesDataset(icu_df)
    latest = timeseries_dataset.latest_values_object().data.set_index(CommonFields.LOCATION_ID)
    multiregion_icu = MultiRegionTimeseriesDataset(icu_df, latest)

    output_path = pathlib.Path(output_dir) / pyseir.utils.SummaryArtifact.ICU_METRIC_COMBINED.value
    multiregion_icu.to_csv(output_path)
    root.info(f"Saving ICU results to {output_path}")

    if write_webui_output:
        # does not parallelize well, because the web UI mapper doesn't serialize efficiently
        # TODO: remove intermediate artifacts and parallelize artifact creation better;
        # approximately 40% of the processing time is spent in this step
        web_ui_mapper = WebUIDataAdaptorV1(
            output_interval_days=output_interval_days, output_dir=output_dir,
        )
        webui_inputs = [
            webui_data_adaptor_v1.RegionalInput.from_results(p.fitter, p.ensemble, p.infer_df)
            for p in pipelines
            if p.fitter
        ]

        with Pool(maxtasksperchild=1) as p:
            p.map(web_ui_mapper.write_region_safely, webui_inputs)
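# Illustrative usage sketch (not part of the original module): one way
# _write_pipeline_output could be driven once state/sub-state pipelines have
# been built elsewhere. Only the signature above comes from this module; the
# `pipelines` argument and the "output/pyseir" path are assumptions.
def _example_write_pipeline_output(pipelines: List[Union[SubStatePipeline, StatePipeline]]):
    _write_pipeline_output(
        pipelines,
        output_dir="output/pyseir",  # placeholder path
        output_interval_days=4,
        write_webui_output=True,  # also emit the per-region web UI artifacts
    )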
def _map_outputs(
    state=None,
    output_interval_days=1,
    states_only=False,
    output_dir=None,
    run_mode="default",
):
    output_interval_days = int(output_interval_days)
    _cache_global_datasets()
    if state:
        web_ui_mapper = WebUIDataAdaptorV1(
            state,
            output_interval_days=output_interval_days,
            run_mode=run_mode,
            output_dir=output_dir,
        )
        web_ui_mapper.generate_state(
            whitelisted_county_fips=[], states_only=states_only,
        )
    else:
        for state_name in ALL_STATES:
            _map_outputs(
                state_name,
                output_interval_days,
                states_only=states_only,
                run_mode=run_mode,
                output_dir=output_dir,
            )
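# Illustrative usage sketch (not part of the original module): the first call
# maps a single state's outputs; the second omits `state`, so the function
# recurses over ALL_STATES. The "output/web_ui" path is a placeholder; all
# keyword names match the signature above.
def _example_map_outputs():
    _map_outputs(state="CA", output_interval_days=4, output_dir="output/web_ui")
    _map_outputs(output_interval_days=4, states_only=True, output_dir="output/web_ui")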
def _build_all_for_states(
    states=[],
    run_mode=DEFAULT_RUN_MODE,
    generate_reports=False,
    output_interval_days=4,
    skip_download=False,
    output_dir=None,
    skip_whitelist=False,
    states_only=False,
):
    # prepare data
    _cache_global_datasets()
    if not skip_download:
        cache_all_data()
    if not skip_whitelist:
        _generate_whitelist()

    # run the state-level pipeline in parallel
    p = Pool()
    states_only_func = partial(
        _state_only_pipeline,
        run_mode=run_mode,
        generate_reports=generate_reports,
        output_interval_days=output_interval_days,
        output_dir=output_dir,
    )
    p.map(states_only_func, states)
    if states_only:
        root.info("Only executing for states. Returning.")
        return

    # build the county fips -> state mapping for all requested states
    all_county_fips = {}
    for state in states:
        state_county_fips = model_fitter.build_county_list(state)
        county_fips_per_state = {fips: state for fips in state_county_fips}
        all_county_fips.update(county_fips_per_state)

    # calculate county Rt inference
    p.map(infer_rt_module.run_county, all_county_fips.keys())

    # calculate model fits
    root.info(f"executing model for {len(all_county_fips)} counties")
    fitters = p.map(model_fitter._execute_model_for_fips, all_county_fips.keys())

    df = pd.DataFrame([fit.fit_results for fit in fitters if fit])
    df["state"] = df.fips.replace(all_county_fips)
    df["mle_model"] = [fit.mle_model for fit in fitters if fit]
    df.index = df.fips
    state_dfs = [state_df for name, state_df in df.groupby("state")]
    p.map(model_fitter._persist_results_per_state, state_dfs)

    # calculate ensembles
    root.info(f"running ensemble for {len(all_county_fips)} counties")
    ensemble_func = partial(
        _run_county, ensemble_kwargs=dict(run_mode=run_mode, generate_report=generate_reports),
    )
    p.map(ensemble_func, all_county_fips.keys())

    # output it all
    output_interval_days = int(output_interval_days)
    _cache_global_datasets()

    root.info(f"outputting web results for states and {len(all_county_fips)} counties")
    # does not parallelize well, because the web UI mapper doesn't serialize efficiently
    # TODO: remove intermediate artifacts and parallelize artifact creation better;
    # approximately 40% of the processing time is spent in this step
    for state in states:
        web_ui_mapper = WebUIDataAdaptorV1(
            state,
            output_interval_days=output_interval_days,
            run_mode=run_mode,
            jhu_dataset=nyt_dataset,
            cds_dataset=cds_dataset,
            output_dir=output_dir,
        )
        web_ui_mapper.generate_state(all_fips=all_county_fips.keys())

    p.close()
    p.join()
    return
def _build_all_for_states(
    states: List[str],
    run_mode=DEFAULT_RUN_MODE,
    generate_reports=False,
    output_interval_days=4,
    output_dir=None,
    skip_whitelist=False,
    states_only=False,
    fips=None,
):
    # prepare data
    _cache_global_datasets()
    if not skip_whitelist:
        _generate_whitelist()

    # run the state-level pipeline in parallel
    with Pool(maxtasksperchild=1) as p:
        states_only_func = partial(
            _state_only_pipeline,
            run_mode=run_mode,
            generate_reports=generate_reports,
            output_interval_days=output_interval_days,
            output_dir=output_dir,
        )
        p.map(states_only_func, states)

    if states_only:
        root.info("Only executing for states. Returning.")
        return

    all_county_fips = build_counties_to_run_per_state(states, fips=fips)

    with Pool(maxtasksperchild=1) as p:
        # calculate county Rt inference
        p.map(infer_rt.run_rt_for_fips, all_county_fips.keys())

        # calculate model fits
        root.info(f"executing model for {len(all_county_fips)} counties")
        fitters = p.map(model_fitter.execute_model_for_fips, all_county_fips.keys())

        df = pd.DataFrame([fit.fit_results for fit in fitters if fit])
        df["state"] = df.fips.replace(all_county_fips)
        df["mle_model"] = [fit.mle_model for fit in fitters if fit]
        df.index = df.fips
        state_dfs = [state_df for name, state_df in df.groupby("state")]
        p.map(model_fitter._persist_results_per_state, state_dfs)

        # calculate ensembles
        root.info(f"running ensemble for {len(all_county_fips)} counties")
        ensemble_func = partial(
            ensemble_runner._run_county,
            ensemble_kwargs=dict(run_mode=run_mode, generate_report=generate_reports),
        )
        p.map(ensemble_func, all_county_fips.keys())

    # output it all
    output_interval_days = int(output_interval_days)
    _cache_global_datasets()

    root.info(f"outputting web results for states and {len(all_county_fips)} counties")
    # does not parallelize well, because the web UI mapper doesn't serialize efficiently
    # TODO: remove intermediate artifacts and parallelize artifact creation better;
    # approximately 40% of the processing time is spent in this step
    for state in states:
        web_ui_mapper = WebUIDataAdaptorV1(
            state,
            output_interval_days=output_interval_days,
            run_mode=run_mode,
            output_dir=output_dir,
        )
        web_ui_mapper.generate_state(
            whitelisted_county_fips=[k for k, v in all_county_fips.items() if v == state],
            states_only=False,
        )
    return
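# Illustrative usage sketch (not part of the original module): a full build for
# two states, counties included, ending with per-state web UI output. The
# output path is a placeholder; all keyword names match the signature above.
def _example_build_all_for_states():
    _build_all_for_states(
        states=["NY", "CA"],
        generate_reports=False,
        output_interval_days=4,
        output_dir="output/pyseir",
        states_only=False,
    )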