def main(fetch: bool):
    """Load the CovidCountyDataset (optionally fetching fresh data), transform it,
    and write the combined timeseries to OUTPUT_PATH."""
    common_init.configure_logging()
    log = structlog.get_logger()
    dataset = ccd_helpers.CovidCountyDataset.load(fetch=fetch)
    combined = transform(dataset)
    common_df.write_csv(combined, OUTPUT_PATH, log)
def main(replace_local_mirror: bool, generate_common_csv: bool):
    """Optionally refresh the local mirror of the CMS datasets and/or regenerate
    the common-format timeseries CSV from them."""
    common_init.configure_logging()
    if replace_local_mirror:
        update_datasets()
    if generate_common_csv:
        combined = transform_cms_datasets()
        common_df.write_csv(combined, TIMESERIES_CSV_PATH, _logger)
def main(fetch: bool):
    """Run the CovidDataScraper transformer (optionally fetching first) and write
    its common-format CSV under DATA_ROOT."""
    common_init.configure_logging()
    log = structlog.get_logger(updater="CovidDataScraperTransformer")
    output_path = DATA_ROOT / "cases-cds" / "timeseries-common.csv"
    transformer = CovidDataScraperTransformer.make_with_data_root(DATA_ROOT, log)
    if fetch:
        transformer.fetch()
    common_df.write_csv(transformer.transform(), output_path, log)
def main(fetch: bool):
    """Update (optionally) and transform CovidCareMap data, writing the static CSV
    indexed by FIPS."""
    common_init.configure_logging()
    log = structlog.get_logger()
    updater = CovidCareMapUpdater()
    if fetch:
        updater.update()
    common_df.write_csv(updater.transform(), STATIC_CSV_PATH, log, [CommonFields.FIPS])
def main(fetch: bool):
    """Build a TestAndTraceSyncer against the standard data-root paths and run its
    update (fetching from the source sheet when requested)."""
    common_init.configure_logging()
    log = structlog.get_logger()
    syncer = TestAndTraceSyncer(
        source_url=SOURCE_URL,
        census_state_path=DATA_ROOT / "misc" / "state.txt",
        gsheets_copy_directory=DATA_ROOT / "test-and-trace" / "gsheet-copy",
        state_timeseries_path=DATA_ROOT / "test-and-trace" / "state_data.csv",
        date_today=date.today(),
    )
    syncer.update(fetch=fetch, log=log)
def main(fetch: bool):
    """Authenticate to Zoltar, run the ForecastHub ENSEMBLE updater (optionally
    refreshing source data), and write the transformed timeseries CSV."""
    common_init.configure_logging()
    connection = zoltpy.util.authenticate()
    updater = ForecastHubUpdater.make_with_data_root(
        ForecastModel.ENSEMBLE, connection, DATA_ROOT
    )
    if fetch:
        _logger.info("Fetching new data.")
        updater.update_source_data()
    data = updater.transform(updater.load_source_data())
    common_df.write_csv(data, updater.timeseries_output_path, _logger)
def main(replace_local_mirror: bool, generate_common_csv: bool):
    """Optionally refresh the local COVID Tracking mirror and/or regenerate the
    common-format timeseries CSV from the local JSON.

    Fix: dropped the redundant ``logging.basicConfig(level=logging.INFO)`` call.
    Every other entry point in this file configures logging solely through
    ``common_init.configure_logging()``; calling ``basicConfig`` as well installs
    an extra root handler, which duplicates log output.
    """
    common_init.configure_logging()
    if replace_local_mirror:
        update_local_json()
        CovidTrackingDataUpdater().update()
    if generate_common_csv:
        common_df.write_csv(
            transform(load_local_json()), TIMESERIES_CSV_PATH, structlog.get_logger(),
        )
def main(replace_local_mirror: bool):
    """Optionally refresh the local AWS data-lake mirror, then transform each
    discovered source into its own timeseries CSV under DATA_ROOT/aws-lake."""
    common_init.configure_logging()
    copier = AwsDataLakeCopier.make_with_data_root(DATA_ROOT)
    if replace_local_mirror:
        copier.replace_local_mirror()
    transformer = AwsDataLakeTransformer.make_with_data_root(DATA_ROOT)
    for source_name, source_files in copier.get_sources():
        # One logger per source so every record is tagged with its origin.
        log = structlog.get_logger(source_name=source_name)
        output_path = DATA_ROOT / "aws-lake" / f"timeseries-{source_name}.csv"
        transformed = transformer.transform(source_files, log)
        write_df_as_csv(transformed, output_path, log)
def main(check_for_new_data: bool, fetch: bool):
    """NYTimes updater entry point.

    In check mode, only report whether new upstream data exists (raising when it
    does not) and return without transforming. Otherwise optionally fetch, then
    transform and write the timeseries CSV.
    """
    common_init.configure_logging()
    updater = NYTimesUpdater.make_with_data_root(DATA_ROOT)
    if check_for_new_data:
        if not updater.is_new_data_available():
            raise Exception("No new data available")
        _logger.info("New data available")
        return
    if fetch:
        _logger.info("Fetching new data.")
        updater.update_source_data()
    data = updater.transform(updater.load_state_and_county_data())
    common_df.write_csv(data, updater.timeseries_output_path, _logger)
def main(replace_local_mirror: bool, generate_common_csv: bool):
    """Optionally refresh the mirrored dataset CSV and/or regenerate the
    common-format timeseries CSV from it."""
    common_init.configure_logging()
    if replace_local_mirror:
        update_dataset_csv()
    if generate_common_csv:
        raw = pd.read_csv(
            DATASET_CSV_PATH,
            parse_dates=[Fields.DATE],
            # FIPS codes must stay zero-padded strings, not ints.
            dtype={Fields.STATE_FIPS: str},
            low_memory=False,
        )
        common_df.write_csv(transform(raw), TIMESERIES_CSV_PATH, _logger)
def entry_point(ctx):  # pylint: disable=no-value-for-parameter
    """Entry point for covid-data-model CLI."""
    # Tag all log output with the subcommand being invoked.
    common_init.configure_logging(command=ctx.invoked_subcommand)
    dataset_cache.set_pickle_cache_dir()
    # Disable pandarallel's progress bar so CLI output stays clean.
    pandarallel.initialize(progress_bar=False)
area, county_names = re.match(r".+Area ([A-Z]) - (.*)[;.]", line).groups() counties = county_names.split(", ") for county in counties: # TODO(chris): Find better way match county to fips. I believe there are some # python packages that do a lot of the heavy lifting. if county == "Raines": county = "Rains" if county == "Dewitt": county = "DeWitt" county = county + " County" county_data = census_data.get_county_data(state, county) if not county_data: raise CountyNotFoundInCensusData() data.append({"fips": county_data["fips"], "state": state, "tsa_region": area}) return pd.DataFrame(data) if __name__ == "__main__": common_init.configure_logging() log = structlog.get_logger() transformer = TexasTraumaServiceAreaFipsTransformer.make_with_data_root(DATA_ROOT) output_csv = DATA_ROOT / "states" / "tx" / "tx_tsa_region_fips_map.csv" output = transformer.transform() output.to_csv(output_csv, index=False) log.info(f"Successfully wrote TSA -> FIPS map", output_file=str(output_csv))
def entry_point():
    """Basic entrypoint for cortex subcommands"""
    # Shared setup: configure logging before any subcommand runs.
    common_init.configure_logging()
def entry_point():
    """Basic entrypoint for cortex subcommands"""
    # Set up the pickle cache directory before logging is configured.
    dataset_cache.set_pickle_cache_dir()
    common_init.configure_logging()
def update_cds_data(self): pd.read_csv(self._CDS_TIMESERIES).to_csv(os.path.join( self._CDS_DATA_DIR, "timeseries.csv"), index=False) # Record the date and time of update in versions.txt with open(os.path.join(self._CDS_DATA_DIR, "version.txt"), "w") as log: log.write("Updated on {}".format(self._stamp())) def update_all_data_files(self): self.update_cds_data() self.update_jhu_data() if __name__ == "__main__": configure_logging() update = CovidDatasetAutoUpdater() something_specified = False if args.cds: logger.info("Updating data from the Corona Data Scraper") update.update_cds_data() something_specified = True if args.jhu: logger.info("Updating data from John Hopkins University") update.update_jhu_data() something_specified = True if not something_specified: # If nothing was specified, then we assume that the user wants all datasets updated
def main():
    """Load the dataset and write it to CSV_PATH, indexed by FIPS."""
    common_init.configure_logging()
    log = structlog.get_logger()
    common_df.write_csv(load_dataset(), CSV_PATH, log, index_names=[CommonFields.FIPS])