def test_provenance():
    """Checks that the provenance of the numerator series is carried onto test positivity.

    AS only has POSITIVE_TESTS (provenance "pt_src1") while TX also has POSITIVE_TESTS_VIRAL
    (provenance "pos_viral"); the expected output carries "pt_src1" for AS and "pos_viral" for TX.
    """
    as_region = Region.from_state("AS")
    tx_region = Region.from_state("TX")

    as_timeseries = {
        CommonFields.POSITIVE_TESTS: TimeseriesLiteral([0, 2, 4, 6], provenance="pt_src1"),
        CommonFields.TOTAL_TESTS: [100, 200, 300, 400],
    }
    tx_timeseries = {
        CommonFields.POSITIVE_TESTS: TimeseriesLiteral([1, 2, 3, 4], provenance="pt_src2"),
        CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
            [10, 20, 30, 40], provenance="pos_viral"
        ),
        CommonFields.TOTAL_TESTS: [100, 200, 300, 400],
    }
    input_dataset = test_helpers.build_dataset(
        {as_region: as_timeseries, tx_region: tx_timeseries}
    )

    # The viral-based method is listed first, the plain positive-tests method second.
    viral_method = DivisionMethod(
        DatasetName("method1"), CommonFields.POSITIVE_TESTS_VIRAL, CommonFields.TOTAL_TESTS
    )
    plain_method = DivisionMethod(
        DatasetName("method2"), CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS
    )
    result = AllMethods.run(input_dataset, [viral_method, plain_method], diff_days=3)

    # With diff_days=3 and four input days only the final date (2020-04-04) is expected.
    expected = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.02], provenance=["pt_src1"])
            },
            tx_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.1], provenance="pos_viral")
            },
        },
        start_date="2020-04-04",
    )
    test_helpers.assert_dataset_like(result.test_positivity, expected)
def test_annotation(rt_dataset, icu_dataset):
    """Builds an IL dataset mixing provenance, source URLs and a tag across several fields.

    NOTE(review): only the dataset setup is visible here; the assertions that consume `ds`,
    `rt_dataset` and `icu_dataset` are presumably defined past the end of this snippet.
    """
    region = Region.from_state("IL")
    tag = test_helpers.make_tag(date="2020-04-01", original_observation=10.0)
    death_url = UrlStr("http://can.com/death_source")
    cases_urls = [UrlStr("http://can.com/one"), UrlStr("http://can.com/two")]
    new_cases_url = UrlStr("http://can.com/new_cases")

    timeseries_by_field = {
        CommonFields.CASES: TimeseriesLiteral(
            [100, 200, 300], provenance="NYTimes", source_url=cases_urls
        ),
        # NEW_CASES has only source_url set, to make sure that an annotation is still output.
        CommonFields.NEW_CASES: TimeseriesLiteral([100, 100, 100], source_url=new_cases_url),
        CommonFields.CONTACT_TRACERS_COUNT: [10] * 3,
        CommonFields.ICU_BEDS: TimeseriesLiteral([20, 20, 20], provenance="NotFound"),
        CommonFields.CURRENT_ICU: [5, 5, 5],
        CommonFields.DEATHS: TimeseriesLiteral([2, 3, 2], annotation=[tag], source_url=death_url),
    }
    static_by_field = {
        CommonFields.POPULATION: 100_000,
        CommonFields.STATE: "IL",
        CommonFields.CAN_LOCATION_PAGE_URL: "http://covidactnow.org/foo/bar",
    }
    ds = test_helpers.build_default_region_dataset(
        timeseries_by_field, region=region, static=static_by_field
    )
def test_pyseir_end_to_end_dc(tmp_path):
    """Runs the pipeline builder for DC only, with pyseir output redirected to `tmp_path`.

    Running over a single state exercises state filtering while still covering more than a
    single fips.
    """
    output_dir_patch = unittest.mock.patch("pyseir.utils.OUTPUT_DIR", str(tmp_path))
    with output_dir_patch:
        region = Region.from_state("DC")
        pipelines = cli._build_all_for_states(states=["DC"])
        # Two pipelines are expected when the builder correctly filters down to DC data.
        assert len(pipelines) == 2
def il_regional_input(rt_dataset, icu_dataset):
    """Returns an API v2 `RegionalInput` for Illinois built from the combined US timeseries.

    Args:
        rt_dataset: Forwarded unchanged to `RegionalInput.from_region_and_model_output`.
        icu_dataset: Forwarded unchanged to `RegionalInput.from_region_and_model_output`.
    """
    il_region = Region.from_state("IL")
    us_timeseries = combined_datasets.load_us_timeseries_dataset()
    il_timeseries = us_timeseries.get_regions_subset([il_region])
    # TODO(tom): add test positivity back in after PR 728 is merged.
    # test_positivity_results = test_positivity.AllMethods.run(il_timeseries)
    # il_timeseries = il_timeseries.join_columns(test_positivity_results.test_positivity)
    return api_v2_pipeline.RegionalInput.from_region_and_model_output(
        il_region, il_timeseries, rt_dataset, icu_dataset
    )
def il_regional_input(rt_dataset, icu_dataset):
    """Returns an API v2 `RegionalInput` for Illinois, with test positivity run on the data.

    The Illinois subset of the combined US timeseries is passed through
    `test_positivity.run_and_maybe_join_columns` before the `RegionalInput` is built.

    Args:
        rt_dataset: Forwarded unchanged to `RegionalInput.from_region_and_model_output`.
        icu_dataset: Forwarded unchanged to `RegionalInput.from_region_and_model_output`.
    """
    il_region = Region.from_state("IL")
    us_timeseries = combined_datasets.load_us_timeseries_dataset()
    il_timeseries = us_timeseries.get_regions_subset([il_region])
    log = structlog.get_logger()
    il_with_positivity = test_positivity.run_and_maybe_join_columns(il_timeseries, log)
    return api_v2_pipeline.RegionalInput.from_region_and_model_output(
        il_region, il_with_positivity, rt_dataset, icu_dataset
    )
def test_preserve_tags():
    """Checks which input tags end up on the derived test positivity timeseries.

    For AS the expected output carries the tags of both POSITIVE_TESTS and TOTAL_TESTS. For TX
    the expected output carries only the TOTAL_TESTS tags; the tag attached to the TX
    POSITIVE_TESTS series is not expected in the output.
    """
    as_region = Region.from_state("AS")
    tx_region = Region.from_state("TX")

    long_tail_tag = test_helpers.make_tag(
        type=TagType.CUMULATIVE_LONG_TAIL_TRUNCATED, date="2020-04-04"
    )
    tail_tag = test_helpers.make_tag(type=TagType.CUMULATIVE_TAIL_TRUNCATED, date="2020-04-04")
    unexpected_tag = test_helpers.make_tag(type=TagType.ZSCORE_OUTLIER, date="2020-04-01")
    outlier_apr4_tag = test_helpers.make_tag(type=TagType.ZSCORE_OUTLIER, date="2020-04-04")
    outlier_apr3_tag = test_helpers.make_tag(type=TagType.ZSCORE_OUTLIER, date="2020-04-03")

    as_timeseries = {
        CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
            [1, 2, 3, 4], annotation=[long_tail_tag], provenance="pos"
        ),
        CommonFields.TOTAL_TESTS: TimeseriesLiteral(
            [100, 200, 300, 400], annotation=[tail_tag]
        ),
    }
    tx_timeseries = {
        CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
            [None, None, 3, 4], annotation=[unexpected_tag]
        ),
        CommonFields.POSITIVE_TESTS_VIRAL: [10, 20, 30, 40],
        CommonFields.TOTAL_TESTS: TimeseriesLiteral(
            [100, 200, 300, 400], annotation=[outlier_apr4_tag, outlier_apr3_tag]
        ),
    }
    input_dataset = test_helpers.build_dataset(
        {as_region: as_timeseries, tx_region: tx_timeseries}
    )

    positive_over_total = DivisionMethod(
        DatasetName("method1"), CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS
    )
    viral_over_total = DivisionMethod(
        DatasetName("method2"), CommonFields.POSITIVE_TESTS_VIRAL, CommonFields.TOTAL_TESTS
    )
    result = AllMethods.run(input_dataset, [positive_over_total, viral_over_total], diff_days=3)

    expected = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                    [0.01], provenance="pos", annotation=[long_tail_tag, tail_tag]
                )
            },
            tx_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                    [0.1], annotation=[outlier_apr4_tag, outlier_apr3_tag]
                )
            },
        },
        start_date="2020-04-04",
    )
    test_helpers.assert_dataset_like(result.test_positivity, expected)
def test_top_level_metrics_no_pos_neg_tests_has_positivity_ratio():
    """Sets up NY inputs that have TEST_POSITIVITY but no positive/negative test counts.

    NOTE(review): only the input setup is visible here; the code that consumes `metrics` and
    `latest` is presumably defined past the end of this snippet.
    """
    ny_region = Region.from_state("NY")

    # Timeseries inputs: cases and a precomputed positivity ratio, no raw test counts.
    metrics = {
        CommonFields.CASES: [10, 20, 30, 40],
        CommonFields.NEW_CASES: [10, 10, 10, 10],
        CommonFields.TEST_POSITIVITY: [0.02, 0.03, 0.04, 0.05],
    }

    # Static values for the region.
    latest = {
        CommonFields.POPULATION: 100_000,
        CommonFields.FIPS: "36",
        CommonFields.STATE: "NY",
        CommonFields.ICU_BEDS: 10,
    }
def update_test_combined_data(truncate_dates: bool, state: List[str]):
    """Writes a reduced copy of the combined US dataset to the test CSV paths.

    Args:
        truncate_dates: When true, keep only rows dated from 2021-01-01 (inclusive) to
            2021-04-01 (exclusive).
        state: State codes to keep in addition to the fixed regions below; surrounding
            whitespace is stripped from each entry.
    """
    full_dataset = combined_datasets.load_us_timeseries_dataset()

    # Keep only a small subset of the regions so we have enough to exercise our code in tests.
    requested_states = [code.strip() for code in state]
    regions_to_keep = [
        RegionMask(states=requested_states),
        Region.from_fips("48201"),
        Region.from_fips("48301"),
        Region.from_fips("20161"),
        Region.from_state("TX"),
        Region.from_state("KS"),
    ]
    test_subset = full_dataset.get_regions_subset(regions_to_keep)

    if truncate_dates:
        row_dates = test_subset.timeseries_bucketed.index.get_level_values(CommonFields.DATE)
        in_window = (row_dates >= "2021-01-01") & (row_dates < "2021-04-01")
        windowed_timeseries = test_subset.timeseries_bucketed.loc[in_window]
        test_subset = dataclasses.replace(test_subset, timeseries_bucketed=windowed_timeseries)

    test_subset.write_to_wide_dates_csv(
        dataset_utils.TEST_COMBINED_WIDE_DATES_CSV_PATH,
        dataset_utils.TEST_COMBINED_STATIC_CSV_PATH,
    )
def test_basic():
    """Checks that `drop_all_zero_timeseries` drops zero-only series and logs what it dropped.

    VACCINES_DISTRIBUTED is all zeros for TX, zeros then a missing value for HI, and ends in a
    non-zero value for SF. Only the SF series is expected to survive; the HI CASES series is
    not in the list of fields to filter and is expected to pass through with its tag.
    """
    tx = Region.from_state("TX")
    sf = Region.from_fips("06075")
    hi = Region.from_state("HI")

    # Add a timeseries with a tag to make sure they are preserved.
    tagged_cases = TimeseriesLiteral(
        [0, 0, 0], annotation=[test_helpers.make_tag(date="2020-04-01")]
    )
    ds_in = test_helpers.build_dataset(
        {
            tx: {CommonFields.VACCINES_DISTRIBUTED: [0, 0, 0]},
            sf: {CommonFields.VACCINES_DISTRIBUTED: [0, 0, 1]},
            hi: {
                CommonFields.VACCINES_DISTRIBUTED: [0, 0, None],
                CommonFields.CASES: tagged_cases,
            },
        }
    )

    with structlog.testing.capture_logs() as captured:
        ds_out = zeros_filter.drop_all_zero_timeseries(
            ds_in, [CommonFields.VACCINES_DISTRIBUTED]
        )

    ds_expected = test_helpers.build_dataset(
        {
            sf: {CommonFields.VACCINES_DISTRIBUTED: [0, 0, 1]},
            hi: {CommonFields.CASES: tagged_cases},
        }
    )

    # Exactly one log record is expected, listing the two dropped (location, field) pairs.
    record = more_itertools.one(captured)
    assert record["event"] == zeros_filter.DROPPING_TIMESERIES_WITH_ONLY_ZEROS
    expected_dropped = pd.MultiIndex.from_tuples(
        [
            (hi.location_id, CommonFields.VACCINES_DISTRIBUTED),
            (tx.location_id, CommonFields.VACCINES_DISTRIBUTED),
        ]
    )
    assert expected_dropped.equals(record["dropped"])
    test_helpers.assert_dataset_like(ds_expected, ds_out)
def test_default_positivity_methods():
    """Runs `AllMethods.run` without an explicit method list.

    This test intentionally doesn't pass any methods to AllMethods.run to run the methods used
    in production.
    """
    as_region = Region.from_state("AS")
    tx_region = Region.from_state("TX")

    as_timeseries = {
        CommonFields.POSITIVE_TESTS: TimeseriesLiteral(
            [0, 1, 2, 3, 4, 5, 6, 7], provenance="src1"
        ),
        CommonFields.NEGATIVE_TESTS: TimeseriesLiteral(
            [10, 19, 28, 37, 46, 55, 64, 73], provenance="src1"
        ),
    }
    tx_timeseries = {
        CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
            [2, 4, 6, 8, 10, 12, 14, 16], provenance="pos_tests"
        ),
        CommonFields.TOTAL_TESTS_VIRAL: [10, 20, 30, 40, 50, 60, 70, 80],
    }
    input_dataset = test_helpers.build_dataset(
        {as_region: as_timeseries, tx_region: tx_timeseries}
    )

    # TODO(tom): Once test positivity code seems stable remove call to datetime.today() in
    # has_recent_data and remove this freeze_time.
    with freeze_time("2020-04-14"):
        result = AllMethods.run(input_dataset, diff_days=1)

    # Eight input days with diff_days=1 give seven output days, starting 2020-04-02.
    expected = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral([0.1] * 7, provenance="src1")
            },
            tx_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                    [0.2] * 7, provenance="pos_tests"
                )
            },
        },
        start_date="2020-04-02",
    )
    test_helpers.assert_dataset_like(result.test_positivity, expected)
def il_regional_input_empty_test_positivity_column(rt_dataset, icu_dataset):
    """Returns an Illinois `RegionalInput` whose TEST_POSITIVITY column exists but has no rows.

    Args:
        rt_dataset: Forwarded unchanged to `RegionalInput.from_region_and_model_output`.
        icu_dataset: Forwarded unchanged to `RegionalInput.from_region_and_model_output`.
    """
    il_region = Region.from_state("IL")
    il_timeseries = combined_datasets.load_us_timeseries_dataset().get_regions_subset(
        [il_region]
    )

    # A frame with the index columns and TEST_POSITIVITY declared, but zero rows.
    positivity_columns = [
        CommonFields.LOCATION_ID,
        CommonFields.DATE,
        CommonFields.TEST_POSITIVITY,
    ]
    no_rows = pd.DataFrame([], columns=positivity_columns)
    empty_positivity = timeseries.MultiRegionTimeseriesDataset.from_timeseries_df(no_rows)

    il_with_empty_positivity = il_timeseries.join_columns(empty_positivity)
    return api_v2_pipeline.RegionalInput.from_region_and_model_output(
        il_region, il_with_empty_positivity, rt_dataset, icu_dataset
    )
def test_column_present_with_no_data():
    """Expects `NoColumnsWithDataException` when the numerator column exists but holds no data.

    POSITIVE_TESTS is declared through `timeseries_columns` but only TOTAL_TESTS is given values.
    """
    tx_region = Region.from_state("TX")
    dataset = test_helpers.build_dataset(
        {tx_region: {CommonFields.TOTAL_TESTS: [100, 200, 400]}},
        timeseries_columns=[CommonFields.POSITIVE_TESTS],
    )
    only_method = DivisionMethod(
        DatasetName("method2"),
        CommonFields.POSITIVE_TESTS,
        CommonFields.TOTAL_TESTS,
        recent_days=1,
    )

    with pytest.raises(test_positivity.NoColumnsWithDataException):
        AllMethods.run(dataset, [only_method], diff_days=1)
def test_aggregate_reporting_ratio(reporting_ratio, expected_na):
    """Checks how `reporting_ratio_required_to_aggregate` gates the aggregated CASES series.

    NY (population 900) reports a CASES value while AZ (population 100) reports none; both are
    mapped onto the same aggregate region.

    Args:
        reporting_ratio: Passed as `reporting_ratio_required_to_aggregate` to the aggregator.
        expected_na: When true the aggregated CASES series must be empty, otherwise non-empty.

    NOTE(review): both arguments are presumably supplied by a `pytest.mark.parametrize`
    decorator that is not visible in this snippet.
    """
    ny_region = Region.from_state("NY")
    az_region = Region.from_state("AZ")
    us_region = Region.from_iso1("us")

    metrics = {ny_region: {CommonFields.CASES: [100]}, az_region: {CommonFields.CASES: [None]}}
    static = {
        ny_region: {CommonFields.POPULATION: 900},
        az_region: {CommonFields.POPULATION: 100},
    }
    dataset = test_helpers.build_dataset(metrics, static_by_region_then_field_name=static)

    # The CBSA aggregator is reused here with states standing in for counties and the US
    # region standing in for the statistical area.
    agg = statistical_areas.CountyToCBSAAggregator(
        county_map={ny_region.fips: us_region.fips, az_region.fips: us_region.fips},
        cbsa_title_map={us_region.fips: "Stat Area 1"},
        aggregations=[],
    )
    aggregation = agg.aggregate(dataset, reporting_ratio_required_to_aggregate=reporting_ratio)

    cases = aggregation.timeseries[CommonFields.CASES]
    if expected_na:
        assert not len(cases)
    else:
        assert len(cases)
def test_top_level_metrics_no_pos_neg_tests_no_positivity_ratio():
    """Sets up NY inputs with neither test counts nor a test positivity series.

    All of positive_tests, negative_tests are empty and test_positivity is absent. Make sure
    other metrics are still produced.

    NOTE(review): only the input setup is visible here; the code that consumes `metrics` and
    `latest` is presumably defined past the end of this snippet.
    """
    region_ny = Region.from_state("NY")

    # Timeseries inputs; deliberately no testing-related fields.
    metrics = {
        CommonFields.CASES: [10, 20, 30, 40],
        CommonFields.NEW_CASES: [10, 10, 10, 10],
        CommonFields.CONTACT_TRACERS_COUNT: [1, 2, 3, 4],
    }

    # Static values for the region.
    latest = {
        CommonFields.POPULATION: 100_000,
        CommonFields.FIPS: "36",
        CommonFields.STATE: "NY",
        CommonFields.ICU_BEDS: 10,
    }
def test_top_level_metrics_incomplete_latest():
    """Sets up NY inputs where `latest` lacks ICU_BEDS.

    This test doesn't have ICU_BEDS set in `latest`. It checks that the metrics are still built.

    NOTE(review): only the input setup is visible here; the code that consumes `metrics` and
    `latest` is presumably defined past the end of this snippet.
    """
    region_ny = Region.from_state("NY")

    # Timeseries inputs; some series have gaps and ICU_BEDS is missing on every day.
    metrics = {
        CommonFields.CASES: [10, 20, None, 40],
        CommonFields.NEW_CASES: [10, 10, 10, 10],
        CommonFields.TEST_POSITIVITY: [None, 0.1, 0.1, 0.1],
        CommonFields.CONTACT_TRACERS_COUNT: [1, 2, 3, 4],
        CommonFields.CURRENT_ICU: [10, 10, 10, 10],
        CommonFields.CURRENT_ICU_TOTAL: [20, 20, 20, 20],
        CommonFields.ICU_BEDS: [None, None, None, None],
    }

    latest = {
        CommonFields.POPULATION: 100_000,
        CommonFields.STATE: "NY",
        # ICU_BEDS not set
    }
def test_dataclass_include_exclude():
    """Tests datasource_regions using mock data for speed."""
    series_by_field = {CommonFields.CASES: [100, 200, 300], CommonFields.DEATHS: [0, 1, 2]}
    state_regions = [Region.from_state(code) for code in "AZ CA NY IL TX".split()]
    county_regions = [Region.from_fips(code) for code in "06037 06045 17031 17201".split()]
    mock_dataset = test_helpers.build_dataset(
        {region: series_by_field for region in state_regions + county_regions}
    )

    # Make a new subclass to keep this test separate from others in the make_dataset lru_cache.
    class DataSourceForTest(data_source.DataSource):
        EXPECTED_FIELDS = [CommonFields.CASES, CommonFields.DEATHS]
        SOURCE_TYPE = "DataSourceForTest"

        @classmethod
        def make_dataset(cls) -> timeseries.MultiRegionDataset:
            return mock_dataset

    # Unfiltered source: every region is present.
    unfiltered = DataSourceForTest.make_dataset()
    assert "iso1:us#iso2:us-tx" in unfiltered.location_ids
    assert "iso1:us#iso2:us-ny" in unfiltered.location_ids

    # Only the state of NY.
    ny_only = combined_datasets.datasource_regions(
        DataSourceForTest, RegionMask(states=["NY"])
    ).make_dataset()
    assert "iso1:us#iso2:us-tx" not in ny_only.location_ids
    assert "iso1:us#iso2:us-ny" in ny_only.location_ids

    # CA counties, with Los Angeles County (06037) explicitly excluded.
    ca_counties_without_la = combined_datasets.datasource_regions(
        DataSourceForTest,
        RegionMask(AggregationLevel.COUNTY, states=["CA"]),
        exclude=Region.from_fips("06037"),
    ).make_dataset()
    assert "iso1:us#iso2:us-tx" not in ca_counties_without_la.location_ids
    assert "iso1:us#iso2:us-ca" not in ca_counties_without_la.location_ids
    assert "iso1:us#iso2:us-ca#fips:06045" in ca_counties_without_la.location_ids
    assert "iso1:us#iso2:us-ca#fips:06037" not in ca_counties_without_la.location_ids

    # Just Cook County, IL
    cook_only = combined_datasets.datasource_regions(
        DataSourceForTest, include=Region.from_fips("17031")
    ).make_dataset()
    assert cook_only.location_ids.to_list() == ["iso1:us#iso2:us-il#fips:17031"]
def test_annotation_all_fields_copied(rt_dataset, icu_dataset):
    """Builds an IL dataset that has a timeseries with provenance for nearly every field.

    NOTE(review): only the dataset setup is visible here; the assertions that consume `ds`,
    `rt_dataset` and `icu_dataset` are presumably defined past the end of this snippet.
    """
    region = Region.from_state("IL")

    # Create a dataset with bogus data for every CommonFields, excluding a few that are not
    # expected to have timeseries values.
    fields_excluded = {*TIMESERIES_INDEX_FIELDS, *GEO_DATA_COLUMNS, CommonFields.LOCATION_ID}
    bogus_timeseries = {
        field: TimeseriesLiteral([100, 200, 300], provenance="NYTimes")
        for field in CommonFields
        if field not in fields_excluded
    }
    static_by_field = {
        CommonFields.POPULATION: 100_000,
        CommonFields.STATE: "IL",
        CommonFields.CAN_LOCATION_PAGE_URL: "http://covidactnow.org/foo/bar",
    }
    ds = test_helpers.build_default_region_dataset(
        bogus_timeseries, region=region, static=static_by_field
    )
def _transform_one_override(
    override: Mapping, cbsa_to_counties_map: Mapping[Region, List[Region]]
) -> Filter:
    """Converts one override record into a `Filter`.

    Args:
        override: Mapping with the keys "region", "include", "metric", "context" and "blocked",
            plus an optional "disclaimer". "region" is either a two-letter upper-case state
            code or a five-digit FIPS string. "include" is one of "region",
            "region-and-subregions" or "subregions".
        cbsa_to_counties_map: Counties belonging to each CBSA region; consulted only for
            "region-and-subregions" on a CBSA.

    Returns:
        A `Filter` covering the selected regions and the fields mapped from the metric.

    Raises:
        ValueError: If the region string or include value is not recognized, or if the
            include value is not valid for the kind of region given.
        KeyError: If a required key is missing from `override`, or its metric has no entry
            in `_METRIC_TO_FIELDS`.
    """
    region_str = override["region"]
    looks_like_state = re.fullmatch(r"[A-Z][A-Z]", region_str)
    if looks_like_state:
        region = Region.from_state(region_str)
    else:
        if not re.fullmatch(r"\d{5}", region_str):
            raise ValueError(f"Invalid region: {region_str}")
        region = Region.from_fips(region_str)

    include_str = override["include"]
    if include_str == "region":
        selected = [region]
    elif include_str == "subregions":
        # Only the counties of a state, not the state itself.
        if not region.is_state():
            raise ValueError("subregions only valid for a state")
        selected = [RegionMask(AggregationLevel.COUNTY, states=[region.state])]
    elif include_str == "region-and-subregions":
        if region.is_state():
            # A mask without an aggregation level; presumably matches everything in the state.
            selected = [RegionMask(states=[region.state])]
        elif region.level == AggregationLevel.CBSA:
            selected = [region] + cbsa_to_counties_map[region]
        else:
            raise ValueError("region-and-subregions only valid for a state and CBSA")
    else:
        raise ValueError(f"Invalid include: {include_str}")

    fields = _METRIC_TO_FIELDS[override["metric"]]
    internal_note = override["context"]
    public_note = override.get("disclaimer", "")
    drop_observations = bool(override["blocked"])
    return Filter(
        regions_included=selected,
        fields_included=fields,
        internal_note=internal_note,
        public_note=public_note,
        drop_observations=drop_observations,
    )
def test_make_latest_from_timeseries_dont_touch_county():
    """Checks that `latest` reports each county name exactly as given in the input rows.

    The state row has an empty county, and the expected `latest` for it has no "county" entry.
    """
    csv_text = (
        "fips,county,state,country,date,aggregate_level,m1,m2\n"
        "95123,Smith Countyy,YY,USA,2020-04-01,county,1,\n"
        "97123,Smith Countzz,ZZ,USA,2020-04-01,county,2,\n"
        "56,,WY,USA,2020-04-01,state,3,\n"
    )
    rows = read_csv_and_index_fips_date(csv_text).reset_index()
    dataset = timeseries.MultiRegionDataset.from_fips_timeseries_df(rows)

    def get_latest(region) -> dict:
        """Returns an interesting subset of latest for given region"""
        region_latest = dataset.get_one_region(region).latest
        interesting = {}
        for key in ["county", "m1", "m2"]:
            if region_latest.get(key) is not None:
                interesting[key] = region_latest[key]
        return interesting

    # m2 is empty in every row, so it never shows up.
    assert get_latest(Region.from_fips("95123")) == {"m1": 1, "county": "Smith Countyy"}
    assert get_latest(Region.from_fips("97123")) == {"m1": 2, "county": "Smith Countzz"}
    assert get_latest(Region.from_state("WY")) == {"m1": 3}
import dataclasses
from functools import lru_cache
from libs.datasets import AggregationLevel
from libs.datasets import data_source
import pandas as pd
from covidactnow.datapublic.common_fields import CommonFields
from libs.datasets.sources import can_scraper_helpers as ccd_helpers
from libs.datasets.timeseries import MultiRegionDataset
from libs.pipeline import Region

# Location ids of the District of Columbia as a county (FIPS 11001) and as a state.
DC_COUNTY_LOCATION_ID = Region.from_fips("11001").location_id
DC_STATE_LOCATION_ID = Region.from_state("DC").location_id


def _remove_trailing_zeros(series: pd.Series) -> pd.Series:
    """Returns a date-indexed copy of `series` with the values after its last non-zero blanked.

    Args:
        series: Values whose index has a level named by `CommonFields.DATE`.
            NOTE(review): the offset arithmetic below assumes that level holds datetime-like
            values — confirm against callers.

    Returns:
        A new series indexed only by date; the input is not modified. Every entry from one
        day after the last valid non-zero value onwards is set to None. If there is no valid
        non-zero value at all, every entry is set to None.
    """
    dates = series.index.get_level_values(CommonFields.DATE)
    trimmed = pd.Series(series.values.copy(), index=dates)

    last_nonzero_date = trimmed.loc[trimmed != 0].last_valid_index()
    if last_nonzero_date is not None:
        trimmed[last_nonzero_date + pd.DateOffset(1):] = None
        return trimmed

    # If test positivity is 0% the entire time, the data is considered inaccurate and the
    # whole series is blanked.
    trimmed[:] = None
    return trimmed
def test_recent_days():
    """Checks how `recent_days` changes which method supplies test positivity for a region.

    The AS POSITIVE_TESTS_VIRAL series has no values on its last two days. With recent_days=2
    the expected AS provenance is "pos"; with recent_days=3 it is "pos_viral". TX has complete
    data and is expected to be "pos_viral" in both runs.
    """
    as_region = Region.from_state("AS")
    tx_region = Region.from_state("TX")

    as_timeseries = {
        CommonFields.POSITIVE_TESTS: TimeseriesLiteral([0, 2, 4, 6], provenance="pos"),
        CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
            [0, 20, None, None], provenance="pos_viral"
        ),
        CommonFields.TOTAL_TESTS: TimeseriesLiteral([100, 200, 300, 400]),
    }
    tx_timeseries = {
        CommonFields.POSITIVE_TESTS: TimeseriesLiteral([1, 2, 3, 4], provenance="pos"),
        CommonFields.POSITIVE_TESTS_VIRAL: TimeseriesLiteral(
            [10, 20, 30, 40], provenance="pos_viral"
        ),
        CommonFields.TOTAL_TESTS: TimeseriesLiteral([100, 200, 300, 400]),
    }
    dataset = test_helpers.build_dataset({as_region: as_timeseries, tx_region: tx_timeseries})

    base_methods = [
        DivisionMethod(
            DatasetName("method1"), CommonFields.POSITIVE_TESTS_VIRAL, CommonFields.TOTAL_TESTS
        ),
        DivisionMethod(
            DatasetName("method2"), CommonFields.POSITIVE_TESTS, CommonFields.TOTAL_TESTS
        ),
    ]

    # First run: recent_days=2.
    methods_recent_2 = _replace_methods_attribute(base_methods, recent_days=2)
    result_recent_2 = AllMethods.run(dataset, methods_recent_2, diff_days=1)

    expected_recent_2 = test_helpers.build_dataset(
        {
            as_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                    [0.02, 0.02, 0.02], provenance="pos"
                )
            },
            tx_region: {
                CommonFields.TEST_POSITIVITY: TimeseriesLiteral(
                    [0.1, 0.1, 0.1], provenance="pos_viral"
                )
            },
        },
        start_date="2020-04-02",
    )
    test_helpers.assert_dataset_like(result_recent_2.test_positivity, expected_recent_2)

    as_provenance = result_recent_2.test_positivity.get_one_region(as_region).provenance
    assert as_provenance == {CommonFields.TEST_POSITIVITY: ["pos"]}
    tx_provenance = result_recent_2.test_positivity.get_one_region(tx_region).provenance
    assert tx_provenance == {CommonFields.TEST_POSITIVITY: ["pos_viral"]}

    # Second run: recent_days=3, applied on top of the methods used in the first run.
    methods_recent_3 = _replace_methods_attribute(methods_recent_2, recent_days=3)
    result_recent_3 = AllMethods.run(dataset, methods_recent_3, diff_days=1)

    provenance_by_location = result_recent_3.test_positivity.provenance
    assert provenance_by_location.loc["iso1:us#iso2:us-as"].to_dict() == {
        CommonFields.TEST_POSITIVITY: "pos_viral"
    }
    assert provenance_by_location.loc["iso1:us#iso2:us-tx"].to_dict() == {
        CommonFields.TEST_POSITIVITY: "pos_viral"
    }
def derive_ca_county_vaccine_pct(ds_in: MultiRegionDataset) -> MultiRegionDataset:
    """Derives vaccination metrics for CA counties based on State 1st vs 2nd dose reporting.

    For each date the state-level ratios initiated/administered and completed/administered
    are applied to every county's VACCINES_ADMINISTERED, and the estimates are expressed as
    a percentage of the county's static POPULATION.

    Args:
        ds_in: Dataset containing the state of CA and its counties.

    Returns:
        The result of `ds_in.replace_timeseries_wide_dates` given the derived
        VACCINATIONS_COMPLETED_PCT and VACCINATIONS_INITIATED_PCT rows for CA counties
        together with all existing wide-dates rows of `ds_in`.

    Raises:
        AssertionError: If any CA county already has a value for one of the initiated or
            completed fields (count or percent) in the "all" bucket.
    """
    ca_counties = ds_in.get_subset(aggregation_level=AggregationLevel.COUNTY, state="CA")

    # Get county level time-series in distribution bucket "all". Keep the bucket in the index so
    # that the concat at the bottom of this function has the correct labels for each time-series.
    county_wide = ca_counties.timeseries_bucketed_wide_dates.xs(
        DemographicBucket.ALL, level=PdFields.DEMOGRAPHIC_BUCKET, drop_level=False
    )

    # Assert that possible fields we want to estimate are all NA - if one of these is
    # not NA, likely do not need to estimate anymore and this methodology can be removed.
    must_be_empty = [
        CommonFields.VACCINATIONS_INITIATED,
        CommonFields.VACCINATIONS_COMPLETED,
        CommonFields.VACCINATIONS_INITIATED_PCT,
        CommonFields.VACCINATIONS_COMPLETED_PCT,
    ]
    assert county_wide.loc[(slice(None), must_be_empty), :].isna().all().all()

    state_only = ds_in.get_regions_subset([Region.from_state("CA")])
    state_wide = state_only.timeseries_bucketed_wide_dates.xs(
        DemographicBucket.ALL, level=PdFields.DEMOGRAPHIC_BUCKET, drop_level=False
    )
    # Drop location index because not used to apply to county level data
    state_wide = state_wide.droplevel(CommonFields.LOCATION_ID)

    # State-level share of administered doses that initiated / completed a vaccination.
    state_administered = state_wide.loc(axis=0)[CommonFields.VACCINES_ADMINISTERED]
    initiated_share = (
        state_wide.loc(axis=0)[CommonFields.VACCINATIONS_INITIATED] / state_administered
    )
    completed_share = (
        state_wide.loc(axis=0)[CommonFields.VACCINATIONS_COMPLETED] / state_administered
    )

    county_administered = county_wide.loc(axis=0)[:, CommonFields.VACCINES_ADMINISTERED]
    estimated_initiated = county_administered * initiated_share
    estimated_completed = county_administered * completed_share

    county_population = ca_counties.static.loc[:, CommonFields.POPULATION]

    def _as_population_pct(estimated_people, pct_field):
        """Scales estimated counts to a percent of population, relabelled as `pct_field`."""
        pct = (
            estimated_people.div(
                county_population, level=CommonFields.LOCATION_ID, axis="index",
            )
            * 100
        )
        # The rows still carry the VACCINES_ADMINISTERED label they were derived from.
        return pct.rename(
            index={CommonFields.VACCINES_ADMINISTERED: pct_field}, level=PdFields.VARIABLE,
        )

    vaccines_initiated_pct = _as_population_pct(
        estimated_initiated, CommonFields.VACCINATIONS_INITIATED_PCT
    )
    vaccines_completed_pct = _as_population_pct(
        estimated_completed, CommonFields.VACCINATIONS_COMPLETED_PCT
    )

    existing_wide = ds_in.timeseries_bucketed_wide_dates
    # Because we assert that existing dataset does not have CA county VACCINATIONS_COMPLETED_PCT
    # or VACCINATIONS_INITIATED_PCT we can safely combine the existing rows with new derived rows
    return ds_in.replace_timeseries_wide_dates(
        [vaccines_completed_pct, vaccines_initiated_pct, existing_wide]
    )