def testConvertInclHisp():
    """Check the race_includes_hispanic values produced by CtpMetadata.standardize.

    Each (state, variable_type) pair in the standardized frame is expected
    to resolve to exactly one row, whose race_includes_hispanic value is
    compared against the expected 0/1 flag.
    """
    metadata_source = CtpMetadata()
    raw = generate_test_data()
    # Change the default so that GA doesn't report race for deaths
    raw.at[2, 'race_death'] = 0
    standardized = metadata_source.standardize(raw)

    # Show every column of the GA cases row(s) in the test output.
    with pd.option_context('display.max_columns', None):
        print(standardized.query(
            'state_postal == "GA" and variable_type == "cases"'))

    def includes_hispanic(state, variable_type):
        # .item() raises unless exactly one row matches the pair.
        state_rows = standardized['state_postal'] == state
        type_rows = standardized['variable_type'] == variable_type
        return standardized.loc[
            state_rows & type_rows, 'race_includes_hispanic'].item()

    expectations = [
        ('GA', 'deaths', 0),
        ('GA', 'cases', 1),
        # AL reported race_ethnicity_separately, so race_includes_hispanic
        # should be set.
        ('AL', 'deaths', 1),
        ('AL', 'cases', 1),
        # PA reported race_ethnicity_combined, so race_includes_hispanic
        # should not be set.
        ('PA', 'deaths', 0),
        ('PA', 'cases', 0),
    ]
    for state, variable_type, expected_flag in expectations:
        assert includes_hispanic(state, variable_type) == expected_flag
def testWriteToBq(mock_bq: mock.MagicMock, mock_csv: mock.MagicMock):
    """Compare the frame handed to the mocked BigQuery call with the expected data.

    Args:
      mock_bq: Mock whose first positional call argument is taken to be the
        written dataframe.
      mock_csv: Mock that is not inspected here (presumably patches the CSV
        load; the patch decorators are not visible in this snippet).
    """
    ctp = CtpMetadata()
    ctp.write_to_bq(
        'dataset', 'gcs_bucket',
        filename='test_file.csv', table_name='output_table')

    written = mock_bq.call_args.args[0]
    wanted = get_expected_data()

    # Check that the contents of the dataframes are the same, ignoring column
    # order. Both frames are indexed on the same key columns (kept as regular
    # columns too) before comparison.
    key_columns = [col_std.STATE_POSTAL_COL, 'variable_type']
    assert_frame_equal(
        written.set_index(key_columns, drop=False),
        wanted.set_index(key_columns, drop=False),
        check_like=True)
def testWriteToBq(mock_bq: mock.MagicMock, mock_csv: mock.MagicMock):
    """Check the columns, row count and flag values of the frame passed to the mocked BigQuery call.

    Args:
      mock_bq: Mock whose first positional call argument is taken to be the
        written dataframe.
      mock_csv: Mock that is not inspected here (presumably patches the CSV
        load; the patch decorators are not visible in this snippet).
    """
    ctp = CtpMetadata()
    ctp.write_to_bq(
        'dataset', 'gcs_bucket',
        filename='test_file.csv', table_name='output_table')
    written = mock_bq.call_args.args[0]

    state_col = 'state_postal_abbreviation'
    expected_cols = {
        state_col,
        'reports_api',
        'defines_other',
        'race_ethnicity_separately',
        'race_ethnicity_combined',
        'race_mutually_exclusive',
        'reports_ind',
        'reports_race',
        'reports_ethnicity',
        'variable_type',
    }
    assert set(written.columns) == expected_cols

    # We should have a record for each state/variable_type (e.g. cases, death)
    # combo
    assert len(written.index) == 3 * 2

    def state_mask(state):
        return written[state_col] == state

    # Every value in the AL rows must be truthy.
    assert written.loc[state_mask('AL')].all().all()
    # No value in the PA rows, from the 'reports_api' column onward, may be
    # truthy.
    assert not written.loc[state_mask('PA'), 'reports_api':].any().any()
    # Every value in the GA rows must be truthy.
    assert written.loc[state_mask('GA')].all().all()
    assert written['variable_type'].isin(['cases', 'deaths']).all()
def _download_metadata(dataset: str) -> pd.DataFrame:
    """Queries BigQuery for every row of the CtpMetadata table.

    Args:
      dataset: Name of the dataset to request metadata from

    Returns:
      A pandas.DataFrame containing the contents of the requested table."""
    bq_client = bigquery.Client()
    # The dataset is supplied as the job's default dataset; the query below
    # names only the table.
    default_dataset = bq_client.get_dataset(dataset)
    config = bigquery.QueryJobConfig(default_dataset=default_dataset)

    query_template = """ SELECT * FROM {}; """
    query = query_template.format(CtpMetadata.get_table_name())

    query_job = bq_client.query(query, job_config=config)
    return query_job.to_dataframe()
from datasources.acs_population import ACSPopulation
from datasources.cdc_covid_deaths import CDCCovidDeaths
from datasources.county_adjacency import CountyAdjacency
from datasources.county_names import CountyNames
from datasources.covid_tracking_project import CovidTrackingProject
from datasources.covid_tracking_project_metadata import CtpMetadata
from datasources.household_income import HouseholdIncome
from datasources.manual_uploads import ManualUploads
from datasources.primary_care_access import PrimaryCareAccess
from datasources.state_names import StateNames
from datasources.urgent_care_facilities import UrgentCareFacilities

# Data source classes to register, in registration order.
_DATA_SOURCE_CLASSES = (
    ACSPopulation,
    CDCCovidDeaths,
    CountyAdjacency,
    CountyNames,
    CovidTrackingProject,
    CtpMetadata,
    HouseholdIncome,
    ManualUploads,
    PrimaryCareAccess,
    StateNames,
    UrgentCareFacilities,
)

# Map of data source ID to the class that implements the ingestion methods for
# that data source. Each value is a fresh instance of the class, keyed by that
# class's own get_id().
DATA_SOURCES_DICT = {
    source_cls.get_id(): source_cls() for source_cls in _DATA_SOURCE_CLASSES
}