# Example #1
# 0
def testConvertInclHisp():
    """Verify the race_includes_hispanic flag after standardization.

    The generated test data is tweaked so GA stops reporting race for
    deaths; AL reports race and ethnicity separately; PA reports them
    combined.
    """
    metadata = CtpMetadata()
    raw = generate_test_data()
    # Change the default so that GA doesn't report race for deaths.
    raw.at[2, 'race_death'] = 0

    standardized = metadata.standardize(raw)
    with pd.option_context('display.max_columns', None):
        print(standardized.query('state_postal == "GA" and variable_type == "cases"'))

    def incl_hisp(state, variable):
        # Single-row lookup of the race_includes_hispanic flag for one
        # state/variable_type combination.
        row_mask = ((standardized['state_postal'] == state) &
                    (standardized['variable_type'] == variable))
        return standardized.loc[row_mask, 'race_includes_hispanic'].item()

    # GA: deaths no longer report race, but cases still do.
    assert incl_hisp('GA', 'deaths') == 0
    assert incl_hisp('GA', 'cases') == 1
    # AL reported race_ethnicity_separately, so race_includes_hispanic should be set.
    assert incl_hisp('AL', 'deaths') == 1
    assert incl_hisp('AL', 'cases') == 1
    # PA reported race_ethnicity_combined, so race_includes_hispanic should not be set.
    assert incl_hisp('PA', 'deaths') == 0
    assert incl_hisp('PA', 'cases') == 0
# Example #2
# 0
def testWriteToBq(mock_bq: mock.MagicMock, mock_csv: mock.MagicMock):
    """Check that write_to_bq hands the expected dataframe to BigQuery."""
    metadata = CtpMetadata()
    kwargs = {'filename': 'test_file.csv', 'table_name': 'output_table'}
    metadata.write_to_bq('dataset', 'gcs_bucket', **kwargs)

    # The dataframe is the first positional argument of the mocked BQ call.
    written = mock_bq.call_args.args[0]
    expected = get_expected_data()

    # Check that the contents of the dataframes are the same, ignoring column order.
    index_cols = [col_std.STATE_POSTAL_COL, 'variable_type']
    assert_frame_equal(
        written.set_index(index_cols, drop=False),
        expected.set_index(index_cols, drop=False),
        check_like=True)
def testWriteToBq(mock_bq: mock.MagicMock, mock_csv: mock.MagicMock):
    """Check the columns and per-state flag values written to BigQuery.

    NOTE(review): this is the second module-level definition named
    testWriteToBq in this file; it shadows the earlier one, so only this
    version would be collected and run — confirm whether both tests were
    meant to exist (one likely needs a distinct name).
    """
    ctp = CtpMetadata()
    kwargs = {'filename': 'test_file.csv', 'table_name': 'output_table'}
    ctp.write_to_bq('dataset', 'gcs_bucket', **kwargs)
    # The dataframe is passed as the first positional argument to the
    # (mocked) BigQuery writer.
    result = mock_bq.call_args.args[0]
    expected_cols = [
         'state_postal_abbreviation', 'reports_api', 'defines_other',
         'race_ethnicity_separately', 'race_ethnicity_combined',
         'race_mutually_exclusive', 'reports_ind', 'reports_race',
         'reports_ethnicity', 'variable_type']
    # Compare as sets: column order doesn't matter for this check.
    assert set(result.columns) == set(expected_cols)
    # We should have a record for each state/variable_type (e.g. cases, death)
    # combo
    assert len(result.index) == 3 * 2
    # AL rows: every value (including every boolean flag) is truthy.
    assert result.loc[result['state_postal_abbreviation'] == 'AL'].all().all()
    # PA rows: every column from 'reports_api' onward is falsy. This is a
    # label-based column slice, so it relies on the dataframe's column order.
    assert not result.loc[result['state_postal_abbreviation'] == 'PA', 'reports_api':].any().any()
    assert result.loc[result['state_postal_abbreviation'] == 'GA'].all().all()
    assert result['variable_type'].isin(['cases', 'deaths']).all()

    # NOTE(review): this nested helper is defined but never called within the
    # test; it looks like it belongs at module level (or is paste residue) —
    # confirm before relying on or removing it.
    def _download_metadata(dataset: str) -> pd.DataFrame:
        """Downloads the metadata table from BigQuery by executing a query.

        Args:
        dataset: Name of the dataset to request metadata from

        Returns:
        A pandas.DataFrame containing the contents of the requested table."""
        client = bigquery.Client()
        # Scope unqualified table names in the SQL to the requested dataset.
        job_config = bigquery.QueryJobConfig(
            default_dataset=client.get_dataset(dataset))
        sql = """
        SELECT *
        FROM {};
        """.format(CtpMetadata.get_table_name())
        return client.query(sql, job_config=job_config).to_dataframe()
from datasources.acs_population import ACSPopulation
from datasources.cdc_covid_deaths import CDCCovidDeaths
from datasources.county_adjacency import CountyAdjacency
from datasources.county_names import CountyNames
from datasources.covid_tracking_project import CovidTrackingProject
from datasources.covid_tracking_project_metadata import CtpMetadata
from datasources.household_income import HouseholdIncome
from datasources.manual_uploads import ManualUploads
from datasources.primary_care_access import PrimaryCareAccess
from datasources.state_names import StateNames
from datasources.urgent_care_facilities import UrgentCareFacilities


# All data source classes that implement the ingestion methods. Instances are
# created once at import time, mirroring the original literal mapping.
_DATA_SOURCE_CLASSES = (
    ACSPopulation,
    CDCCovidDeaths,
    CountyAdjacency,
    CountyNames,
    CovidTrackingProject,
    CtpMetadata,
    HouseholdIncome,
    ManualUploads,
    PrimaryCareAccess,
    StateNames,
    UrgentCareFacilities,
)

# Map of data source ID to the class that implements the ingestion methods for
# that data source.
DATA_SOURCES_DICT = {
    source_cls.get_id(): source_cls() for source_cls in _DATA_SOURCE_CLASSES
}