def testStandardize():
    """Verify the columns and race values produced by ``standardize``.

    The standardized frame must have exactly the five expected columns,
    and every value in the race column must be the ``race`` attribute of
    some ``col_std.Race`` member.
    """
    project = CovidTrackingProject()
    raw_frame = get_test_data_as_df()
    standardized = project.standardize(raw_frame)

    wanted_columns = {
        'date',
        col_std.STATE_POSTAL_COL,
        col_std.RACE_COL,
        'variable_type',
        'value',
    }
    assert set(standardized.columns) == wanted_columns

    # Only a subset check: the test data need not contain every race.
    known_races = {member.race for member in col_std.Race}
    observed_races = set(standardized[col_std.RACE_COL])
    assert observed_races <= known_races
def testMergeWithMetadata():
    """Verify the result of standardizing and then merging with metadata.

    The merged frame must have exactly the standardized columns plus
    ``reports_race`` and ``race_ethnicity_separately``, and its race column
    must contain exactly the set of race categories listed below.
    """
    project = CovidTrackingProject()
    raw_frame = get_test_data_as_df()
    metadata_frame = get_test_metadata_as_df()

    standardized = project.standardize(raw_frame)
    merged = project.merge_with_metadata(standardized, metadata_frame)

    wanted_columns = {
        'date',
        col_std.STATE_POSTAL_COL,
        col_std.RACE_COL,
        'variable_type',
        'value',
        'reports_race',
        'race_ethnicity_separately',
    }
    assert set(merged.columns) == wanted_columns

    # Exact match (not a subset): every listed category must appear in the
    # merged data, and no others.
    wanted_members = (
        col_std.Race.AIAN,
        col_std.Race.API,
        col_std.Race.ASIAN,
        col_std.Race.BLACK,
        col_std.Race.HISP,
        col_std.Race.INDIGENOUS,
        col_std.Race.NHPI,
        col_std.Race.MULTI,
        col_std.Race.WHITE,
        col_std.Race.NH,
        col_std.Race.ETHNICITY_UNKNOWN,
        col_std.Race.OTHER_NONSTANDARD,
        col_std.Race.UNKNOWN,
        col_std.Race.TOTAL,
    )
    wanted_races = {member.race for member in wanted_members}
    assert set(merged[col_std.RACE_COL]) == wanted_races