def verify_maup(state_prec_gdf, state_report):
    """Check whether maup.assign can match this state's precincts to counties.

    Returns True when the assignment succeeds, False (with the error printed)
    otherwise.
    """
    # Counties belonging to this state, selected by FIPS code.
    counties = census_us_county_gdf[
        census_us_county_gdf["STATEFP"] == state_report.fips
    ]

    # maup.assign requires both frames to share a CRS; assume WGS84 when the
    # precinct file carries none, then reproject onto the county CRS.
    if not state_prec_gdf.crs:
        state_prec_gdf = state_prec_gdf.set_crs("EPSG:4326")
    state_prec_gdf = state_prec_gdf.to_crs(counties.crs)
    assert state_prec_gdf.crs == counties.crs

    repaired = fix_buffer(state_prec_gdf)
    try:
        maup.assign(repaired, counties)
    except Exception as error:
        print("Unable to use MAUP assign: \n\n", error)
        return False
    print("MAUP assign was successful")
    return True
def test_assign_dispatches_to_without_area_and_with_area(
        four_square_grid, squares_some_neat_some_overlapping, crs):
    """assign() should handle a mix of neatly-contained and overlapping squares."""
    targets = squares_some_neat_some_overlapping
    sources = four_square_grid.set_index("ID")
    sources.crs = crs
    print(targets.crs, sources.crs)

    result = assign(targets, sources)

    expected = pandas.Series(["a", "a", "b", "d", "b"], index=targets.index)
    assert (expected == result).all()
def test_example_case():
    """Aggregate block-level demographics up to the new precincts."""
    # Loosely based on the test_example_case function in test_prorate.py.
    blocks = geopandas.read_file("zip://./examples/blocks.zip")
    precincts = geopandas.read_file("zip://./examples/new_precincts.zip")
    demo_cols = ["TOTPOP", "BVAP", "WVAP", "HISP"]

    block_to_precinct = assign(blocks, precincts)
    precincts[demo_cols] = blocks[demo_cols].groupby(block_to_precinct).sum()

    # Most precincts should pick up some population.
    assert (precincts[demo_cols] > 0).sum().sum() > len(precincts)

    # Totals cannot match exactly because the blocks do not neatly cover the
    # precincts, but they should agree to within 50%.
    for col in demo_cols:
        assert abs(precincts[col].sum() - blocks[col].sum()) / blocks[col].sum() < 0.5
def test_crop_to():
    """Compare proration run on raw vs. cropped old-precinct geometries.

    Cropping the old precincts to the support of the new precincts should
    never lose votes relative to the uncropped calculation.
    """
    blocks = geopandas.read_file("zip://./examples/blocks.zip")
    old_precincts = geopandas.read_file("zip://./examples/precincts.zip")
    new_precincts = geopandas.read_file("zip://./examples/new_precincts.zip")
    columns = ["SEN18D", "SEN18R"]

    # Calculate without cropping.
    pieces = maup.intersections(old_precincts, new_precincts, area_cutoff=0)
    weights = blocks["TOTPOP"].groupby(maup.assign(blocks, pieces)).sum()
    weights = maup.normalize(weights, level=0)
    new_precincts[columns] = maup.prorate(pieces, old_precincts[columns], weights=weights)

    # Calculate with cropping.
    # BUG FIX: the cropped shapes were previously assigned to a brand-new
    # "geometries" column; GeoPandas only uses the active "geometry" column,
    # so the "cropped" run silently reused the uncropped shapes.
    old_precincts["geometry"] = maup.crop_to(old_precincts, new_precincts)
    new_precincts_cropped = new_precincts.copy()
    pieces = maup.intersections(old_precincts, new_precincts_cropped, area_cutoff=0)
    weights = blocks["TOTPOP"].groupby(maup.assign(blocks, pieces)).sum()
    weights = maup.normalize(weights, level=0)
    new_precincts_cropped[columns] = maup.prorate(pieces, old_precincts[columns], weights=weights)

    # NOTE(review): new_precincts_cropped's geometry is never modified (only
    # old_precincts is cropped), so this inequality looks like it cannot hold
    # as written — confirm what geometries it was meant to compare.
    assert new_precincts_cropped.area.sum() != new_precincts.area.sum()

    diff_sum = 0
    for col in columns:
        diff = new_precincts_cropped[col].sum() - new_precincts[col].sum()
        assert diff >= 0
        diff_sum += diff
    # Ideally this would be strictly positive (which would mean less votes are
    # lost after cropping) but crop_to doesn't resolve the missing votes
    # errors yet.
    assert diff_sum >= 0
def test_example_case():
    """Prorate old-precinct vote totals onto the new precinct geometries."""
    blocks = geopandas.read_file("zip://./examples/blocks.zip")
    old_precincts = geopandas.read_file("zip://./examples/precincts.zip")
    new_precincts = geopandas.read_file("zip://./examples/new_precincts.zip")
    vote_cols = ["SEN18D", "SEN18R"]

    # area_cutoff=0 ignores zero-area intersections such as shared
    # boundaries, which should not receive any prorated votes.
    pieces = intersections(old_precincts, new_precincts, area_cutoff=0)

    # Weight each piece by the block population assigned to it — i.e. use
    # blocks to estimate the population of each piece.
    weights = blocks["TOTPOP"].groupby(assign(blocks, pieces)).sum()

    new_precincts[vote_cols] = prorate(pieces, old_precincts[vote_cols], weights=weights)

    # At least half of the new precincts should receive votes.
    assert (new_precincts[vote_cols] > 0).sum().sum() > len(new_precincts) / 2
def prorate(target, source, targetcol, sourcecol, columns):
    """
    Prorates data from the source geometries down to the target geometries.

    Each target geometry receives a share of its containing source geometry's
    data proportional to ``target[targetcol] / source[sourcecol]``.

    :param target: Target geometries.
    :param source: Source geometries.
    :param targetcol: Column for target weights.
    :param sourcecol: Column for source weights.
    :param columns: Columns to prorate.
    :return: Geodataframe with prorated data (``target``, mutated in place).
    """
    # Map every target geometry to a source geometry.
    assignment = maup.assign(target, source)
    # Each target's share of its source's weight column.
    share = target[targetcol] / assignment.map(source[sourcecol])
    target[columns] = maup.prorate(assignment, source[columns], share)
    return target
def assign_GEOID(state_prec_gdf, state_fips):
    """
    Return the (GeoDataFrame) with a 'GEOID' column giving each precinct's county.

    :state_prec_gdf: (GeoDataFrame) with statewide precinct level election results
    :state_fips: (int) Federal Information Processing Standard state code

    The returned GeoDataFrame's 'GEOID' column conforms to the GEOID spec:
    5-character strings whose first 2 characters are the StateFP code and
    whose last 3 characters are the CountyFP code, each zero-padded from the
    left when short.

    e.g. Massachusetts' StateFP = '25', Essex County's CountyFP = '009',
    so Essex County, Massachusetts' GEOID = '25009'; Alaska (StateFP = 2)
    becomes '02'.
    """
    fips = str(state_fips).zfill(2)
    counties = census_us_county_gdf[census_us_county_gdf["STATEFP"] == fips]

    # Match projections (necessary for maup.assign); assume WGS84 when the
    # precinct file carries no CRS.
    if not state_prec_gdf.crs:
        state_prec_gdf = state_prec_gdf.set_crs("EPSG:4326")
    state_prec_gdf = state_prec_gdf.to_crs(counties.crs)
    assert state_prec_gdf.crs == counties.crs

    # Assign each (buffer-repaired) precinct to its county.
    state_prec_gdf["maup_assignment"] = maup.assign(
        fix_buffer(state_prec_gdf), counties
    )

    def county_geoid(idx):
        # StateFP + zero-padded CountyFP of the assigned county.
        return fips + str(counties.loc[idx]["COUNTYFP"]).zfill(3)

    state_prec_gdf["GEOID"] = state_prec_gdf["maup_assignment"].map(county_geoid)

    # Every county in the state must appear among the precinct GEOIDs.
    assert counties["GEOID"].nunique() == state_prec_gdf["GEOID"].nunique()
    return state_prec_gdf
def dissolve(source, join="CONGDIST", columns=None):
    """
    Dissolves source geography boundaries based on a column which identifies
    the smaller geography with the larger one.

    :param source: String or geodataframe; string is a filepath, geodataframe
        is the source itself.
    :param join: String; column on which boundaries are joined; optional.
    :param columns: List; columns to sum when dissolving; optional.
    :return: Geodataframe with dissolved boundaries.
    """
    # BUG FIX: the default was a mutable list ([]), which is shared across
    # calls; use None as the sentinel instead (behavior unchanged).
    columns = [] if columns is None else columns

    # Accept either a filepath or an already-loaded geodataframe.
    # (isinstance, not type(...) ==, per standard Python practice.)
    source = gpd.read_file(source) if isinstance(source, str) else source

    # Dissolve the small geometries (e.g. VTDs) into the larger ones.
    target = source[[join, "geometry"]].dissolve(by=join)

    # If columns are specified, aggregate their data from the small
    # geometries up to the dissolved target.
    if columns:
        assignment = maup.assign(source, target)
        target[columns] = source[columns].groupby(assignment).sum()
    return target
# --- NJ 2016 partnership precincts: dedupe, dissolve, label the voter roll ---
# NOTE(review): several bare expressions below (``.nunique()``, ``.crs`` etc.)
# are REPL/notebook-style inspections; they have no effect when run as a script.
partnership = gpd.read_file(
    '/Users/hopecj/projects/gerryspam/NJ/dat/partnership-2016/unzipped/extracted/precincts/compiled.shp'
)
# Build a "county,precinct-name" key for each precinct.
partnership[
    "loc_prec"] = partnership['COUNTYFP'] + ',' + partnership['NAMELSAD']
partnership['loc_prec'].nunique()
partnership[partnership.duplicated(['loc_prec'])]
partnership.shape
partnership = partnership.dissolve(
    by='loc_prec', as_index=False)  # dissolve precincts with the same name
partnership.rename(columns={"loc_prec": "id"}, inplace=True)
partnership = partnership[["id", "geometry"]]

# voter roll
vr = gpd.read_file(
    '/Users/hopecj/projects/gerryspam/NJ/dat/Geocoded VR/NJ_CivisVRblocks.shp')

# give voter roll precinct labels
vr.crs
partnership.crs
# NOTE(review): this assert runs BEFORE the CRS is set/reprojected just below;
# if the two files load with different CRSs it fails — confirm the intended
# order of these statements.
assert vr.crs == partnership.crs
partnership.crs = "epsg:2160"
partnership.to_crs(vr.crs, inplace=True)
partnership.to_file("out_partnership.shp")
vr.to_file("out_vr.shp")

# assign voter roll to precincts
assignment = maup.assign(vr, partnership)
assignment.isna().sum()  # inspection: voters that failed to match a precinct
vr["prec_2019"] = assignment
#### PART 1: DISAGGREGATE ACS19 FROM BLOCK GROUPS TO 2010 BLOCKS ####################################################################### # set acs cols - all demographic columns in block groups bgs.dtypes blocks.dtypes bg_cols = list(bgs.columns)[12:37] bgs[bg_cols] = bgs[bg_cols].astype(float) bgs.dtypes # assign blocks to block groups and disaggregate based on population assignment = maup.assign(blocks, bgs) # prorate ACS columns by 2010 pop weights = blocks.tot / assignment.map(bgs.tot10) prorated = maup.prorate(assignment, bgs[bg_cols], weights) blocks[bg_cols] = prorated #### TESTING # check total population at block level and block group level blocks['tot19'].sum() #12792129 bgs['tot19'].sum() #12791530 blocks['tot'].sum() #12702379 bgs['tot10'].sum() #12702379 # # save blocks with dec 10 + acs 19
# checkDataFrame(bgs) ########################################### #### PART 2: DISAGGREGATE ACS19 to 2010 BLOCKS ########################################### # set acs cols - all demographic columns in block groups bgs.dtypes blocks.dtypes acs_cols = list(bgs.columns)[12:31] block_cols = list(blocks.columns)[15:62] # assign blocks to block groups and disaggregate based on population assignment = maup.assign(blocks, bgs) # We prorate the vote totals according to each block's share of the overall bg population: weights = blocks.tot / assignment.map(bgs.tot10) prorated = maup.prorate(assignment, bgs[acs_cols], weights) blocks[acs_cols] = prorated test = bgs[acs_cols].dtypes test2 = bgs[acs_cols] #### TESTING # check total population at block level and block group level blocks['tot19'].sum() #9966182 bgs['tot19'].sum() #9965265 blocks['tot'].sum() #9883640 bgs['tot10'].sum() #9883640
# Load the Chicago-area layers for three censuses plus the 2010 blocks.
il2010 = gpd.read_file(chicago_2010_file)
il2000 = gpd.read_file(chicago_2000_file)
il1990 = gpd.read_file(chicago_1990_file)
blocks = gpd.read_file(chicago_blocks_file)

# NOTE(review): assigning ``.crs`` only relabels the CRS, it does not
# reproject — this assumes the 2000/1990 files are already in the same
# projection as 2010; confirm. The blocks ARE reprojected.
il2000.crs = il2010.crs
il1990.crs = il2010.crs
blocks.to_crs(il2010.crs, inplace=True)

# Ensure the demographic columns are numeric.
for c in columns:
    il2010[c] = il2010[c].astype(int)
    il2000[c] = il2000[c].astype(int)
    il1990[c] = il1990[c].astype(int)

# Overlap pieces between each older census layer and the 2010 layer.
pieces2000 = maup.intersections(il2000, il2010, area_cutoff=0)
pieces1990 = maup.intersections(il1990, il2010, area_cutoff=0)

# Weight each piece by the 2010 block population assigned to it, then
# normalize the weights within each source geometry (level=0).
weights2000 = blocks["TOTPOP"].groupby(maup.assign(blocks, pieces2000)).sum()
weights1990 = blocks["TOTPOP"].groupby(maup.assign(blocks, pieces1990)).sum()
weights2000 = maup.normalize(weights2000, level=0)
weights1990 = maup.normalize(weights1990, level=0)

# Prorate the older censuses onto the 2010 geometries.
il2010[columns2000] = maup.prorate(pieces2000, il2000[columns], weights=weights2000)
il2010[columns1990] = maup.prorate(pieces1990, il1990[columns], weights=weights1990)

# Visual/printed checks: where did the 2000 proration produce NaNs?
il2010.plot(column=il2010["TOTPOP_2000"].isna())
plt.show()
print(il2010["NH_BLACK_2000"])
print(il2010["TOTPOP_2000"])


def relentropy(df, races, totpop_col):
    # NOTE(review): this definition appears truncated in this excerpt —
    # only its first statement is visible here.
    totpop = sum(x for x in df[totpop_col] if not isnan(x))
blocks[coi_col] = blocks_within[coi_col] # create district columns for each map plan_dict['court']['DistNum'] = plan_dict['court']['District_1'].map( lambda x: str(x).zfill(3)) plan_dict['enacted']['DistNum'] = plan_dict['enacted']['HOUSE_TA_6'].map( lambda x: str(x).zfill(3)) plan_dict['reform']['DistNum'] = plan_dict['reform']['DISTRICT_N'].map( lambda x: str(x).zfill(3)) plan_dict['court'].set_index('DistNum', inplace=True) plan_dict['enacted'].set_index('DistNum', inplace=True) plan_dict['reform'].set_index('DistNum', inplace=True) # assign blocks to map districts blocks['court'] = maup.assign(blocks, plan_dict['court']) blocks['enacted'] = maup.assign(blocks, plan_dict['enacted']) blocks['reform'] = maup.assign(blocks, plan_dict['reform']) # isolate the blocks in richmond COIs richmond_blocks = blocks.loc[ (blocks['coi_1'] == True) | (blocks['coi_2'] == True) | (blocks['coi_3'] == True) | (blocks['coi_4'] == True) | (blocks['coi_5'] == True) | (blocks['coi_6'] == True) | (blocks['coi_7'] == True) | (blocks['coi_8'] == True) | (blocks['coi_9'] == True) | (blocks['coi_10'] == True) | (blocks['coi_11'] == True) | (blocks['coi_12'] == True) | (blocks['coi_13'] == True)] # save COI blocks richmond_blocks.to_file('./COI/richmond_all_blocks.shp')
# NOTE(review): the leading ``else`` pairs with an ``if`` outside this
# excerpt (presumably a sanity check that printed 'yes' on success).
else:
    print('no')

# check for unique district columns
if plan_shp[dist_col].nunique() == num_dists:
    print('yes')
else:
    print('no')

# convert assignment column to string, set as index
plan_shp[dist_col] = plan_shp[dist_col].map(lambda x: str(x).zfill(3))
plan_shp.set_index(dist_col, inplace=True)

# maup assign - blocks
assignment = maup.assign(blocks, plan_shp)
blocks[dist_type] = assignment

# maup assign - precincts
assignment = maup.assign(precs, plan_shp)
precs[dist_type] = assignment

# generate and export crosswalks
prec_crosswalk = precs[[dist_type]]
block_crosswalk = blocks[[dist_type]]
prec_crosswalk.to_csv('./{0}_dash_prec_cross.csv'.format(state_name))
# NOTE(review): path starts with '.' rather than './' — this writes a file
# literally named '.<state>_dash_block_cross.csv'; likely a typo, confirm.
block_crosswalk.to_csv('.{0}_dash_block_cross.csv'.format(state_name))
# --- Label MO precincts with congressional / state senate / state house districts ---
# NOTE(review): bare expressions (``.crs``, ``list(...)``, ``.sum()``) below
# are REPL/notebook-style inspections; they have no effect when run as a script.
state = gpd.read_file(state_path)
state.crs
list(state.columns)
# SLDUST is state senate district
state.rename(columns={"SLDUST": "id"}, inplace=True)
state = state[["id", "geometry"]]

# state HOR data
st_house_path = "./raw-from-source/tl_2016_29_sldl/tl_2016_29_sldl.shp"
st_house = gpd.read_file(st_house_path)
st_house.crs
list(st_house.columns)
# SLDUST is st_house senate district
# NOTE(review): comment above looks copy-pasted — SLDLST is the state HOUSE
# (lower chamber) district code.
st_house.rename(columns={"SLDLST": "id"}, inplace=True)
st_house = st_house[["id", "geometry"]]

# Assigning precincts to U.S. congressional districts
assignment = maup.assign(prec, mscong_merging)
assignment.isna().sum()  # inspection: precincts without a district
prec["CD115FP"] = assignment

# Assigning precincts to state senate districts
assignment = maup.assign(prec, state)
assignment.isna().sum()
prec["SLDUST"] = assignment

# Assigning precincts to state house districts
assignment = maup.assign(prec, st_house)
assignment.isna().sum()
prec["SLDLST"] = assignment

prec.to_file("./output/mo_prec_labeled/mo_prec_labeled_nopop.shp")
# Do we want to include CVAP data? cvap = False # Read in existing data and blocks. existing = gpd.read_file(indir) blocks = gpd.read_file(path.join(georoot, "blocks-demo-adjoined")).to_crs(existing.crs) # Get the columns we want. all_columns = list(set(list(blocks)) - {"GEOID", "geometry"}) nocvap_columns = list( set(c for c in list(blocks) if "_" not in c) - {"GEOID", "geometry"}) columns = all_columns if cvap else nocvap_columns # Aggregate up to precincts. assignment = maup.assign(blocks, existing) existing[columns] = blocks[columns].groupby(assignment).sum() # Fill NaNs with 0. existing[columns] = existing[columns].fillna(0) # Assert that our columns are nearly equal. for column in columns: try: assert np.isclose(existing[column].sum(), blocks[column].sum()) except AssertionError: print(f"The column {column} didn't sum properly.") # Fix geometries and write to file. existing["geometry"] = existing["geometry"].buffer(0) if not path.exists(outdir): os.mkdir(outdir)