def test_prorate_gives_expected_value(sources, targets):
    """Prorating source areas with area-share weights yields the target areas."""
    overlaps = intersections(sources, targets, area_cutoff=0)

    # Look up, for every piece, the area of the source it is labelled with,
    # keeping the result aligned to the pieces' own index.
    source_labels = overlaps.index.get_level_values("source")
    parent_areas = source_labels.to_series(index=overlaps.index).map(sources.area)

    # Each piece's weight is its share of its parent source's area.
    area_shares = overlaps.area / parent_areas
    result = prorate(overlaps, sources.area, area_shares)

    assert (result == targets.area).all()
def test_trivial_case(sources):
    """Prorating a layer onto itself with unit weights returns the data unchanged."""
    columns = ["data1", "data2"]
    for name in columns:
        sources[name] = [10, 10, 10, 10]

    self_pieces = intersections(sources, sources, area_cutoff=0)
    # Every piece gets a weight of exactly 1.
    unit_weights = pandas.Series([1] * len(self_pieces), index=self_pieces.index)

    result = prorate(self_pieces, sources[columns], unit_weights)

    matches = result == sources[columns]
    assert matches.all().all()
def test_prorate_raises_if_data_is_not_dataframe_or_series(sources, targets):
    """``prorate`` must raise ``TypeError`` when the data argument is a plain string."""
    pieces = intersections(sources, targets)
    zero_weights = pandas.Series([0] * len(pieces), index=pieces.index)

    with pytest.raises(TypeError):
        prorate(pieces, "not a series", weights=zero_weights)
def test_crop_to():
    """Compare prorated vote totals with and without cropping the old precincts."""
    blocks = geopandas.read_file("zip://./examples/blocks.zip")
    old_precincts = geopandas.read_file("zip://./examples/precincts.zip")
    new_precincts = geopandas.read_file("zip://./examples/new_precincts.zip")
    columns = ["SEN18D", "SEN18R"]

    def prorate_votes(targets):
        """Prorate ``columns`` from ``old_precincts`` onto ``targets``.

        Weights are block ``TOTPOP`` totals per piece, normalized on level 0.
        """
        pieces = maup.intersections(old_precincts, targets, area_cutoff=0)
        block_to_piece = maup.assign(blocks, pieces)
        weights = blocks["TOTPOP"].groupby(block_to_piece).sum()
        weights = maup.normalize(weights, level=0)
        return maup.prorate(pieces, old_precincts[columns], weights=weights)

    # Baseline: prorate without cropping.
    new_precincts[columns] = prorate_votes(new_precincts)

    # Same calculation after cropping the old precincts to the new ones.
    # NOTE(review): the cropped shapes are stored under the column name
    # "geometries"; confirm that this is the column the later calls actually
    # read geometry from.
    old_precincts["geometries"] = maup.crop_to(old_precincts, new_precincts)
    new_precincts_cropped = new_precincts.copy()
    new_precincts_cropped[columns] = prorate_votes(new_precincts_cropped)

    # NOTE(review): new_precincts_cropped is a copy of new_precincts, so
    # confirm this inequality really exercises the cropped geometries.
    assert new_precincts_cropped.area.sum() != new_precincts.area.sum()

    # Per-column change in total votes caused by cropping; none may be negative.
    gains = [
        new_precincts_cropped[col].sum() - new_precincts[col].sum()
        for col in columns
    ]
    for gain in gains:
        assert gain >= 0

    # Ideally the total would be strictly positive (meaning fewer votes are
    # lost after cropping), but crop_to doesn't resolve the missing-votes
    # errors yet.
    assert sum(gains) >= 0
def test_prorate_dataframe(sources, targets):
    """Prorating a two-column DataFrame gives 10 x target area in each column."""
    columns = ["data1", "data2"]
    for name in columns:
        sources[name] = [10, 10, 10, 10]

    pieces = intersections(sources, targets)

    # Weight each piece by its share of the area of the source it came from.
    parent_areas = pieces.index.get_level_values("source").map(sources.area)
    weight_by = pieces.area / parent_areas

    prorated = prorate(pieces, sources[columns], weight_by)

    expected = 10 * targets.area
    for name in columns:
        assert (prorated[name] == expected).all()
def test_example_case():
    """Run the example workflow: prorate 2018 Senate votes onto new precincts."""
    blocks = geopandas.read_file("zip://./examples/blocks.zip")
    old_precincts = geopandas.read_file("zip://./examples/precincts.zip")
    new_precincts = geopandas.read_file("zip://./examples/new_precincts.zip")
    columns = ["SEN18D", "SEN18R"]

    # area_cutoff=0 drops intersections that have no area (e.g. shared
    # boundaries), which must not take part in the proration.
    pieces = intersections(old_precincts, new_precincts, area_cutoff=0)

    # Weight every piece by the total block population assigned to it.
    block_to_piece = assign(blocks, pieces)
    weights = blocks["TOTPOP"].groupby(block_to_piece).sum()

    # Distribute the old precincts' vote columns onto the new precincts.
    new_precincts[columns] = prorate(pieces, old_precincts[columns], weights=weights)

    # More than half as many positive cells as there are new precincts.
    positive_cells = (new_precincts[columns] > 0).sum().sum()
    assert positive_cells > len(new_precincts) / 2
def test_one_dimensional_intersections_dont_cause_error(sources):
    """Prorate all sources onto the first two of them and expect their areas back."""
    first_two = sources.iloc[:2]
    pieces = intersections(sources, first_two)

    # Weight each piece by its share of the area of the source it came from.
    parent_areas = pieces.index.get_level_values("source").map(sources.area)
    weight_by = pieces.area / parent_areas

    prorated = prorate(pieces, sources.area, weight_by)

    assert (prorated == sources.iloc[:2].area).all()
# Output column names for the 1990 data: each entry of ``columns`` + "_1990".
columns1990 = [name + "_1990" for name in columns]

# Load the three tract layers and the block layer.
il2010 = gpd.read_file(chicago_2010_file)
il2000 = gpd.read_file(chicago_2000_file)
il1990 = gpd.read_file(chicago_1990_file)
blocks = gpd.read_file(chicago_blocks_file)

# The 2000 and 1990 layers have the 2010 CRS assigned directly, while the
# blocks are converted with to_crs.
# NOTE(review): presumably the older files already use the 2010 projection
# and only lack the metadata -- confirm.
il2000.crs = il2010.crs
il1990.crs = il2010.crs
blocks.to_crs(il2010.crs, inplace=True)

# Cast every data column to int in all three layers.
for c in columns:
    for frame in (il2010, il2000, il1990):
        frame[c] = frame[c].astype(int)

# Pieces of each older layer intersected with the 2010 layer; area_cutoff=0
# is passed for both.
pieces2000 = maup.intersections(il2000, il2010, area_cutoff=0)
pieces1990 = maup.intersections(il1990, il2010, area_cutoff=0)

# Block population summed per piece, then normalized on level 0.
weights2000 = maup.normalize(
    blocks["TOTPOP"].groupby(maup.assign(blocks, pieces2000)).sum(),
    level=0,
)
weights1990 = maup.normalize(
    blocks["TOTPOP"].groupby(maup.assign(blocks, pieces1990)).sum(),
    level=0,
)

# Prorate the older data onto the 2010 layer under the suffixed column names.
il2010[columns2000] = maup.prorate(
    pieces2000, il2000[columns], weights=weights2000
)
il2010[columns1990] = maup.prorate(
    pieces1990, il1990[columns], weights=weights1990
)

# Map which 2010 rows have no prorated 2000 population, then dump two columns.
il2010.plot(column=il2010["TOTPOP_2000"].isna())
plt.show()

for label in ("NH_BLACK_2000", "TOTPOP_2000"):
    print(il2010[label])
# In[6]:

# Run maup.resolve_overlaps on the example layer.
resolved = maup.resolve_overlaps(example)


# In[17]:

# Plot the resolved layer without axes.
resolved.plot(
    figsize=(12, 12),
    edgecolor="#004464",
    color="#eeeeee",
)
plt.axis('off')


# In[44]:

# Plot the adjacencies of the original (unresolved) layer.
maup.adjacencies(example).plot(figsize=(14, 14))


# In[47]:

# Intersect the layer with itself, passing area_cutoff=None.
inters = maup.intersections(example, example, area_cutoff=None)


# In[50]:

# Keep only the pieces whose two index labels differ (drop self-pairs).
non_self_inters = [left != right for left, right in inters.index]
inters = inters[non_self_inters]


# In[55]:

# Plot the remaining pieces that have positive area.
inters[inters.area > 0].plot(
    figsize=(12, 12),
    linewidth=5,
    edgecolor="black",
)


# In[64]:

# Plot only the single largest remaining piece.
inters.loc[[inters.area.idxmax()]].plot(figsize=(14, 14))
plt.axis('off')