Example #1
def verify_gerrychain(df):
    try:
        Graph.from_geodataframe(fix_buffer(df))
        print("GerryChain graph created")
        return True
    except Exception as error:
        print("Unable to create GerryChain graph: ", error)
        return False
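For context, fix_buffer is not defined in this snippet; a plausible minimal reconstruction, assuming its job is to repair invalid geometries before graph construction (the zero-width buffer trick is a common approach, not necessarily the original implementation):

def fix_buffer(df):
    # Hypothetical helper: buffer(0) repairs many self-intersecting
    # polygons so that adjacency can be computed cleanly.
    df = df.copy()
    df["geometry"] = df["geometry"].buffer(0)
    return df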
Example #2
    def test_uses_graph_geometries_by_default(self, geodataframe):
        mock_plot = MagicMock()
        gp.GeoDataFrame.plot = mock_plot

        graph = Graph.from_geodataframe(geodataframe)
        partition = Partition(graph=graph,
                              assignment={node: 0
                                          for node in graph})
        partition.plot()
        assert mock_plot.call_count == 1
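Assigning to gp.GeoDataFrame.plot directly leaks the mock into later tests; a sketch of the same test using pytest's monkeypatch fixture, which restores the attribute automatically (same imports as above assumed):

    def test_uses_graph_geometries_by_default(self, geodataframe, monkeypatch):
        mock_plot = MagicMock()
        monkeypatch.setattr(gp.GeoDataFrame, "plot", mock_plot)

        graph = Graph.from_geodataframe(geodataframe)
        partition = Partition(graph=graph,
                              assignment={node: 0 for node in graph})
        partition.plot()
        assert mock_plot.call_count == 1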
Example #3
def make_dual_graph(st, year):
    """
    Takes a 2-letter state postal code abbreviation (e.g. GA for Georgia), makes a dual graph of its shapefile, and writes the dual graph to a JSON
    
    Arguments:
    st -- 2 letter state postal code
    """

    # Fold state postal code to lowercase:
    st = st.lower()

    ##2016 Census Tract- TigerLine File for the appropriate state, follow link to download
    geo = gpd.read_file("./data/" + st + "_" + str(year) + "_tract.shp")
    graph = Graph.from_geodataframe(
        geo
    )  # if the graph is built successfully, we should be able to run a chain
    graph.add_data(geo, columns=geo.columns)
    # nx.is_connected(graph)  # True means the graph is connected
    graph.to_json("./data/" + st + "_tract.json")
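A minimal usage sketch, assuming the matching TIGER/Line shapefile has already been downloaded to ./data/:

# Hypothetical invocation: reads ./data/ga_2016_tract.shp and
# writes ./data/ga_tract.json
make_dual_graph("GA", 2016)

Example #4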
    "SEN16R",
    "SEN16L",
]
for x in df.columns:
    if x in variables:
        df[x] = df[x].astype(int)
#county_col = "COUNTYFP10"
pop_col = "TOTPOP"

df["CPOP"] = df["TOTPOP"] - df["NCPOP"]
ccol = "CPOP"
uid = "ID"

num_districts = 14

graph = Graph.from_geodataframe(df, ignore_errors=True)
graph.add_data(df, list(df))
graph = nx.relabel_nodes(graph, df[uid])

elections = [
    Election("PRES16", {
        "Democratic": "PRES16D",
        "Republican": "PRES16R"
    }),
    Election("SEN16", {
        "Democratic": "SEN16D",
        "Republican": "SEN16R"
    })
]

#my_updaters = {"population" : updaters.Tally("TOTPOP", alias="population")}
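A sketch of the step implied by the commented-out line above: wiring the population tally and the elections into a partition's updaters. The assignment column "CD" is a hypothetical name for a seed-plan column, not part of the original script.

from gerrychain import GeographicPartition, updaters

my_updaters = {"population": updaters.Tally("TOTPOP", alias="population")}
my_updaters.update({election.name: election for election in elections})
# "CD" is a hypothetical column holding the seed districting plan.
initial_partition = GeographicPartition(graph, assignment="CD", updaters=my_updaters)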
Example #5
def main(graph_json, shp, n_steps, output_dir, prefix, seed, pop_col, pop_tol,
         plan_col, reproject, election):
    os.makedirs(output_dir, exist_ok=True)
    has_geometry = False
    if not shp and not graph_json:
        print('Specify a shapefile or a NetworkX-format graph '
              'JSON file.',
              file=sys.stderr)
        sys.exit(1)
    elif shp and not graph_json:
        gdf = gpd.read_file(shp)
        if reproject:
            gdf = reprojected(gdf)
        graph = Graph.from_geodataframe(gdf)
        has_geometry = True
    elif graph_json and not shp:
        graph = Graph.from_json(graph_json)
    else:
        graph = Graph.from_json(graph_json)
        gdf = gpd.read_file(shp)
        if reproject:
            gdf = reprojected(gdf)
        print('Appending geometries from shapefile to graph...')
        graph.geometry = gdf.geometry  # TODO: is this always valid?
        has_geometry = True

    my_updaters = {'population': updaters.Tally(pop_col, alias='population')}
    if election:
        election_up = Election('election', {
            'Democratic': election[0],
            'Republican': election[1]
        })
        my_updaters['election'] = election_up
    initial_state = GeographicPartition(graph,
                                        assignment=plan_col,
                                        updaters=my_updaters)

    normal_chain = RecomChain(graph=graph,
                              total_steps=n_steps,
                              initial_state=initial_state,
                              pop_col=pop_col,
                              pop_tol=pop_tol,
                              reversible=False,
                              seed=seed)
    reversible_chain = RecomChain(graph=graph,
                                  total_steps=n_steps,
                                  initial_state=initial_state,
                                  pop_col=pop_col,
                                  pop_tol=pop_tol,
                                  reversible=True,
                                  seed=seed)

    normal_plans = [plan for plan in tqdm(normal_chain)]
    reversible_plans = [plan for plan in tqdm(reversible_chain)]
    cut_edges_fig(output_dir, prefix, normal_plans, reversible_plans)
    longest_boundary_fig(output_dir, prefix, normal_plans, reversible_plans)
    if has_geometry:
        demo_plans(output_dir, '_'.join([prefix, 'recom']), normal_plans,
                   n_steps, n_steps // 25)
        demo_plans(output_dir, '_'.join([prefix, 'reversible_recom']),
                   reversible_plans, n_steps, n_steps // 25)
    if election:
        election_hists(output_dir, 'dem_vote_share', 'election', 'Democratic',
                       normal_plans, reversible_plans)
    acceptance_stats(output_dir, '_'.join([prefix, 'recom']), normal_plans)
    acceptance_stats(output_dir, '_'.join([prefix, 'reversible_recom']),
                     reversible_plans)
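reprojected is not defined in this excerpt; a plausible minimal version, assuming its purpose is to move from lat/long into a planar CRS before computing adjacencies (the EPSG code is a placeholder, not the original choice):

def reprojected(gdf):
    # Hypothetical helper: project into a planar CRS so that boundary
    # lengths and areas are meaningful; a state-appropriate projection
    # would normally replace this placeholder.
    return gdf.to_crs(epsg=3857)

Example #6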
import pandas as pd
from local_tools import states
from states import STATES
import geopandas as gpd
from glob import glob
from gerrychain import Graph

postal_to_name = {v: k.lower().replace(" ", "_") for k, v in states.name_postal_code_mappings.items()}


for code, st in list(STATES.items())[13:]:
    print("{} - {}".format(code, st["STFIPS"]))
    bg_shapes = gpd.read_file("https://www2.census.gov/geo/tiger/TIGER2010/BG/2010/tl_2010_{}_bg10.zip".format(st["STFIPS"]))
    bg_shapes = bg_shapes.rename(columns={"GEOID10": "GEOID"})
    bg_shapes = bg_shapes[["GEOID", "geometry"]].set_index("GEOID")
    graph = Graph.from_geodataframe(bg_shapes)
    graph.to_json("../districtContiguity/graphs/{}_blockgroups.json".format(postal_to_name[code]))
    # bg_shapes.to_csv("../districtCenter/resources/{}_blockgroups.csv".format(postal_to_name[code]), index=False)

for code, st in [("IA", STATES["IA"])]:
    print("{} - {}".format(code, st["STFIPS"]))
    cnty_shapes = gpd.read_file("https://www2.census.gov/geo/tiger/TIGER2010/COUNTY/2010/tl_2010_{}_county10.zip".format(st["STFIPS"]))
    cnty_shapes = cnty_shapes.rename(columns={"GEOID10": "GEOID"})
    cnty_shapes = cnty_shapes[["GEOID", "geometry"]].set_index("GEOID")
    graph = Graph.from_geodataframe(cnty_shapes)
    graph.to_json("../districtContiguity/graphs/{}_counties.json".format(postal_to_name[code]))
    # cnty_shapes.to_csv("../districtCenter/resources/{}_counties.csv".format(postal_to_name[code]), index=False)
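The JSON files written above can be loaded back without the shapefiles; a quick round-trip sketch (the Iowa path follows from postal_to_name in the loop above):

ia_graph = Graph.from_json("../districtContiguity/graphs/iowa_counties.json")
print(ia_graph.number_of_nodes(), "counties")

Example #7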
def crossover_test():
    #test on IOWA
    #    k = 4
    #    graph_name = 'iowa'
    #    graph_path = './input_data/'+graph_name+'.json'
    #    graph = Graph.from_json(graph_path)
    #    num_districts = k
    #    ideal_pop = sum([graph.nodes[v]["TOTPOP"] for v in graph.nodes()])/num_districts
    #    unit_name = 'GEOID10'
    #    area_name = 'area'
    #    x_name = 'INTPTLON10'
    #    y_name = 'INTPTLAT10'
    #    # areaC_X = "areaC_X"
    #    # areaC_Y = "areaC_Y"
    #    # area = 'area'
    #    shapefile_name = 'IA_counties'
    #    gdf = gpd.read_file('./input_data/'+shapefile_name)
    #    gdf = gdf.to_crs({'init': 'epsg:26775'})

    #test on New Mexico
    k = 42  #NM state senate districts
    graph_name = 'New Mexico'
    unit_name = 'NAME10'
    num_districts = k
    plot_path = './input_data/NM_precincts_edited/NM_precincts_edited.shp'
    gdf = gpd.read_file(plot_path)
    graph = Graph.from_geodataframe(gdf)
    graph.add_data(gdf)
    ideal_pop = sum([graph.nodes[v]["TOTPOP"]
                     for v in graph.nodes()]) / num_districts
    area_name = 'Area'
    centroids = gdf.centroid
    c_x = centroids.x
    c_y = centroids.y
    for node in graph.nodes():
        graph.nodes[node]["x_val"] = c_x[node]
        graph.nodes[node]["y_val"] = c_y[node]
    x_name = 'x_val'
    y_name = 'y_val'

    ##test on TX
    #    k=36
    #    graph_name = 'Texas'
    #    graph_path = './input_data/tx.json'
    #    graph = Graph.from_json(graph_path)
    #    shapefile_path = './input_data/Texas_xy/Texas_xy.shp'
    #    gdf = gpd.read_file(shapefile_path)
    #    num_districts = k
    #    ideal_pop = sum([graph.nodes[v]["TOTPOP"] for v in graph.nodes()])/num_districts
    #    unit_name = 'CNTYVTD'
    #    area_name = 'Shape_area'
    #    x_name = 'x_val'
    #    y_name = 'y_val'
    #    gdf = gdf.to_crs({'init': 'epsg:26775'})

    for node in graph.nodes():
        graph.nodes[node]["x"] = float(graph.nodes[node][x_name])
        graph.nodes[node]["y"] = float(graph.nodes[node][y_name])
        graph.nodes[node]["area"] = float(graph.nodes[node][area_name])

    updaters = {
        "population": Tally("TOTPOP", alias="population"),
        "cut_edges": cut_edges,
        "centroids": centroids_x_y_area
    }

    new_plan1 = recursive_tree_part(graph, range(k), ideal_pop, "TOTPOP", .02, 3)
    part1 = Partition(graph, assignment=new_plan1, updaters=updaters)
    new_plan2 = recursive_tree_part(graph, range(k), ideal_pop, "TOTPOP", .02, 3)
    part2 = Partition(graph, assignment=new_plan2, updaters=updaters)

    max_adjust = 10000
    ep = 0.05

    print("tiling crossover test:")
    tiling_child1, tiling_child2 = tiling_crossover(part1,
                                                    part2,
                                                    k,
                                                    ep,
                                                    max_adjust,
                                                    ideal_pop,
                                                    draw_map=True,
                                                    gdf=gdf,
                                                    unit_name=unit_name,
                                                    testing=True)
    print(len(tiling_child1.cut_edges), len(tiling_child2.cut_edges))
    print("seam crossover test:")
    seam_child1, seam_child2 = seam_split_crossover(part1,
                                                    part2,
                                                    k,
                                                    ep,
                                                    max_adjust,
                                                    ideal_pop,
                                                    draw_map=True,
                                                    gdf=gdf,
                                                    unit_name=unit_name,
                                                    testing=True)
    print(len(seam_child1.cut_edges), len(seam_child2.cut_edges))
    print("book chapter crossover test:")
    book_child1, book_child2 = book_chapter_crossover(part1,
                                                      part2,
                                                      k,
                                                      ep,
                                                      max_adjust,
                                                      ideal_pop,
                                                      draw_map=True,
                                                      gdf=gdf,
                                                      unit_name=unit_name,
                                                      testing=True)
    print(len(book_child1.cut_edges), len(book_child2.cut_edges))
    print("chen crossover test:")
    chen_child1, chen_child2 = chen_crossover(part1,
                                              part2,
                                              k,
                                              ep,
                                              max_adjust,
                                              ideal_pop,
                                              draw_map=True,
                                              gdf=gdf,
                                              unit_name=unit_name,
                                              testing=True)
    print(len(chen_child1.cut_edges), len(chen_child2.cut_edges))
    print("half-half recom crossover test:")
    half_recom_child1, half_recom_child2 = half_half_recom_crossover(
        part1,
        part2,
        k,
        ep,
        max_adjust,
        ideal_pop,
        draw_map=True,
        gdf=gdf,
        unit_name=unit_name,
        testing=True)
    print(len(half_recom_child1.cut_edges), len(half_recom_child2.cut_edges))
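Each crossover above receives a population tolerance ep; a small helper sketch for sanity-checking the children against it, assuming the "population" Tally updater defined in this example:

def within_pop_tolerance(partition, ideal_pop, ep):
    # True when every district's population deviates from the ideal
    # by at most ep (e.g. 0.05 for 5%).
    return all(abs(pop - ideal_pop) / ideal_pop <= ep
               for pop in partition["population"].values())

# e.g. within_pop_tolerance(tiling_child1, ideal_pop, ep)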
Example #8
    args = parser.parse_args()

    STEP_COUNT = args.steps
    BURN_IN = int(0.1 * STEP_COUNT)
    CITY_NAME = args.city
    STATE = args.state
    STATE_FIPS = str(args.fips)
    TOT_WORKERS = args.workers

    manager = Manager()
    results = manager.dict()

    race_matrix = load_data(CITY_NAME, STATE, STATE_FIPS)

    # build chain
    graph = Graph.from_geodataframe(race_matrix, adjacency="queen")
    nx.set_node_attributes(graph,
                           race_matrix["total"].to_dict(),
                           name="population")
    init_partition = Partition(
        graph,
        assignment=race_matrix.to_dict()["partition"],
        updaters={"population": Tally("population")},
    )

    # validators
    def mean_pop(part):
        return np.mean(list(part["population"].values()))

    def min_pop(part):
        return min(list(part["population"].values()))
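This excerpt ends before the validators are used; a rough sketch of how such score functions typically feed into gerrychain constraints (the bounds here are placeholders, and Example #11 below shows the full pattern):

from gerrychain.constraints import LowerBound, UpperBound, Validator

is_valid = Validator([
    LowerBound(min_pop, min_pop(init_partition) - 50),   # placeholder bound
    UpperBound(mean_pop, 2 * mean_pop(init_partition)),  # placeholder bound
])

Example #9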
#unit_name = 'GEOID10'
#area_name = 'area'
#x_name = 'INTPTLON10'
#y_name = 'INTPTLAT10'
#shapefile_name = 'IA_counties'
#gdf = gpd.read_file('./input_data/'+shapefile_name)
#gdf = gdf.to_crs({'init': 'epsg:26775'})

#NEW MEXICO
k = 42  #NM state senate districts
graph_name = 'New Mexico'
unit_name = 'NAME10'
num_districts = k
plot_path = './input_data/NM_precincts_edited/NM_precincts_edited.shp'
gdf = gpd.read_file(plot_path)
graph = Graph.from_geodataframe(gdf)
graph.add_data(gdf)
ideal_pop = sum([graph.nodes[v]["TOTPOP"]
                 for v in graph.nodes()]) / num_districts
area_name = 'Area'
centroids = gdf.centroid
c_x = centroids.x
c_y = centroids.y
for node in graph.nodes():
    graph.nodes[node]["x_val"] = c_x[node]
    graph.nodes[node]["y_val"] = c_y[node]
x_name = 'x_val'
y_name = 'y_val'

##TEXAS
#k=36
Example #10
state_gdf = gpd.read_file(plot_path)
state_gdf["CD"] = state_gdf["CD"].astype('int')
state_gdf["Seed_Demo"] = state_gdf["Seed_Demo"].astype('int')
state_gdf.columns = state_gdf.columns.str.replace("-", "_")

#replace cut-off candidate names from shapefile with full names
state_gdf_cols = list(state_gdf.columns)
cand1_index = state_gdf_cols.index('RomneyR_12')
cand2_index = state_gdf_cols.index('ObamaD_12P')
state_gdf_cols[cand1_index:cand2_index + 1] = TX_columns
state_gdf.columns = state_gdf_cols
state_df = pd.DataFrame(state_gdf)
state_df = state_df.drop(['geometry'], axis=1)

#build graph from geo_dataframe #####################################################
graph = Graph.from_geodataframe(state_gdf)
graph.add_data(state_gdf)
centroids = state_gdf.centroid
c_x = centroids.x
c_y = centroids.y
for node in graph.nodes():
    graph.nodes[node]["C_X"] = c_x[node]
    graph.nodes[node]["C_Y"] = c_y[node]

#set up elections data structures ################################################
elections = list(elec_data["Election"])
elec_type = elec_data["Type"]
elec_cand_list = TX_columns

elecs_bool = ~elec_data.Election.isin(list(dropped_elecs))
elec_data_trunc = elec_data[elecs_bool].reset_index(drop=True)
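A small sanity check one might add after the renaming and graph construction above, confirming that the centroid attributes landed on every node (a hedged sketch, not part of the original script):

assert all("C_X" in graph.nodes[n] and "C_Y" in graph.nodes[n]
           for n in graph.nodes())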
Example #11
def run_full_chain(chain_name):
    # # twilio setup, requires proper env variables to be set up (so it will text you when the chain is done)
    # account = os.environ["TWILIO_ACCT"]
    # auth = os.environ["TWILIO_AUTH"]
    # client = Client(account, auth)

    # get hyperparams
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--steps",
        type=int,
        help="number of steps for each Markov chain",
        default=100000,
    )
    parser.add_argument("city", type=str, help="city name, e.g. Atlanta")
    parser.add_argument("state", type=str, help="state code, e.g. GA")
    parser.add_argument(
        "fips", help="state FIPS code (zero-padded on the end), e.g. 130")
    args = parser.parse_args()

    STEP_COUNT = args.steps
    BURN_IN_RATIO = 0.1
    CITY_NAME = args.city
    STATE = args.state
    STATE_FIPS = str(args.fips)
    THINNING_FACTOR = 5  # record entropy only once every THINNING_FACTOR steps of the chain

    race_matrix = load_data(CITY_NAME, STATE, STATE_FIPS, fake=False)
    R_scratch = race_matrix[["partition", "geometry"]]  # scratch version of R for the Polsby-Popper computation

    print(race_matrix.head())

    # build chain
    graph = Graph.from_geodataframe(race_matrix, adjacency="queen")
    nx.set_node_attributes(graph,
                           race_matrix["total"].to_dict(),
                           name="population")
    init_partition = Partition(
        graph,
        assignment=race_matrix.to_dict()["partition"],
        updaters={"population": Tally("population")},
    )

    # validators
    def mean_pop(part):
        return np.mean(list(part["population"].values()))

    def min_pop(part):
        return np.min(list(part["population"].values()))

    def sd_pop(part):
        return np.std(list(part["population"].values()))

    # TODO: only check if GISJOIN in minimum P-P partition have changed
    # TODO: cache set of GISJOINs for minimum partition for lowest P-P partition
    # TODO: compare this set to the new one when given a partition
    # TODO: if set is different, recompute P-P for whole partition, else do nothing
    def partition_polsby_popper(part, R=R_scratch):
        """Checks if partition is within polsby-popper metric

        Args:
            partition (gerrychain partition): partition map from a single step in the Markov Chain
            R (geopandas.GeoDataFrame): columns 'partition' and 'geometry' for getting the polygons

        Returns:
            function that takes partition and checks if it's within the bounds
        """
        # get all shapes from each district
        # compute polsby-popper on all districts, get min
        pd.options.mode.chained_assignment = None
        R.loc[:, "partition"] = race_matrix.index.map(dict(part.assignment))
        R_temp = R.copy(deep=True).dissolve(by="partition")
        polsby_popper = lambda d: (4 * np.pi * d.area) / (d.length ** 2)  # d is a polygon
        # srs = R["geometry"].map(polsby_popper).values
        # print(np.min(srs), np.mean(srs), np.max(srs))
        # return srs.min()
        return R_temp["geometry"].map(polsby_popper).min()
        # return min(polsby_popper_from_R(R).values())

    def polsby_popper_from_R(R):
        """A more stable version of geopandas dissolve."""
        from shapely.ops import unary_union

        # loop through all partitons and unary join them, the return a dict indexed by partition id
        result = {}
        polsby_popper = lambda d: (4 * np.pi * d.area) / (d.length ** 2)  # d is a polygon
        for pid in R["partition"].unique():
            # get all geometries
            geom = R.loc[R["partition"] == pid]["geometry"].values
            result[pid] = polsby_popper(unary_union(geom))
        return result

    def partition_polsby_popper_min(
        part,
        R=R_scratch,
    ):
        nonlocal min_partition_id
        nonlocal min_partition_gisjoins
        nonlocal min_partition_p_p
        pd.options.mode.chained_assignment = None
        R.loc[:, "partition"] = race_matrix.index.map(dict(part.assignment))
        same_gisjoins = (
            set(R.loc[R["partition"] == min_partition_id].index.values)
            == min_partition_gisjoins)
        if min_partition_id is not None and same_gisjoins:
            # no change, return the old one
            return min_partition_p_p
        else:
            # something changed, so recompute all partitions
            # R_temp = R.copy(deep=True).dissolve(by="partition")
            # p_p_scores = R_temp["geometry"].map(polsby_popper)
            # min_partition_p_p = p_p_scores.min()
            # min_partition_id = R_temp.iloc[np.argmin(p_p_scores.values)].name
            p_p_scores = polsby_popper_from_R(R)
            min_partition_p_p = min(p_p_scores.values())
            min_partition_id = min(p_p_scores.items(), key=lambda x: x[1])[0]
            min_partition_gisjoins = set(
                R.loc[R["partition"] == min_partition_id].index.values)
            if min_partition_p_p < 0.147:  # initial Oakland partition has min score of 0.147
                print("Rejected with score", min_partition_p_p)
            return min_partition_p_p

    mean_one_sd_up = mean_pop(init_partition) + (2 / 3) * sd_pop(init_partition)
    mean_one_sd_down = mean_pop(init_partition) - (2 / 3) * sd_pop(init_partition)

    min_partition_id, min_partition_gisjoins, min_partition_p_p = None, set(), None

    # initialize and run chains
    # TODO: record descent
    is_valid = Validator([
        LowerBound(min_pop,
                   min_pop(init_partition) % 50),
        UpperBound(mean_pop, mean_one_sd_up),
        LowerBound(mean_pop, mean_one_sd_down),
        WithinPercentRangeOfBounds(sd_pop, 25),
        # contiguous,
        # LowerBound(
        #     partition_polsby_popper, bound=partition_polsby_popper(init_partition)
        # ),
        # LowerBound(
        #     partition_polsby_popper_min,
        #     bound=partition_polsby_popper_min(init_partition),
        # ),
        no_vanishing_districts,
    ])

    # make sure init_partition passes validators
    assert is_valid(init_partition)

    chain = MarkovChain(
        proposal=propose_chunk_flip,
        constraints=is_valid,
        accept=always_accept,
        initial_state=init_partition,
        total_steps=(STEP_COUNT * THINNING_FACTOR) + int(STEP_COUNT * BURN_IN_RATIO),
    )
    print(f"Prereqs created, {chain_name} running...")
    # burn-in: discard the first int(STEP_COUNT * BURN_IN_RATIO) states
    iter(chain)  # gerrychain's MarkovChain initializes its step counter in __iter__
    # print(f"Burn-in: ({int(STEP_COUNT * BURN_IN_RATIO)} steps)")
    for i in range(int(STEP_COUNT * BURN_IN_RATIO)):
        if i % 100 == 0:
            print(
                f"{chain_name} BURN IN => {i}/{int(STEP_COUNT * BURN_IN_RATIO)}"
            )
        next(chain)
    # print(f"Measurement: ({STEP_COUNT} steps)")
    entropies = []
    scores = []
    start_time = time.time()

    for i in range(STEP_COUNT * THINNING_FACTOR):
        if i % 25 == 0:
            print(
                f"{chain_name} ELAPSED {round(time.time() - start_time, 1)}s => {len(entropies)}/{STEP_COUNT}"
            )
        if i % THINNING_FACTOR == 0:
            part = next(chain)
            entropies.append(chain_to_entropy(part, race_matrix))
            scores.append(partition_polsby_popper_min(part))
        else:
            next(chain)

    np.save("./results_2020/polsby_popper_oakland.npy", np.array(scores))

    save_results(
        CITY_NAME,
        STEP_COUNT,
        chain_name,
        baseline=chain_to_entropy(init_partition, race_matrix),
        entropies=entropies,
    )
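A usage sketch; the chain name is an arbitrary label used only for logging and result files:

if __name__ == "__main__":
    run_full_chain("chunk_flip")  # hypothetical label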