def test_exterior_boundaries():
    """Check per-part exterior boundary totals before and after a set of flips."""
    grid = three_by_three_grid()

    # Node 4 is the only node not marked as a boundary node; every other
    # node carries a boundary perimeter of 2.
    for outer in (0, 1, 2, 3, 5, 6, 7, 8):
        grid.nodes[outer]['boundary_node'] = True
        grid.nodes[outer]['boundary_perim'] = 2
    grid.nodes[4]['boundary_node'] = False

    initial = Partition(
        grid,
        {0: 1, 1: 1, 2: 2, 3: 1, 4: 1, 5: 2, 6: 2, 7: 2, 8: 2},
        {
            'exterior_boundaries': exterior_boundaries,
            'boundary_nodes': boundary_nodes,
        },
    )

    before = initial['exterior_boundaries']
    assert before[1] == 6 and before[2] == 10

    # 112    111
    # 112 -> 121
    # 222    222
    flipped = Partition(parent=initial, flips={4: 2, 2: 1, 5: 1})

    after = flipped['exterior_boundaries']
    assert after[1] == 10 and after[2] == 6
def test_Partition_can_update_stats():
    """Aggregated 'stat' totals are updated when a node is merged into another part."""
    triangle = networkx.complete_graph(3)
    initial_assignment = {0: 1, 1: 1, 2: 2}

    # Node i carries stat i + 1.
    for node, stat in ((0, 1), (1, 2), (2, 3)):
        triangle.nodes[node]['stat'] = stat

    before = Partition(triangle, initial_assignment, aggregate_fields=['stat'])
    assert before.statistics['stat'][2] == 3

    # Moving node 1 (stat 2) into part 2 takes that part's total from 3 to 5.
    after = before.merge({1: 2})
    assert after.statistics['stat'][2] == 5
def test_implementation_of_cut_edges_matches_naive_method():
    """cut_edges on a flipped partition agrees with a scan over every graph edge."""
    grid = three_by_three_grid()
    parent = Partition(
        grid,
        {0: 1, 1: 1, 2: 2, 3: 1, 4: 1, 5: 2, 6: 2, 7: 2, 8: 2},
        {'cut_edges': cut_edges},
    )
    child = Partition(parent=parent, flips={4: 2})

    computed = cut_edges(child)

    # Naive reference: every graph edge that crosses_parts reports as crossing.
    expected = set()
    for edge in grid.edges:
        if child.crosses_parts(edge):
            expected.add(edge)

    assert edge_set_equal(computed, expected)
def test_single_flip_contiguity_equals_contiguity():
    """Run a 100-step chain asserting that both contiguity updaters agree."""
    import random
    random.seed(1887)

    def equality_validator(partition):
        # Fail the test as soon as the two contiguity results disagree;
        # otherwise validate on the 'contiguous' result.
        assert partition["contiguous"] == partition["flip_check"]
        return partition["contiguous"]

    shapes = gp.read_file("rundmcmc/testData/mo_cleaned_vtds.shp")
    with open("rundmcmc/testData/MO_graph.json") as handle:
        adjacency = json.load(handle)
    graph = networkx.readwrite.json_graph.adjacency_graph(adjacency)

    initial_partition = Partition(
        graph,
        get_assignment_dict_from_df(shapes, "GEOID10", "CD"),
        {
            "contiguous": contiguous,
            "cut_edges": cut_edges,
            "flip_check": single_flip_contiguous,
        },
    )

    chain = MarkovChain(
        propose_random_flip,
        Validator([equality_validator]),
        lambda x: True,  # accept every proposal
        initial_partition,
        total_steps=100,
    )

    # Consume the whole chain; a disagreement surfaces as an AssertionError
    # raised from equality_validator.
    list(chain)
def example_partition():
    """Build a Partition from the mo_cleaned_vtds shapefile and MO_graph.json.

    The assignment comes from the "CD" column keyed by "GEOID10"; vote,
    population, county-split and cut-edge updaters are attached.
    """
    shapefile_path = os.path.join(TEST_DATA_PATH, "mo_cleaned_vtds.shp")
    graph_path = os.path.join(TEST_DATA_PATH, "MO_graph.json")

    df = gp.read_file(shapefile_path)
    with open(graph_path) as handle:
        adjacency = json.load(handle)
    graph = networkx.readwrite.json_graph.adjacency_graph(adjacency)

    assignment = get_assignment_dict(df, "GEOID10", "CD")

    data_columns = ['PR_DV08', 'PR_RV08', 'POP100', 'ALAND10', 'COUNTYFP10']
    add_data_to_graph(df, graph, data_columns, id_col='GEOID10')

    # Vote updaters go in first so the explicit entries below take precedence
    # on any key collision.
    updaters = dict(votes_updaters(['PR_DV08', 'PR_RV08'], election_name='08'))
    updaters.update({
        'population': Tally('POP100', alias='population'),
        'counties': county_splits('counties', 'COUNTYFP10'),
        'cut_edges': cut_edges,
        'cut_edges_by_part': cut_edges_by_part,
    })

    return Partition(graph, assignment, updaters)
def PA_partition():
    """Build a Partition from the PA graph JSON file.

    Each node's 'CD' attribute is used as its district assignment, and vote,
    population, perimeter, boundary, cut-edge, area and Polsby-Popper
    updaters are attached.

    :returns: the resulting Partition instance
    """
    # this is a networkx adjacency data json file with CD, area, population, and vote data
    graph = construct_graph("./testData/PA_graph_with_data.json")

    # Add frozen attributes to graph
    # data = gp.read_file("./testData/frozen.shp")
    # add_data_to_graph(data, graph, ['Frozen'], 'wes_id')

    # `Graph.node` was removed in NetworkX 2.4; `Graph.nodes` is the supported
    # accessor for node attribute dictionaries.
    assignment = {node: graph.nodes[node]['CD'] for node in graph.nodes}

    updaters = {
        **votes_updaters(['VoteA', 'VoteB']),
        'population': Tally('POP100', alias='population'),
        'perimeters': perimeters,
        'exterior_boundaries': exterior_boundaries,
        'boundary_nodes': boundary_nodes,
        'cut_edges': cut_edges,
        'areas': Tally('ALAND10', alias='areas'),
        'polsby_popper': polsby_popper,
        'cut_edges_by_part': cut_edges_by_part,
    }

    return Partition(graph, assignment, updaters)
def test_cut_edges_doesnt_duplicate_edges_with_different_order_of_nodes():
    """No cut edge is stored both as (u, v) and as (v, u) after several flips."""
    grid = three_by_three_grid()
    parent = Partition(
        grid,
        {0: 1, 1: 1, 2: 2, 3: 1, 4: 1, 5: 2, 6: 2, 7: 2, 8: 2},
        {'cut_edges': cut_edges},
    )

    # 112    111
    # 112 -> 121
    # 222    222
    child = Partition(parent=parent, flips={4: 2, 2: 1, 5: 1})

    cut = child['cut_edges']
    reversed_edges = [(edge[1], edge[0]) for edge in cut]
    assert not any(flipped in cut for flipped in reversed_edges)
def test_perimeters():
    """Check the perimeters updater on the 3x3 grid with unit boundary and edge lengths."""
    grid = three_by_three_grid()

    # Node 4 is the only node not marked as a boundary node; every other
    # node carries a boundary perimeter of 1.
    for outer in (0, 1, 2, 3, 5, 6, 7, 8):
        grid.nodes[outer]['boundary_node'] = True
        grid.nodes[outer]['boundary_perim'] = 1
    grid.nodes[4]['boundary_node'] = False

    # Every edge gets a shared perimeter of 1.
    for edge in grid.edges:
        grid.edges[edge]['shared_perim'] = 1

    partition = Partition(
        grid,
        {0: 1, 1: 1, 2: 2, 3: 1, 4: 1, 5: 2, 6: 2, 7: 2, 8: 2},
        {
            'exterior_boundaries': exterior_boundaries,
            'interior_boundaries': interior_boundaries,
            'cut_edges_by_part': cut_edges_by_part,
            'boundary_nodes': boundary_nodes,
            'perimeters': perimeters,
        },
    )

    # 112
    # 112
    # 222
    perimeter_by_part = partition['perimeters']

    assert perimeter_by_part[1] == 3 + 4  # 3 nodes + 4 edges
    assert perimeter_by_part[2] == 5 + 4  # 5 nodes + 4 edges
def test_Partition_can_update_stats():
    """A Tally updater's per-part total is updated when a node is merged into another part."""
    triangle = networkx.complete_graph(3)
    initial_assignment = {0: 1, 1: 1, 2: 2}

    # Node i carries stat i + 1.
    for node, stat in ((0, 1), (1, 2), (2, 3)):
        triangle.nodes[node]['stat'] = stat

    tally = Tally('stat', alias='total_stat')
    before = Partition(triangle, initial_assignment, {'total_stat': tally})
    assert before['total_stat'][2] == 3

    # Moving node 1 (stat 2) into part 2 takes that part's total from 3 to 5.
    after = before.merge({1: 2})
    assert after['total_stat'][2] == 5
def test_cut_edges_can_handle_multiple_flips():
    """The cut_edges updater equals the naive cut-edge set after three simultaneous flips."""
    grid = three_by_three_grid()
    parent = Partition(
        grid,
        {0: 1, 1: 1, 2: 2, 3: 1, 4: 1, 5: 2, 6: 2, 7: 2, 8: 2},
        {'cut_edges': cut_edges},
    )

    # 112    111
    # 112 -> 121
    # 222    222
    child = Partition(parent=parent, flips={4: 2, 2: 1, 5: 1})

    actual = child['cut_edges']

    # Naive reference: each crossing edge, normalised to a sorted tuple.
    expected = set()
    for edge in grid.edges:
        if child.crosses_parts(edge):
            expected.add(tuple(sorted(edge)))

    assert actual == expected
def test_cut_edges_by_part_gives_same_total_edges_as_naive_method():
    """The union of cut_edges_by_part over all parts equals the naive cut-edge set."""
    grid = three_by_three_grid()
    parent = Partition(
        grid,
        {0: 1, 1: 1, 2: 2, 3: 1, 4: 1, 5: 2, 6: 2, 7: 2, 8: 2},
        {'cut_edges_by_part': cut_edges_by_part},
    )

    # 112    111
    # 112 -> 121
    # 222    222
    child = Partition(parent=parent, flips={4: 2, 2: 1, 5: 1})

    by_part = child['cut_edges_by_part']

    # Naive reference: each crossing edge, normalised to a sorted tuple.
    expected = set()
    for edge in grid.edges:
        if child.crosses_parts(edge):
            expected.add(tuple(sorted(edge)))

    # Flatten the per-part collections with the same normalisation.
    combined = set()
    for part in by_part:
        for edge in by_part[part]:
            combined.add(tuple(sorted(edge)))

    assert expected == combined
def setup_for_proportion_updaters(columns):
    """Return a 3x3-grid Partition set up for testing the vote updaters.

    Random data is attached to the grid for each name in ``columns``, the
    assignment comes from ``random_assignment(graph, 3)``, and the updaters
    are ``votes_updaters(columns)``.
    """
    grid = three_by_three_grid()
    attach_random_data(grid, columns)
    return Partition(
        grid,
        random_assignment(grid, 3),
        votes_updaters(columns),
    )
def test_tally_multiple_columns():
    """A Tally built from two columns sums both columns within a part."""
    grid = three_by_three_grid()
    attach_random_data(grid, ['D', 'R'])
    updaters = {'total': Tally(['D', 'R'], alias='total')}

    # Nodes 0-3 go to district 1, nodes 4-8 to district 2.
    assignment = {node: (1 if node < 4 else 2) for node in range(9)}
    partition = Partition(grid, assignment, updaters)

    expected_total_in_district_one = 0
    for node in range(4):
        attributes = grid.nodes[node]
        expected_total_in_district_one += attributes['D'] + attributes['R']

    assert partition['total'][1] == expected_total_in_district_one
def main():
    """Run a 100-step flip chain and dump mean-median / mean-thirdian scores.

    Each score series is written as a single CSV row, to ``mm_chain_out``
    and ``mt_chain_out`` respectively.
    """
    # Sketch:
    #   1. Load dataframe.
    #   2. Construct neighbor information.
    #   3. Make a graph from this.
    #   4. Throw attributes into graph.
    df = gp.read_file("./testData/mo_cleaned_vtds.shp")
    graph = networkx.readwrite.read_gpickle('example_graph.gpickle')
    add_data_to_graph(df, graph, ["PR_DV08", "PR_RV08", "P_08"], "GEOID10")

    assignment = get_assignment_dict(df, "GEOID10", "CD")
    updaters = {
        'd_votes': statistic_factory('PR_DV08', alias='d_votes'),
        'r_votes': statistic_factory('PR_RV08', alias='r_votes'),
        'cut_edges': cut_edges,
    }
    initial_partition = Partition(graph, assignment, updaters)

    chain = MarkovChain(
        propose_random_flip,
        Validator([contiguous]),
        lambda x: True,  # accept every proposal
        initial_partition,
        total_steps=100,
    )

    # One score per visited state. (Efficiency-gap tracking is currently
    # disabled.)
    score_columns = {'data_column1': 'd_votes', 'data_column2': 'r_votes'}
    mm = []
    mt = []
    for state in chain:
        mm.append(mean_median2(state, **score_columns))
        mt.append(mean_thirdian2(state, **score_columns))

    # Each output file holds one row containing the full score series.
    for out_path, series in (('mm_chain_out', mm), ('mt_chain_out', mt)):
        with open(out_path, "w") as output:
            writer = csv.writer(output, lineterminator='\n')
            writer.writerows([series])
def test_vote_proportion_returns_nan_if_total_votes_is_zero():
    """With every vote column zeroed, each 'D%' and 'R%' value is NaN."""
    columns = ['D', 'R']
    grid = three_by_three_grid()

    # Zero out every vote column on every node.
    for node in grid.nodes:
        attributes = grid.nodes[node]
        for column in columns:
            attributes[column] = 0

    updaters = votes_updaters(columns)
    assignment = random_assignment(grid, 3)
    partition = Partition(grid, assignment, updaters)

    for share_key in ('D%', 'R%'):
        assert all(math.isnan(share) for share in partition[share_key].values())
def test_vote_proportion_updater_returns_percentage_or_nan_on_later_steps():
    """Every 'D%' and 'R%' value is a percentage or NaN at each of 10 chain steps."""
    columns = ['D', 'R']
    grid = three_by_three_grid()
    attach_random_data(grid, columns)
    assignment = random_assignment(grid, 3)

    updaters = dict(votes_updaters(columns))
    updaters['cut_edges'] = cut_edges

    initial_partition = Partition(grid, assignment, updaters)

    chain = MarkovChain(
        propose_random_flip,
        Validator([no_vanishing_districts]),
        lambda x: True,  # accept every proposal
        initial_partition,
        total_steps=10,
    )

    for state in chain:
        for share_key in ('D%', 'R%'):
            assert all(
                is_percentage_or_nan(share)
                for share in state[share_key].values()
            )
def set_up_plan(plan):
    """Build a Partition of the graph in ``./PA_queen.json`` for one plan.

    :plan: name of the node attribute holding each node's district
    :returns: Partition with perimeter, boundary, cut-edge, area and
        Polsby-Popper updaters attached
    """
    graph = Graph.load('./PA_queen.json').graph

    assignment = {}
    for node in graph.nodes:
        assignment[node] = graph.nodes[node][plan]

    updaters = dict(
        perimeters=perimeters,
        exterior_boundaries=exterior_boundaries,
        interior_boundaries=interior_boundaries,
        boundary_nodes=boundary_nodes,
        cut_edges=cut_edges,
        areas=Tally('area', alias='areas'),
        polsby_popper=polsby_popper,
        cut_edges_by_part=cut_edges_by_part,
    )

    return Partition(graph, assignment, updaters)
def set_up_chain(plan, total_steps, adjacency_type='queen'):
    """Build the initial Partition and a MarkovChain for one plan.

    :plan: name of the node attribute holding each node's district
    :total_steps: number of steps passed on to the MarkovChain
    :adjacency_type: selects the graph file ``./PA_<adjacency_type>.json``
    :returns: tuple ``(partition, chain)``
    """
    graph = Graph.load(f"./PA_{adjacency_type}.json").graph

    assignment = {}
    for node in graph.nodes:
        assignment[node] = graph.nodes[node][plan]

    # Vote updaters for both elections go in first; the explicit entries
    # added afterwards take precedence on any key collision.
    updaters = {}
    for election in ("2016_Presidential", "2016_Senate"):
        updaters.update(votes_updaters(elections[election], election_name=election))
    updaters.update({
        'population': Tally('population', alias='population'),
        'perimeters': perimeters,
        'exterior_boundaries': exterior_boundaries,
        'interior_boundaries': interior_boundaries,
        'boundary_nodes': boundary_nodes,
        'cut_edges': cut_edges,
        'areas': Tally('area', alias='areas'),
        'polsby_popper': polsby_popper,
        'cut_edges_by_part': cut_edges_by_part,
    })

    partition = Partition(graph, assignment, updaters)

    # Both extra constraints are configured against the initial partition
    # (population) or with a fixed epsilon (compactness).
    extra_constraints = [
        within_percent_of_ideal_population(partition, 0.01),
        SelfConfiguringLowerBound(L_minus_1_polsby_popper, epsilon=0.1),
    ]
    is_valid = Validator(default_constraints + extra_constraints)

    chain = MarkovChain(propose_random_flip, is_valid, always_accept, partition, total_steps)
    return partition, chain
def main():
    """Run a 10-step flip chain on the wyoming_test shapefile, printing each assignment."""
    shapefile = "./testData/wyoming_test.shp"
    fields = ('CD', 'ALAND')

    graph = construct_graph(*ingest(shapefile, "GEOID"))
    cd_data = get_list_of_data(shapefile, list(fields))
    add_data_to_graph(cd_data, graph, list(fields))

    assignment = pull_districts(graph, 'CD')
    validator = Validator([contiguous])
    initial_partition = Partition(graph, assignment, aggregate_fields=['ALAND'])

    chain = MarkovChain(
        propose_random_flip,
        validator,
        lambda x: True,  # accept every proposal
        initial_partition,
        total_steps=10,
    )

    for step in chain:
        print(step.assignment)
def example_partition():
    """Build a Partition from the mo_cleaned_vtds shapefile and MO_graph.json.

    The assignment comes from the "CD" column keyed by "GEOID10"; vote,
    population, area, county-split, perimeter, boundary, Polsby-Popper and
    cut-edge updaters are attached.
    """
    df = gp.read_file("./testData/mo_cleaned_vtds.shp")
    with open("./testData/MO_graph.json") as handle:
        adjacency = json.load(handle)
    graph = networkx.readwrite.json_graph.adjacency_graph(adjacency)

    assignment = get_assignment_dict(df, "GEOID10", "CD")

    data_columns = ['PR_DV08', 'PR_RV08', 'POP100', 'ALAND10', 'COUNTYFP10']
    add_data_to_graph(df, graph, data_columns, id_col='GEOID10')

    # Vote updaters go in first so the explicit entries below take precedence
    # on any key collision.
    updaters = dict(votes_updaters(['PR_DV08', 'PR_RV08'], election_name='08'))
    updaters.update({
        'population': Tally('POP100', alias='population'),
        'areas': Tally('ALAND10', alias='areas'),
        'counties': county_splits('counties', 'COUNTYFP10'),
        'perimeters': perimeters,
        'exterior_boundaries': exterior_boundaries,
        'boundary_nodes': boundary_nodes,
        'polsby_popper': polsby_popper,
        'cut_edges': cut_edges,
        'cut_edges_by_part': cut_edges_by_part,
    })

    return Partition(graph, assignment, updaters)
def example_partition():
    """Return a Partition of the complete graph on 3 nodes with a cut_edges updater.

    Nodes 0 and 1 are assigned to part 1 and node 2 to part 2.
    """
    return Partition(
        networkx.complete_graph(3),
        {0: 1, 1: 1, 2: 2},
        updaters={'cut_edges': cut_edges},
    )
def example_partition():
    """Return a Partition of the complete graph on 3 nodes with no explicit updaters.

    Nodes 0 and 1 are assigned to part 1 and node 2 to part 2.
    """
    return Partition(
        networkx.complete_graph(3),
        {0: 1, 1: 1, 2: 2},
    )
def read_basic_config(configFileName):
    """Read a basic configuration file and set up a chain run.

    :configFileName: relative path to config file
    :returns: tuple ``(chain, cfunc, escores, sVisType, outFName)`` where
        ``chain`` is the MarkovChain instance built here and the remaining
        four values are passed through unchanged from ``escores_edata``
    :raises KeyError: if the config has no ``MARKOV_CHAIN`` section
    :raises ValueError: if a ``VALIDITY`` value contains more than one comma
    """
    # set up the config file parser
    config = configparser.ConfigParser()
    config.read(configFileName)

    # SET UP GRAPH AND PARTITION SECTION

    # create graph and get global names for required graph attributes
    graph, POP, AREA, CD = gsource_gdata(config, 'GRAPH_SOURCE', 'GRAPH_DATA')

    voteDataList = vsource_vdata(graph, config, 'VOTE_DATA_SOURCE', 'VOTE_DATA')
    # create a list of vote columns to update
    DataUpdaters = {v: updates.Tally(v) for v in voteDataList}

    # construct initial districting plan
    assignment = {node: data[CD] for node, data in graph.nodes(data=True)}

    # set up validator functions and create Validator class instance
    validatorsUpdaters = []
    validators = []
    if config.has_section('VALIDITY') and len(config['VALIDITY']) > 0:
        for spec in config['VALIDITY'].values():
            parts = spec.split(',')
            if len(parts) == 1:
                # plain validator name
                validators.append(getattr(valids, spec))
            else:
                # "name,bound" form; unpacking enforces exactly two fields
                [name, bound] = parts
                # NOTE(review): `bound` is passed through as a string, exactly
                # as read from the config — confirm that
                # WithinPercentRangeOfBounds expects that.
                validators.append(
                    valids.WithinPercentRangeOfBounds(getattr(valids, name), bound))
            validatorsUpdaters.append(parts[0])

    # Wrap even an empty list, so the chain always receives a Validator.
    validators = valids.Validator(validators)

    # add updaters required by this list of validators to list of updaters
    for x in validatorsUpdaters:
        DataUpdaters.update(dependencies(x, POP, AREA))

    # END SET UP GRAPH AND PARTITION SECTION

    # SET UP MARKOVCHAIN RUN SECTION

    # set up parameters for markovchain run
    chainparams = config['MARKOV_CHAIN']

    # number of steps to run
    num_steps = 1000
    if 'num_steps' in chainparams:
        num_steps = int(chainparams['num_steps'])

    # type of flip to use
    proposal = proposals.propose_random_flip
    if 'proposal' in chainparams:
        proposal = getattr(proposals, chainparams['proposal'])

    # acceptance function to use
    accept = accepts.always_accept
    if 'accept' in chainparams:
        accept = getattr(accepts, chainparams['accept'])

    # END SET UP MARKOVCHAIN RUN SECTION

    # SET UP DATA PROCESSOR FOR CHAIN RUN

    # get evaluation scores to compute and the columns to use for each
    escores, cfunc, elist, sVisType, outFName = escores_edata(
        config, "EVALUATION_SCORES", "EVALUATION_SCORES_DATA")

    # add evaluation scores updaters to list of updaters
    for x in elist:
        DataUpdaters.update(dependencies(x, POP, AREA))

    # END SET UP DATA PROCESSOR FOR CHAIN RUN

    updaters = DataUpdaters

    # create markovchain instance
    initial_partition = Partition(graph, assignment, updaters)
    chain = MarkovChain(proposal, validators, accept, initial_partition, num_steps)

    return chain, cfunc, escores, sVisType, outFName
def main():
    """Run a flip chain on the Utah graph and save histograms of four scores.

    The number of chain steps is read from the last command-line argument.
    The 2x2 histogram figure is saved to ``./output/histograms/<steps>.png``,
    with a red vertical line marking each score on the initial partition.
    """
    # Get the data, set the number of steps, and denote the column header
    # containing vote data.
    datapath = "./Prorated/Prorated.shp"
    graphpath = "./graphs/utah.json"
    steps = int(sys.argv[-1])
    r_header = "R"
    d_header = "D"

    # Generate a dataframe, graph, and then combine the two.
    df = gpd.read_file(datapath)
    graph = construct_graph(graphpath)
    add_data_to_graph(df, graph, [r_header, d_header], id_col="GEOID10")

    # Get the district assignment and add updaters.
    # `Graph.node` was removed in NetworkX 2.4; `Graph.nodes` is the supported
    # accessor for node attribute dictionaries.
    assignment = {node: graph.nodes[node]["CD"] for node in graph.nodes}
    updaters = {
        **votes_updaters([r_header, d_header]),
        "population": Tally("POP10", alias="population"),
        "perimeters": perimeters,
        "exterior_boundaries": exterior_boundaries,
        "interior_boundaries": interior_boundaries,
        "boundary_nodes": boundary_nodes,
        "cut_edges": cut_edges,
        "areas": Tally("ALAND10", alias="areas"),
        "polsby_popper": polsby_popper,
        "cut_edges_by_part": cut_edges_by_part,
    }

    # Create an initial partition and a Pennsylvania-esque chain run.
    initial_partition = Partition(graph, assignment, updaters)
    validator = Validator(
        [refuse_new_splits, no_vanishing_districts, single_flip_contiguous])
    chain = MarkovChain(propose_random_flip, validator, always_accept,
                        initial_partition, total_steps=steps)

    # Pick the scores we want to track.
    # NOTE(review): Mean-Median uses the R share while Mean-Thirdian uses the
    # D share — confirm this asymmetry is intended.
    scores = {
        "Mean-Median": functools.partial(
            mean_median, proportion_column_name=r_header + "%"),
        "Mean-Thirdian": functools.partial(
            mean_thirdian, proportion_column_name=d_header + "%"),
        "Efficiency Gap": functools.partial(
            efficiency_gap, col1=r_header, col2=d_header),
        "L1 Reciprocal Polsby-Popper": L1_reciprocal_polsby_popper,
    }

    # Set initial scores, then allow piping and plotting things.
    initial_scores = {
        key: score(initial_partition) for key, score in scores.items()
    }
    table = pipe_to_table(chain, scores)
    fig, axes = plt.subplots(2, 2)

    # Configuring where the plots go.
    quadrants = {
        "Mean-Median": (0, 0),
        "Mean-Thirdian": (0, 1),
        "Efficiency Gap": (1, 0),
        "L1 Reciprocal Polsby-Popper": (1, 1),
    }

    # Plotting things!
    for key in scores:
        quadrant = quadrants[key]
        axes[quadrant].hist(table[key], bins=50)
        axes[quadrant].set_title(key)
        axes[quadrant].axvline(x=initial_scores[key], color="r")

    # Save the histogram figure.
    plt.savefig(f"./output/histograms/{steps}.png")
'cut_edges': cut_edges, 'areas': Tally('areas'), 'polsby_popper': polsby_popper, 'cut_edges_by_part': cut_edges_by_part, #'County_Splits': county_splits('County_Splits',county_col) } # Add the vote updaters for multiple plans for i in range(num_elections): updaters = { **updaters, **votes_updaters(election_columns[i], election_names[i]) } # This builds the partition object initial_partition = Partition(graph, assignment, updaters) # Choose which binary constraints to enforce # Options are in validity.py pop_limit = .2 population_constraint = within_percent_of_ideal_population( initial_partition, pop_limit) compactness_constraint_Lm1 = LowerBound( L_minus_1_polsby_popper, L_minus_1_polsby_popper(initial_partition)) #edge_constraint = UpperBound(number_cut_edges, 850) edge_constraint = UpperBound(number_cut_edges, 2 * number_cut_edges(initial_partition))
def main(): #graph = construct_graph_from_file("/Users/caranix/Desktop/Alaska_Chain/AK_data.shp", geoid_col="DISTRICT") with open('./alaska_graph.json') as f: data = json.load(f) graph = networkx.readwrite.json_graph.adjacency_graph(data) df = gp.read_file( "/Users/caranix/Desktop/Alaska_Chain/AK_data.shp" ) # assignment = dict(zip(graph.nodes(), [graph.node[x]['HOUSEDIST'] for x in graph.nodes()])) add_data_to_graph(df, graph, [ 'join_Distr', 'POPULATION', 'join_Dem', 'join_Rep', 'perc_Dem', 'perc_Rep', 'AREA' ], id_col='DISTRICT') data = json.dumps(networkx.readwrite.json_graph.adjacency_data(graph)) with open('./alaska_graph.json', 'w') as f: f.write(data) assignment = dict( zip(graph.nodes(), [graph.node[x]['join_Distr'] for x in graph.nodes()])) updaters = { 'population': Tally('POPULATION', alias='population'), 'cut_edges': cut_edges, 'cut_edges_by_part': cut_edges_by_part, **votes_updaters(['join_Dem', 'join_Rep'], election_name='12'), 'perimeters': perimeters, 'exterior_boundaries': exterior_boundaries, 'boundary_nodes': boundary_nodes, 'cut_edges': cut_edges, 'areas': Tally('AREA', alias='areas'), 'polsby_popper': polsby_popper } p = Partition(graph, assignment, updaters) print("Starting Chain") chain = BasicChain(p, 1000000) allAssignments = {0: chain.state.assignment} for step in chain: allAssignments[chain.counter + 1] = step.flips # print(mean_median(step, 'join_Dem%')) # with open("chain_outputnew.json", "w") as f: # f.write(json.dumps(allAssignments)) #efficiency_gap(p) # mean_median(p, 'join_Dem%') scores = { 'Mean-Median': functools.partial(mean_median, proportion_column_name='join_Dem%'), 'Mean-Thirdian': functools.partial(mean_thirdian, proportion_column_name='join_Dem%'), 'Efficiency Gap': functools.partial(efficiency_gap, col1='join_Dem', col2='join_Rep'), 'L1 Reciprocal Polsby-Popper': L1_reciprocal_polsby_popper } initial_scores = {key: score(p) for key, score in scores.items()} table = pipe_to_table(chain, scores) fig, axes = plt.subplots(2, 
2) quadrants = { 'Mean-Median': (0, 0), 'Mean-Thirdian': (0, 1), 'Efficiency Gap': (1, 0), 'L1 Reciprocal Polsby-Popper': (1, 1) } for key in scores: quadrant = quadrants[key] axes[quadrant].hist(table[key], bins=50) axes[quadrant].set_title(key) axes[quadrant].axvline(x=initial_scores[key], color='r') plt.show() '''