def generator(
    stop_event,
    partition_queue,
    step_count=1000,
    burn_in_ratio=0.1,
    thinning=5,
    seed=2020,
):
    """
    Creates and runs generator of map proposals

    Parameters
    ----------
    step_count : int, Default 1000
        steps of chain to run (after burn-in)
    burn_in_ratio : float, Default 0.1
        steps to burn-in for, as a ratio of the total step count (not to collect data)
    stop_event: multiprocessing.Event
        tells chain's workers if generation has stopped
    partition_queue: multiprocessing.Queue
        structure that takes each partition as it is generated
    thinning: int, Default 5
        Take every <thinning>th result from the chain to minimize dependence
    seed: int, Default 2020
        Random seed for reproducibility
    """
    # FOR REPRODUCIBILITY
    from gerrychain.random import random

    random.seed(seed)

    init_partition = Partition(
        graph,
        assignment=race_matrix.to_dict()["partition"],
        updaters={"population": Tally("population")},
    )

    chain = MarkovChain(
        proposal=propose_chunk_flip,
        constraints=is_valid,
        accept=always_accept,
        initial_state=init_partition,
        total_steps=step_count + int(burn_in_ratio * step_count),
    )

    iter(chain)  # initialize the chain's internal iterator state

    burn_bar = trange(int(burn_in_ratio * step_count),
                      desc="Burn-in",
                      leave=True,
                      position=0)  # unused while the burn-in loop below stays commented out
    pbar = trange(
        int(burn_in_ratio * step_count) + step_count,
        desc="Generating maps",
        leave=True,
        position=0,
    )

    # burn-in
    # for _ in burn_bar:
    #     next(chain)

    for i in pbar:
        map_proposal = (i, dict(next(chain).assignment))
        # only push the proposal to the queue on every <thinning>th step
        if i % thinning == 0:
            partition_queue.put(map_proposal)
    stop_event.set()
    # # send a text when done (SEE FIELDS)
    # client.messages.create(
    #     to=<YOUR PHONE NUMBER>,
    #     from_=<TWILIO SOURCE NUMBER>,
    #     body=f"{_type.capitalize()} flip for {CITY_NAME} completed.",
    # )
    print(f"{CITY_NAME} Generator: {stop_event.is_set()}")
Example #2
#### the second argument is a dictionary matching political parties to their
#### vote total columns in our shapefile. This will let us compute hypothetical
#### election results for each districting plan in the ensemble.

election = Election("SEN12", {"Dem": "USS12D", "Rep": "USS12R"})

#### Finally, we create a Partition of the graph. This will be the starting
#### point for our Markov chain.

initial_partition = Partition(
    graph,
    assignment="2011_PLA_1",
    updaters={
        "cut_edges": cut_edges,
        "population": Tally("TOT_POP", alias="population"),
        "SEN12": election,
    },
)

#### With the "population" updater configured, we can see the total population
#### in each of our congressional districts. In an interactive Python session,
#### we can print out the populations like this:

for district, pop in initial_partition["population"].items():
    print("District {}: {}".format(district, pop))

#### Notice that partition["population"] is a dictionary mapping the ID of each
#### district to its total population (that’s why we can call the .items()
#### method on it). Most updaters output values in this dictionary format.
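

#### Since the updater's output is an ordinary dictionary, any dict
#### operation works on it. As a quick sanity check (our addition, not part
#### of the original tutorial), we can print each district's deviation from
#### the ideal population:

ideal = sum(initial_partition["population"].values()) / len(initial_partition)
for district, pop in initial_partition["population"].items():
    print("District {}: {:+.2%}".format(district, (pop - ideal) / ideal))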
Example #3
    node_shape="s",
    cmap="tab20",
)
plt.show()


# ###CONFIGURE UPDATERS
def step_num(partition):
    parent = partition.parent
    if not parent:
        return 0
    return parent["step_num"] + 1
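

# An illustrative note (not original code): step_num is a recursive updater.
# The initial partition has no parent and reports 0; every successor
# partition reports its parent's value plus one, so partition["step_num"]
# effectively counts accepted chain steps.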


updaters = {
    "population": Tally("population"),
    "cut_edges": cut_edges,
    "step_num": step_num,
    "Pink-Purple": Election("Pink-Purple", {
        "Pink": "pink",
        "Purple": "purple"
    }),
}

# ########BUILD PARTITION

grid_partition = Partition(graph, assignment=cddict, updaters=updaters)

# ADD CONSTRAINTS
# For our 10x10 grid, this allows district populations within 10% of the
# ideal (10 vertices per district)
popbound = within_percent_of_ideal_population(grid_partition, 0.1)
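
# A constraint object like popbound is itself a callable returning a bool
# (an added illustration, not original code); the seed plan should satisfy
# its own bounds:
assert popbound(grid_partition)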
Example #4
def run_full_chain(chain_name):
    # # twilio setup, requires proper env variables to be set up (so it will text you when the chain is done)
    # account = os.environ["TWILIO_ACCT"]
    # auth = os.environ["TWILIO_AUTH"]
    # client = Client(account, auth)

    # get hyperparams
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--steps",
        type=int,
        help="number of steps for each markov chain",
        default=100000,
    )
    parser.add_argument("city", type=str, help="city name, i.e. Atlanta")
    parser.add_argument("state", type=str, help="state code, i.e. GA")
    parser.add_argument(
        "fips", help="state FIPS code (zero-padded on the end), i.e. 130")
    args = parser.parse_args()

    STEP_COUNT = args.steps
    BURN_IN_RATIO = 0.1
    CITY_NAME = args.city
    STATE = args.state
    STATE_FIPS = str(args.fips)
    THINNING_FACTOR = 5  # measure entropy only once every this many iterations of the chain

    race_matrix = load_data(CITY_NAME, STATE, STATE_FIPS, fake=False)
    # scratch version of R for the Polsby-Popper computation
    R_scratch = race_matrix[["partition", "geometry"]]

    print(race_matrix.head())

    # build chain
    graph = Graph.from_geodataframe(race_matrix, adjacency="queen")
    nx.set_node_attributes(graph,
                           race_matrix["total"].to_dict(),
                           name="population")
    init_partition = Partition(
        graph,
        assignment=race_matrix.to_dict()["partition"],
        updaters={"population": Tally("population")},
    )

    # validators
    def mean_pop(part):
        return np.mean(list(part["population"].values()))

    def min_pop(part):
        return np.min(list(part["population"].values()))

    def sd_pop(part):
        return np.std(list(part["population"].values()))

    # TODO: only check if GISJOIN in minimum P-P partition have changed
    # TODO: cache set of GISJOINs for minimum partition for lowest P-P partition
    # TODO: compare this set to the new one when given a partition
    # TODO: if set is different, recompute P-P for whole partition, else do nothing
    def partition_polsby_popper(part, R=R_scratch):
        """Computes the minimum Polsby-Popper score across a partition's districts.

        Args:
            part (gerrychain.Partition): partition map from a single step in the Markov chain
            R (geopandas.GeoDataFrame): columns 'partition' and 'geometry' for getting the polygons

        Returns:
            float: the minimum Polsby-Popper score over all districts
        """
        # get all shapes from each district, compute Polsby-Popper on all
        # districts, and take the minimum
        pd.options.mode.chained_assignment = None
        R.loc[:, "partition"] = race_matrix.index.map(dict(part.assignment))
        R_temp = R.copy(deep=True).dissolve(by="partition")
        polsby_popper = lambda d: (4 * np.pi * d.area) / (d.length ** 2)  # d is a polygon
        # srs = R["geometry"].map(polsby_popper).values
        # print(np.min(srs), np.mean(srs), np.max(srs))
        # return srs.min()
        return R_temp["geometry"].map(polsby_popper).min()
        # return min(polsby_popper_from_R(R).values())

    def polsby_popper_from_R(R):
        """A more stable alternative to geopandas' dissolve."""
        from shapely.ops import unary_union

        # loop through all partitions, unary-union their geometries, then
        # return a dict of scores indexed by partition id
        result = {}
        polsby_popper = lambda d: (4 * np.pi * d.area) / (d.length ** 2)  # d is a polygon
        for pid in R["partition"].unique():
            # get all geometries
            geom = R.loc[R["partition"] == pid]["geometry"].values
            result[pid] = polsby_popper(unary_union(geom))
        return result
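
    # Sanity check on the formula (added note, not original code): a circle
    # scores exactly 1 and a unit square scores 4*pi/4**2 = pi/4 ~ 0.785;
    # lower values mean less compact shapes.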

    def partition_polsby_popper_min(part, R=R_scratch):
        nonlocal min_partition_id
        nonlocal min_partition_gisjoins
        nonlocal min_partition_p_p
        pd.options.mode.chained_assignment = None
        R.loc[:, "partition"] = race_matrix.index.map(dict(part.assignment))
        same_gisjoins = (
            set(R.loc[R["partition"] == min_partition_id].index.values)
            == min_partition_gisjoins
        )
        if min_partition_id is not None and same_gisjoins:
            # no change, return the old one
            return min_partition_p_p
        else:
            # something changed, so recompute all partitions
            # R_temp = R.copy(deep=True).dissolve(by="partition")
            # p_p_scores = R_temp["geometry"].map(polsby_popper)
            # min_partition_p_p = p_p_scores.min()
            # min_partition_id = R_temp.iloc[np.argmin(p_p_scores.values)].name
            p_p_scores = polsby_popper_from_R(R)
            min_partition_p_p = min(p_p_scores.values())
            min_partition_id = min(p_p_scores.items(), key=lambda x: x[1])[0]
            min_partition_gisjoins = set(
                R.loc[R["partition"] == min_partition_id].index.values)
            if min_partition_p_p < 0.147:  # initial Oakland partition has min score of 0.147
                print("Rejected with score", min_partition_p_p)
            return min_partition_p_p

    mean_one_sd_up = mean_pop(init_partition) + (2 / 3) * sd_pop(init_partition)
    mean_one_sd_down = mean_pop(init_partition) - (2 / 3) * sd_pop(init_partition)

    min_partition_id, min_partition_gisjoins, min_partition_p_p = None, set(), None

    # initalize and run chains
    # TODO: record descent
    is_valid = Validator([
        LowerBound(min_pop, min_pop(init_partition) % 50),
        UpperBound(mean_pop, mean_one_sd_up),
        LowerBound(mean_pop, mean_one_sd_down),
        WithinPercentRangeOfBounds(sd_pop, 25),
        # contiguous,
        # LowerBound(
        #     partition_polsby_popper, bound=partition_polsby_popper(init_partition)
        # ),
        # LowerBound(
        #     partition_polsby_popper_min,
        #     bound=partition_polsby_popper_min(init_partition),
        # ),
        no_vanishing_districts,
    ])

    # make sure init_partition passes validators
    assert is_valid(init_partition)

    chain = MarkovChain(
        proposal=propose_chunk_flip,
        constraints=is_valid,
        accept=always_accept,
        initial_state=init_partition,
        total_steps=(STEP_COUNT * THINNING_FACTOR) + int(STEP_COUNT * BURN_IN_RATIO),
    )
    print(f"Prereqs created, {chain_name} running...")
    # burn-in (STEP_COUNT * BURN_IN_RATIO steps)
    iter(chain)  # initialize the chain's internal iterator state
    # print(f"Burn-in: ({int(STEP_COUNT * BURN_IN_RATIO)} steps)")
    for i in range(int(STEP_COUNT * BURN_IN_RATIO)):
        if i % 100 == 0:
            print(
                f"{chain_name} BURN IN => {i}/{int(STEP_COUNT * BURN_IN_RATIO)}"
            )
        next(chain)
    # print(f"Measurement: ({STEP_COUNT} steps)")
    entropies = []
    scores = []
    start_time = time.time()

    for i in range(STEP_COUNT * THINNING_FACTOR):
        if i % 25 == 0:
            print(
                f"{chain_name} ELAPSED {round(time.time() - start_time, 1)}s => {len(entropies)}/{STEP_COUNT}"
            )
        if i % THINNING_FACTOR == 0:
            part = next(chain)
            entropies.append(chain_to_entropy(part, race_matrix))
            scores.append(partition_polsby_popper_min(part))
        else:
            next(chain)

    np.save("./results_2020/polsby_popper_oakland.npy", np.array(scores))

    save_results(
        CITY_NAME,
        STEP_COUNT,
        chain_name,
        baseline=chain_to_entropy(init_partition, race_matrix),
        entropies=entropies,
    )
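
# Hypothetical entry point (an assumption; the original excerpt shows none):
# run_full_chain parses its own CLI arguments, so a run would look like
#   python run_chains.py Atlanta GA 130 -s 100000
if __name__ == "__main__":
    run_full_chain("chunk-flip-chain")
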
def run_ensemble_on_distro(graph, min_pop_col, maj_pop_col, tot_pop_col,
                           num_districts, initial_plan, num_steps,
                           pop_tol=0.05, min_win_thresh=0.5):
    """Runs a ReCom chain on a graph with a given minority/majority population
    distribution; returns lists of cut edges, minority seat wins, and tuples of
    minority percentage by district for each step of the chain.
    
    Parameters:
    graph (networkx.Graph) -- a NetworkX graph object representing the dual graph on which to run the chain. The nodes should have attributes for majority population, minority population, and total population.
    min_pop_col (string) -- the key/column name for the minority population attribute in graph
    maj_pop_col (string) -- the key/column name for the majority population attribute in graph
    tot_pop_col (string) -- the key/column name for the total population attribute in graph
    num_districts (int) -- number of districts to run for the chain
    initial_plan (gerrychain.Partition) -- an initial partition for the chain (which does not need updaters since the function will supply its own updaters)
    num_steps (int) -- the number of steps for which to run the chain
    pop_tol (float, default 0.05) -- tolerance for deviation from perfectly balanced populations between districts
    min_win_thresh (float, default 0.5) -- percent of minority population needed in a district for it to be considered a minority win. If the minority percentage in a district is greater than or equal to min_win_thresh then that district is considered a minority win.
    
    Returns:
    [cut_edges_list,min_seats_list,min_percents_list] (list)
        WHERE
        cut_edges_list (list) -- list where cut_edges_list[i] is the number of cut edges in the partition at step i of the Markov chain
        min_seats_list -- list where min_seats_list[i] is the number of districts won by the minority (according to min_win_thresh) at step i of the chain
        min_percents_list -- list where min_percents_list[i] is a tuple, with min_percents_list[i][j] being the minority percentage in district j at step i of the chain
    """
    my_updaters = {
        "population": Tally(tot_pop_col, alias = "population"),
        "cut_edges": cut_edges,
        "maj-min": Election("maj-min", {"maj": maj_pop_col, "min": min_pop_col}),
    }
    
    initial_partition = Partition(graph = initial_plan.graph, assignment = initial_plan.assignment, updaters = my_updaters)
    
    # ADD CONSTRAINTS
    # use pop_tol so the constraint matches the proposal's epsilon, as the docstring promises
    popbound = within_percent_of_ideal_population(initial_partition, pop_tol)
    
    # ########Setup Proposal
    ideal_population = sum(initial_partition["population"].values()) / len(initial_partition)
    
    tree_proposal = partial(
        recom,
        pop_col=tot_pop_col,
        pop_target=ideal_population,
        epsilon=pop_tol,
        node_repeats=1,
    )
    
    # ######BUILD MARKOV CHAINS
    
    recom_chain = MarkovChain(
        tree_proposal,
        Validator([popbound]),
        accept=always_accept,
        initial_state=initial_partition,
        total_steps=num_steps,
    )
    
    cut_edges_list = []
    min_seats_list = []
    min_percents_list = []
    
    for part in recom_chain:
        cut_edges_list.append(len(part["cut_edges"]))
        min_percents_list.append(part["maj-min"].percents("min"))
        min_seats = (np.array(part["maj-min"].percents("min")) >= min_win_thresh).sum()
        min_seats_list.append(min_seats)
    
    return [cut_edges_list, min_seats_list, min_percents_list]
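
# Hedged usage sketch: the file name, column names, and district count below
# are illustrative assumptions, not values from the original code.
from gerrychain import Graph, Partition
from gerrychain.tree import recursive_tree_part

example_graph = Graph.from_json("state_blocks.json")
example_total = sum(example_graph.nodes[n]["TOTPOP"] for n in example_graph.nodes)
example_seed = recursive_tree_part(example_graph, range(5), example_total / 5,
                                   "TOTPOP", 0.02, 1)
example_plan = Partition(example_graph, assignment=example_seed)
cut_counts, seat_counts, seat_shares = run_ensemble_on_distro(
    example_graph, "BVAP", "WVAP", "TOTPOP", 5, example_plan, num_steps=1000)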
Example #6
graph = nx.relabel_nodes(graph, df[uid])

elections = [
    Election("PRES16", {
        "Democratic": "PRES16D",
        "Republican": "PRES16R"
    }),
    Election("SEN16", {
        "Democratic": "SEN16D",
        "Republican": "SEN16R"
    })
]

#my_updaters = {"population" : updaters.Tally("TOTPOP", alias="population")}
my_updaters = {
    "population": Tally(pop_col, alias="population"),
    "cpop": Tally(ccol, alias="cpop"),
    "cut_edges": cut_edges
}
election_updaters = {election.name: election for election in elections}
my_updaters.update(election_updaters)

tot_pop_col = 0
tot_ccol = 0
#for tallying over totpop:
#for n in graph.nodes():
#    graph.node[n][pop_col] = int(graph.node[n][pop_col])
#    tot_pop_col += graph.node[n][pop_col]

#cddict = recursive_tree_part(graph,range(num_districts),tot_pop_col/num_districts,pop_col,0.01,1)
Example #7
def partition_with_pop(graph_with_pop):
    return Partition(
        graph_with_pop,
        {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 1, 6: 1, 7: 1, 8: 1},
        updaters={"pop": Tally("pop"), "cut_edges": cut_edges},
    )
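
# Hypothetical pytest-style usage (assumes partition_with_pop is registered
# as a fixture and graph_with_pop supplies a 9-node graph with "pop" data):
def test_partition_with_pop_has_two_districts(partition_with_pop):
    assert set(partition_with_pop.parts) == {0, 1}
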
def demo():
    graph_path = "./Data/PA_VTDALL.json"
    graph = Graph.from_json(graph_path)
    k = 18
    ep = 0.05
    pop_col = "TOT_POP"

    plot_path = "./Data/VTD_FINAL"
    unit_df = gpd.read_file(plot_path)
    unit_col = "GEOID10"
    division_col = "COUNTYFP10"
    divisions = unit_df[[division_col, 'geometry']].dissolve(by=division_col,
                                                             aggfunc='sum')
    county_dict = pd.Series(unit_df[division_col].values,
                            index=unit_df[unit_col]).to_dict()

    for v in graph.nodes:
        graph.nodes[v]['division'] = county_dict[v]
        graph.nodes[v][unit_col] = v

    updaters = {
        "population": Tally("TOT_POP", alias="population"),
        "cut_edges": cut_edges,
    }
    cddict = recursive_tree_part(graph,
                                 range(k),
                                 unit_df[pop_col].sum() / k,
                                 pop_col,
                                 .01,
                                 node_repeats=1)
    initial_partition = Partition(graph, cddict, updaters)
    ideal_population = sum(
        initial_partition["population"].values()) / len(initial_partition)
    division_proposal = partial(recom,
                                pop_col=pop_col,
                                pop_target=ideal_population,
                                epsilon=0.05,
                                method=partial(division_bipartition_tree,
                                               division_col='division'),
                                node_repeats=2)

    chain = MarkovChain(proposal=division_proposal,
                        constraints=[
                            constraints.within_percent_of_ideal_population(
                                initial_partition, 0.05),
                        ],
                        accept=accept.always_accept,
                        initial_state=initial_partition,
                        total_steps=1000)

    t = 0
    snapshot = 100
    for part in chain:
        if t % snapshot == 0:
            draw_graph(graph,
                       part.assignment,
                       unit_df,
                       divisions,
                       './figs/chain_' + str(t) + '.png',
                       geo_id=unit_col)
            print(
                "t: ", t, ", num_splits: ",
                num_splits(part,
                           unit_df,
                           geo_id=unit_col,
                           division_col=division_col), ", cut_length",
                cut_length(part))
        t += 1
Example #9
# get enacted map open
graph = Graph.from_file(
    "/Users/hopecj/projects/gerryspam/MO/dat/final_prec/prec_labeled.shp")
elections = [
    Election("USSEN16", {
        "Dem": "G16USSDKAN",
        "Rep": "G16USSRBLU"
    }),
    Election("PRES16", {
        "Dem": "G16PREDCLI",
        "Rep": "G16PRERTRU"
    })
]

mo_updaters = {
    "population": Tally("POP10", alias="population"),
    "cut_edges": cut_edges,
}
election_updaters = {election.name: election for election in elections}
mo_updaters.update(election_updaters)

sen_part = Partition(graph, assignment="SLDUST", updaters=mo_updaters)
sen_part["PRES16"].efficiency_gap()
sen_part.plot(cmap="tab20")
plt.show()  # show the state senate map
cong_part = Partition(graph, assignment="SLDLST", updaters=mo_updaters)

# load parts (aka district maps) from gerrychain
# "assignment" = mapping of node IDs to district IDs
sen_parts = np.load(
    "/Users/hopecj/projects/gerryspam/MO/res/MO_state_senate_100000_0.05_parts.p")
Example #10
def chain(iterations):
    idef = random.randint(1, 10000)
    graph = Graph.from_json("./PA_VTD.json")

    election = Election("SEN12", {"Dem": "USS12D", "Rep": "USS12R"})

    initial_partition = GeographicPartition(
        graph,
        assignment="2011_PLA_1",
        updaters={
            "cut_edges": cut_edges,
            "population": Tally("TOT_POP", alias="population"),
            "SEN12": election,
        })

    ideal_population = sum(
        initial_partition["population"].values()) / len(initial_partition)

    # We use functools.partial to bind the extra parameters (pop_col, pop_target, epsilon, node_repeats)
    # of the recom proposal.

    proposal = partial(recom,
                       pop_col="TOT_POP",
                       pop_target=ideal_population,
                       epsilon=0.02,
                       node_repeats=2)
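
    # Illustrative note (not original code): functools.partial freezes the
    # keyword arguments above, so proposal(partition) is equivalent to
    # recom(partition, pop_col="TOT_POP", pop_target=ideal_population,
    #       epsilon=0.02, node_repeats=2).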

    chain = MarkovChain(proposal=proposal,
                        constraints=[],
                        accept=contiguous,
                        initial_state=initial_partition,
                        total_steps=iterations + 100)

    count = 0
    metrics = []
    boundary_nodes = []
    boundary_weighted = []
    for partition in chain.with_progress_bar():
        mm = mean_median(partition["SEN12"])
        p = pp(partition)
        bias = partisan_bias(partition["SEN12"])
        gini = partisan_gini(partition["SEN12"])
        gap = efficiency_gap(partition["SEN12"])
        cut = len(partition["cut_edges"])
        if count >= 100:
            metrics.append((mm, p, bias, gini, gap, cut))
            nodes = [0] * 8921  # length matches the node count of the PA VTD graph
            bnodes = [0] * 8921
            for edge in partition["cut_edges"]:
                nodes[edge[0]] = 1
                nodes[edge[1]] = 1
                bnodes[edge[0]] += 1
                bnodes[edge[1]] += 1
            boundary_nodes.append(nodes)
            boundary_weighted.append(bnodes)
        if count % 100 == 0:
            print(idef, count, mm, p, bias, gini, gap, cut)
        count += 1

    return metrics, boundary_nodes, boundary_weighted
    largest_component_size = max(len(c) for c in components)
    to_delete = [c for c in components if len(c) != largest_component_size]
    for c in to_delete:
        for node in c:
            graph.remove_node(node)


    election = Election("PRETOT16", {"Dem": "PREDEM16", "Rep": "PREREP16"})

    #Create initial partition based on congressional districts
    initial_partition = Partition(
        graph,
        assignment="CON",
        updaters={
            "cut_edges": cut_edges,
            "population": Tally("PERSONS", alias="population"),
            "PRETOT16": election
            }
        )

    # Example set of NoInitialChains to run:
    pop_constraint = constraints.within_percent_of_ideal_population(initial_partition, 0.06)
    compactness_bound = constraints.UpperBound(
        lambda p: len(p["cut_edges"]),
        2*len(initial_partition["cut_edges"])
        )

    chainFlipAlwaysShort = NoInitialChain(
        proposal=propose_random_flip,
        constraints=[single_flip_contiguous,
                     pop_constraint,
Example #12
def main(config_data, id):
    """Runs a single experiment with the given config file. Loads a graph,
    runs a Chain to search for a Gerrymander, metamanders around that partition,
    runs another chain, and then saves the generated data.

    Args:
        config_data (dict): configuration of experiment loaded from JSON file
        id (String): id of experiment, used in tags to differentiate between
        experiments
    """
    try:

        timeBeg = time.time()
        print('Experiment', id, 'has begun')
        # Save configuration into global variable
        global config
        config = config_data

        # Get graph and dual
        graph, dual = preprocessing(config["INPUT_GRAPH_FILENAME"])
        # List of districts in original graph
        parts = list(set([graph.nodes[node][config['ASSIGN_COL']] for node in graph.nodes()]))
        # Ideal population of districts
        ideal_pop = sum([graph.nodes[node][config['POP_COL']] for node in graph.nodes()]) / len(parts)
        # Initialize partition
        election = Election(
                            config['ELECTION_NAME'],
                            {'PartyA': config['PARTY_A_COL'],
                            'PartyB': config['PARTY_B_COL']}
                            )

        updaters = {'population': Tally(config['POP_COL']),
                    'cut_edges': cut_edges,
                    config['ELECTION_NAME'] : election
                    }

        origPartition = Partition(graph=graph, assignment=config['ASSIGN_COL'], updaters=updaters)
        minAvg, minPartition = float('inf'), None
        for i in range(config['RUNS_PER_K_VAL']):
            tempGraph = copy.deepcopy(origPartition.graph)
            face_sierpinski_mesh(origPartition, tempGraph , getKFaces(dual, config['k']))
            # Refresh assignment and election of partition
            updaters[config['ELECTION_NAME']] = Election(
                                                         config['ELECTION_NAME'],
                                                         {'PartyA': config['PARTY_A_COL'],
                                                          'PartyB': config['PARTY_B_COL']}
                                                        )
            newPartition = Partition(graph=tempGraph, assignment=config['ASSIGN_COL'], updaters=updaters)
            if (avg := run_chain(newPartition, config['CHAIN_TYPE'],
                                               config['TEST_META_LENGTH'], ideal_pop, id + 'a' + str(i),
                                               config['TEST_RUN_STATS_TAG'] + id + str(i))) < minAvg:
                minAvg, minPartition = avg, newPartition

        updaters[config['ELECTION_NAME']] = Election(
                                                     config['ELECTION_NAME'],
                                                     {'PartyA': config['PARTY_A_COL'],
                                                      'PartyB': config['PARTY_B_COL']}
                                                    )
        partition = Partition(graph=minPartition.graph, assignment=config['ASSIGN_COL'], updaters=updaters)
        # Run chain again
        run_chain(partition, config['CHAIN_TYPE'], config['FULL_CHAIN_LENGTH'],
                  ideal_pop, id + 'b', config['FULL_RUN_STATS_TAG'] + id)
        # Save data from experiment to JSON files
        drawGraph(partition.graph, 'cut_times', config['GRAPH_TAG'] + '_single_raw_' + id)
        drawGraph(partition.graph, 'sibling_cuts', config['GRAPH_TAG'] + '_single_adjusted_' + id)
        drawDoubleGraph(partition.graph, 'cut_times', config['GRAPH_TAG'] + '_double_raw_' + id)
        drawDoubleGraph(partition.graph, 'sibling_cuts', config['GRAPH_TAG'] + '_double_adjusted_' + id)
        saveGraphStatistics(partition.graph, config['GRAPH_STATISTICS_TAG'] + id)

        print('Experiment {} completed in {:.2f} seconds'.format(id, time.time() - timeBeg))
Example #13
POP_COL = "TOTPOP"
NUM_DISTRICTS = num_districts_in_map[args.map]
ITERS = args.n
EPS = args.eps #epsilons[args.map]


## Pull in graph and set up updaters

print("Reading in Data/Graph")

df = gpd.read_file("data/OR_blocks/OR_blocks.shp")
with open("data/OR_blocks/OR_block_graph.p", "rb") as f_in:
    graph = pickle.load(f_in)


or_updaters = {"population" : Tally(POP_COL, alias="population"),
               "cut_edges": cut_edges,
               "VAP": Tally("VAP"),
               "WVAP": Tally("WVAP"),
               "HVAP": Tally("HVAP"),
               "ASIANVAP": Tally("ASIANVAP"),
               "HVAP_perc": lambda p: {k: (v / p["VAP"][k]) for k, v in p["HVAP"].items()},
               "WVAP_perc": lambda p: {k: (v / p["VAP"][k]) for k, v in p["WVAP"].items()},
               "ASIANVAP_perc": lambda p: {k: (v / p["VAP"][k]) for k, v in p["ASIANVAP"].items()},
               "HAVAP_perc": lambda p: {k: ((p["HVAP"][k] + p["ASIANVAP"][k]) / v) for k, v in p["VAP"].items()},}

# election_updaters = {election.name: election for election in elections}
# or_updaters.update(election_updaters)

## Create seed plans and Set up Markov chain
Example #14
    Election("AG16", {
        "Dem": "AG16D",
        "Rep": "AG16R"
    }),
    Election("SOS16", {
        "Dem": "SOS16D",
        "Rep": "SOS16R"
    }),
    Election("GOV18", {
        "Dem": "GOV18D",
        "Rep": "GOV18R"
    })
]

or_updaters = {
    "population": Tally(POP_COL, alias="population"),
    "cut_edges": cut_edges
}

election_updaters = {election.name: election for election in elections}
or_updaters.update(election_updaters)

## Create seed plans and Set up Markov chain

print("Creating seed plan")

total_pop = sum(df[POP_COL])
ideal_pop = total_pop / NUM_DISTRICTS

cddict = recursive_tree_part(graph=graph,
                             parts=range(NUM_DISTRICTS),
def main():
    #gerrychain parameters
    #num districts
    k = 12
    epsilon = .05
    updaters = {
        'population': Tally('population'),
        'cut_edges': cut_edges,
    }
    graph, dual = preprocessing("json/NC.json")
    ideal_population = sum(graph.nodes[x]["population"]
                           for x in graph.nodes()) / k
    faces = graph.graph["faces"]
    faces = list(faces)
    #random.choice(faces) will return a random face
    #TODO: run gerrychain on graph
    totpop = 0
    for node in graph.nodes():
        totpop += int(graph.nodes[node]['population'])
    # length of chain
    steps = 30000
    #beta threshold: how many steps to hold beta at 0

    temperature = 1
    beta_threshold = 10000
    #length of each gerrychain step
    gerrychain_steps = 250
    #faces that are currently sierp
    special_faces = []
    chain_output = {'dem_seat_data': [], 'rep_seat_data': [], 'score': []}
    #start with small score to move in right direction
    chain_output['score'].append(1 / 1100000)

    z = 0
    for i in range(steps):
        z += 1
        if z % 100 == 0:
            print("step ", z)
        face = random.choice(faces)

        ##Makes the Markov chain lazy -- this just makes the chain aperiodic.
        if random.random() > .5:
            if not face in special_faces:
                special_faces.append(face)
            else:
                special_faces.remove(face)

        face_sierpinski_mesh(graph, special_faces)

        initial_partition = Partition(graph,
                                      assignment=config['ASSIGN_COL'],
                                      updaters=updaters)

        # Sets up Markov chain
        popbound = within_percent_of_ideal_population(initial_partition,
                                                      epsilon)
        tree_proposal = partial(
            recom,
            pop_col=config['POP_COL'],
            pop_target=ideal_population,
            epsilon=epsilon,
            node_repeats=1,
            method=facefinder.my_mst_bipartition_tree_random)

        #make new function -- this computes the energy of the current map
        exp_chain = MarkovChain(tree_proposal,
                                Validator([popbound]),
                                accept=accept.always_accept,
                                initial_state=initial_partition,
                                total_steps=gerrychain_steps)
        seats_won_for_republicans = []
        seats_won_for_democrats = []
        for part in exp_chain:
            rep_seats_won = 0
            dem_seats_won = 0
            for i in range(k):
                rep_votes = 0
                dem_votes = 0
                for n in graph.nodes():
                    if part.assignment[n] == i:
                        rep_votes += graph.nodes[n]["EL16G_PR_R"]
                        dem_votes += graph.nodes[n]["EL16G_PR_D"]
                total_seats_dem = int(dem_votes > rep_votes)
                total_seats_rep = int(rep_votes > dem_votes)
                rep_seats_won += total_seats_rep
                dem_seats_won += total_seats_dem
            seats_won_for_republicans.append(rep_seats_won)
            seats_won_for_democrats.append(dem_seats_won)

        score = statistics.mean(seats_won_for_republicans)

        ##This is the acceptance step of the Metropolis-Hastings algorithm.
        if random.random() < min(
                1, (math.exp(score) / chain_output['score'][z - 1])
                **(1 / temperature)):
            #if code acts weird, check if sign is wrong, unsure
            #rand < min(1, P(x')/P(x))
            chain_output['dem_seat_data'].append(seats_won_for_democrats)
            chain_output['rep_seat_data'].append(seats_won_for_republicans)
            chain_output['score'].append(
                math.exp(statistics.mean(seats_won_for_republicans)))
        else:
            chain_output['dem_seat_data'].append(
                chain_output['dem_seat_data'][z - 1])
            chain_output['rep_seat_data'].append(
                chain_output['rep_seat_data'][z - 1])
            chain_output['score'].append(chain_output['score'][z - 1])
Example #16
def main():
    """ Contains majority of expermiment. Runs a markov chain on the state dual graph, determining how the distribution is affected to changes in the
     state dual graph.
     Raises:
        RuntimeError if PROPOSAL_TYPE of config file is neither 'sierpinski'
        nor 'convex'
    """
    output_directory = createDirectory(config)
    epsilon = config["epsilon"]
    k = config["NUM_DISTRICTS"]
    updaters = {
        'population': Tally('population'),
        'cut_edges': cut_edges,
    }
    graph, dual = preprocessing(config["INPUT_GRAPH_FILENAME"],
                                output_directory)
    ideal_population = sum(graph.nodes[x]["population"]
                           for x in graph.nodes()) / k
    faces = graph.graph["faces"]
    faces = list(faces)
    square_faces = [face for face in faces if len(face) == 4]
    totpop = 0
    for node in graph.nodes():
        totpop += int(graph.nodes[node]['population'])
    #length of chain
    steps = config["CHAIN_STEPS"]

    #length of each gerrychain step
    gerrychain_steps = config["GERRYCHAIN_STEPS"]
    #faces that are currently modified. Code maintains a list of modified faces
    #and at each step selects a face; if the face is already in the list it is
    #un-modified, and if not, it is modified by the specified proposal type.
    special_faces = set(
        [face for face in square_faces if np.random.uniform(0, 1) < .5])
    chain_output = defaultdict(list)
    print("Choosing", math.floor(len(faces) * config['PERCENT_FACES']),
          "faces of the dual graph at each step")
    max_score = -math.inf
    #this is the main markov chain
    for i in tqdm.tqdm(range(1, steps + 1), ncols=100, desc="Chain Progress"):
        special_faces_proposal = copy.deepcopy(special_faces)
        proposal_graph = copy.deepcopy(graph)
        if (config["PROPOSAL_TYPE"] == "sierpinski"):
            for i in range(math.floor(len(faces) * config['PERCENT_FACES'])):
                face = random.choice(faces)
                ##Makes the Markov chain lazy -- this just makes the chain aperiodic.
                if random.random() > .5:
                    if not (face in special_faces_proposal):
                        special_faces_proposal.append(face)
                    else:
                        special_faces_proposal.remove(face)
            face_sierpinski_mesh(proposal_graph, special_faces_proposal)
        elif (config["PROPOSAL_TYPE"] == "add_edge"):
            for j in range(
                    math.floor(len(square_faces) * config['PERCENT_FACES'])):
                face = random.choice(square_faces)
                ##Makes the Markov chain lazy -- this just makes the chain aperiodic.
                if random.random() > .5:
                    if not (face in special_faces_proposal):
                        special_faces_proposal.add(face)
                    else:
                        special_faces_proposal.remove(face)
            add_edge_proposal(proposal_graph, special_faces_proposal)
        else:
            raise RuntimeError(
                'PROPOSAL_TYPE must be "sierpinski" or "add_edge"')

        initial_partition = Partition(proposal_graph,
                                      assignment=config['ASSIGN_COL'],
                                      updaters=updaters)

        # Sets up Markov chain
        popbound = within_percent_of_ideal_population(initial_partition,
                                                      epsilon)
        tree_proposal = partial(recom,
                                pop_col=config['POP_COL'],
                                pop_target=ideal_population,
                                epsilon=epsilon,
                                node_repeats=1)

        #make new function -- this computes the energy of the current map
        exp_chain = MarkovChain(tree_proposal,
                                Validator([popbound]),
                                accept=accept.always_accept,
                                initial_state=initial_partition,
                                total_steps=gerrychain_steps)
        seats_won_for_republicans = []
        seats_won_for_democrats = []
        for part in exp_chain:
            rep_seats_won = 0
            dem_seats_won = 0
            for j in range(k):
                rep_votes = 0
                dem_votes = 0
                for n in graph.nodes():
                    if part.assignment[n] == j:
                        rep_votes += graph.nodes[n]["EL16G_PR_R"]
                        dem_votes += graph.nodes[n]["EL16G_PR_D"]
                total_seats_dem = int(dem_votes > rep_votes)
                total_seats_rep = int(rep_votes > dem_votes)
                rep_seats_won += total_seats_rep
                dem_seats_won += total_seats_dem
            seats_won_for_republicans.append(rep_seats_won)
            seats_won_for_democrats.append(dem_seats_won)

        seat_score = statistics.mean(seats_won_for_republicans)

        #implement modified Mattingly simulated annealing scheme, from
        #"Evaluating Partisan Gerrymandering in Wisconsin"
        if i <= math.floor(steps * .67):
            beta = i / math.floor(steps * .67)
        else:
            beta = (i / math.floor(steps * .67)) * 100
        temperature = 1 / (beta)
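
        # Worked example of the schedule (added for clarity): if steps were
        # 10000, the cutoff would be floor(10000 * .67) = 6700; beta climbs
        # linearly to 1 over those steps (temperature = 1/beta cools), then
        # jumps to 100 * i / 6700 > 100, making acceptance nearly greedy.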

        weight_seats = 1
        weight_flips = -.2
        flip_score = len(special_faces)  # This is the number of edges being swapped

        score = weight_seats * seat_score + weight_flips * flip_score

        ##This is the acceptance step of the Metropolis-Hastings algorithm. Specifically, rand < min(1, P(x')/P(x)), where P is the energy and x' is the proposed state
        #if the acceptance criteria is met or if it is the first step of the chain
        def update_outputs():
            chain_output['dem_seat_data'].append(seats_won_for_democrats)
            chain_output['rep_seat_data'].append(seats_won_for_republicans)
            chain_output['score'].append(score)
            chain_output['seat_score'].append(seat_score)
            chain_output['flip_score'].append(flip_score)

        def propagate_outputs():
            for key in chain_output.keys():
                chain_output[key].append(chain_output[key][-1])

        if i == 1:
            update_outputs()
            special_faces = copy.deepcopy(special_faces_proposal)
        #this is the simplified form of the acceptance criteria, for intuitive purposes
        #exp((1/temperature) ( proposal_score - previous_score))
        elif np.random.uniform(0, 1) < (math.exp(score) / math.exp(
                chain_output['score'][-1]))**(1 / temperature):
            update_outputs()

            special_faces = copy.deepcopy(special_faces_proposal)
        else:
            propagate_outputs()

        #if score is highest seen, save map.
        if score > max_score:
            #todo: all graph coloring for graph changes that produced this score
            nx.write_gpickle(proposal_graph,
                             output_directory + '/' + "max_score",
                             pickle.HIGHEST_PROTOCOL)
            with open(output_directory + "/max_score_data.txt", "w+") as f:
                f.write("maximum score: " + str(score) + "\n" +
                        "edges changed: " + str(len(special_faces)) + "\n" +
                        "Seat Score: " + str(seat_score))
            save_obj(special_faces, output_directory + '/', "special_faces")
            max_score = score

    plt.plot(range(len(chain_output['score'])), chain_output['score'])
    plt.xlabel("Meta-Chain Step")
    plt.ylabel("Score")
    plot_name = output_directory + '/' + 'score' + '.png'
    plt.savefig(plot_name)

    ## Todo: Add scatter plot of the seat_score and flip_score here.

    save_obj(chain_output, output_directory, "chain_output")
                return parent["step_num"] + 1

            bnodes = [
                x for x in graph.nodes() if graph.nodes[x]["boundary_node"] == 1
            ]

            def bnodes_p(partition):

                return [
                    x for x in graph.nodes()
                    if graph.nodes[x]["boundary_node"] == 1
                ]

            updaters = {
                'population': Tally('population'),
                "boundary": bnodes_p,
                "slope": boundary_slope,
                'cut_edges': cut_edges,
                'step_num': step_num,
                'b_nodes': b_nodes_bi,
                'base': new_base,
                'geom': geom_wait,
                #"Pink-Purple": Election("Pink-Purple", {"Pink":"pink","Purple":"purple"})
            }

            #########BUILD PARTITION

            grid_partition = Partition(graph,
                                       assignment=cddict,
                                       updaters=updaters)
    graph.nodes[node]["area"] = float(graph.nodes[node][area_name])

#######################################################################
# initialize partitions
tree_walk = False
population_size = 10

#IOWA starting
# init_dists = {88: 0, 75: 0, 98: 0, 82: 0, 45: 0, 29: 0, 33: 0, 37: 0, 96: 0, 80: 0, 78: 0, 40: 0, 81: 0, 63: 0, 83: 0, 1: 0, 74: 0, 0: 0, 62: 0, 4: 0, 86: 0, 27: 0, 6: 0, 52: 0, 89: 0, 11: 0, 91: 0, 15: 0, 23: 0, 31: 0, 85: 0, 59: 0, 5: 1, 10: 1, 38: 1, 8: 1, 92: 1, 16: 1, 24: 1, 53: 1, 76: 1, 94: 1, 28: 1, 35: 1, 34: 1, 51: 2, 93: 2, 54: 2, 95: 2, 25: 2, 73: 2, 22: 2, 47: 2, 71: 2, 41: 2, 60: 3, 17: 3, 12: 3, 30: 3, 65: 3, 79: 3, 36: 3, 50: 3, 56: 3, 58: 3, 49: 3, 18: 3, 9: 3, 7: 3, 87: 3, 90: 3, 44: 3, 77: 3, 13: 3, 14: 3, 66: 3, 42: 3, 20: 3, 69: 3, 55: 3, 70: 3, 46: 3, 19: 3, 61: 3, 2: 3, 67: 3, 97: 3, 43: 3, 72: 3, 26: 3, 39: 3, 84: 3, 32: 3, 64: 3, 21: 3, 57: 3, 3: 3, 48: 3, 68: 3}
# cddict = {v:int(init_dists[v]) for v in graph.nodes()}

#general starting
cddict = recursive_tree_part(graph, range(k), ideal_pop, "TOTPOP", .02, 3)

updaters = {
    "population": Tally("TOTPOP", alias="population"),
    "cut_edges": cut_edges,
    "centroids": centroids_x_y_area
}

init_partition = Partition(graph, assignment=cddict, updaters=updaters)

if tree_walk:
    ideal_population = sum(
        init_partition["population"].values()) / len(init_partition)

    proposal = partial(
        recom,
        pop_col="TOTPOP",
        pop_target=ideal_population,
        epsilon=0.02,
Example #19
    else:
        beta = 3
    return beta


fips = "05"
graph = Graph.from_json("./BG05/BG05.json")
totpop = sum([int(graph.nodes[n]["TOTPOP"]) for n in graph.nodes])

for n in graph.nodes:
    graph.nodes[n]["TOTPOP"] = int(graph.nodes[n]["TOTPOP"])

betas = []
ts = []
myupdaters = {
    'population': Tally('TOTPOP', alias="population"),
    'cut_edges': cut_edges,
    'step_num': step_num,
}

runlist = [0]
partdict = {r: [] for r in runlist}
allparts = []

#run annealing flip
for run in runlist:
    initial_ass = recursive_tree_part(graph, range(6), totpop / 6, "TOTPOP",
                                      .01, 1)
    initial_partition = Partition(graph,
                                  assignment=initial_ass,
                                  updaters=myupdaters)
def run_experiment(bases=[2 * 2.63815853],
                   pops=[.1],
                   time_between_outputs=10000,
                   total_run_length=100000000000000):

    mu = 2.63815853
    subsequence_step_size = 10000
    balances_burn_in = 1000000  #ignore the first 1000000 balances

    # creating the boundary figure plot
    fig = plt.figure()
    #fig_intervals = plt.figure()
    #ax2=fig.add_axes([0,0,1,1])
    ax = plt.subplot(111, projection='polar')
    #ax.set_axis_off()
    #ax.xaxis.set_ticklabels([])
    ax.yaxis.set_ticklabels([])

    for pop1 in pops:
        for base in bases:
            for alignment in [1]:

                gn = 20
                k = 2
                ns = 120
                p = .5

                graph = nx.grid_graph([k * gn, k * gn])

                ########## BUILD ASSIGNMENT
                #cddict = {x: int(x[0]/gn)  for x in graph.nodes()}
                cddict = {x: 1 - 2 * int(x[0] / gn) for x in graph.nodes()}

                for n in graph.nodes():
                    if alignment == 0:
                        if n[0] > 19:
                            cddict[n] = 1
                        else:
                            cddict[n] = -1
                    elif alignment == 1:
                        if n[1] > 19:
                            cddict[n] = 1
                        else:
                            cddict[n] = -1
                    elif alignment == 2:
                        if n[0] > n[1]:
                            cddict[n] = 1
                        elif n[0] == n[1] and n[0] > 19:
                            cddict[n] = 1
                        else:
                            cddict[n] = -1
                    elif alignment == 10:
                        #This is for debugging the case of reaching trivial partitions.
                        if n[0] == 10 and n[1] == 10:
                            cddict[n] = 1
                        else:
                            cddict[n] = -1

                for n in graph.nodes():
                    graph.nodes[n]["population"] = 1
                    graph.nodes[n]["part_sum"] = cddict[n]
                    graph.nodes[n]["last_flipped"] = 0
                    graph.nodes[n]["num_flips"] = 0

                    if random.random() < p:
                        graph.nodes[n]["pink"] = 1
                        graph.nodes[n]["purple"] = 0
                    else:
                        graph.nodes[n]["pink"] = 0
                        graph.nodes[n]["purple"] = 1
                    if 0 in n or k * gn - 1 in n:
                        graph.nodes[n]["boundary_node"] = True
                        graph.nodes[n]["boundary_perim"] = 1

                    else:
                        graph.nodes[n]["boundary_node"] = False

                #graph.add_edges_from([((0,1),(1,0)), ((0,38),(1,39)), ((38,0),(39,1)), ((38,39),(39,38))])

                for edge in graph.edges():
                    graph[edge[0]][edge[1]]['cut_times'] = 0

                #this part adds queen adjacency
                #for i in range(k*gn-1):
                #    for j in range(k*gn):
                #        if j<(k*gn-1):
                #            graph.add_edge((i,j),(i+1,j+1))
                #            graph[(i,j)][(i+1,j+1)]["shared_perim"]=0
                #        if j >0:
                #            graph.add_edge((i,j),(i+1,j-1))
                #            graph[(i,j)][(i+1,j-1)]["shared_perim"]=0

                #graph.remove_nodes_from([(0,0),(0,39),(39,0),(39,39)])

                #del cddict[(0,0)]

                #del cddict[(0,39)]

                # cddict[(39,0)]

                #del cddict[(39,39)]
                ######PLOT GRIDS
                """
                plt.figure()
                nx.draw(graph, pos = {x:x for x in graph.nodes()} ,node_size = ns, node_shape ='s')
                plt.show()

                cdict = {1:'pink',0:'purple'}

                plt.figure()
                nx.draw(graph, pos = {x:x for x in graph.nodes()}, node_color = [cdict[graph.nodes[x]["pink"]] for x in graph.nodes()],node_size = ns, node_shape ='s' )
                plt.show()

                plt.figure()
                nx.draw(graph, pos = {x:x for x in graph.nodes()}, node_color = [cddict[x] for x in graph.nodes()] ,node_size = ns, node_shape ='s',cmap = 'tab20')
                plt.show()
                """

                ####CONFIGURE UPDATERS

                def new_base(partition):
                    return base

                def step_num(partition):
                    parent = partition.parent

                    if not parent:
                        return 0

                    return parent["step_num"] + 1

                bnodes = [
                    x for x in graph.nodes()
                    if graph.nodes[x]["boundary_node"] == 1
                ]

                def bnodes_p(partition):

                    return [
                        x for x in graph.nodes()
                        if graph.nodes[x]["boundary_node"] == 1
                    ]

                updaters = {
                    'population': Tally('population'),
                    "boundary": bnodes_p,
                    #"slope": boundary_slope,
                    'cut_edges': cut_edges,
                    'step_num': step_num,
                    'b_nodes': b_nodes_bi,
                    'base': new_base,
                    'geom': geom_wait,
                    #"Pink-Purple": Election("Pink-Purple", {"Pink":"pink","Purple":"purple"})
                }

                balances = []

                #########BUILD PARTITION

                grid_partition = Partition(graph,
                                           assignment=cddict,
                                           updaters=updaters)

                #ADD CONSTRAINTS
                popbound = within_percent_of_ideal_population(
                    grid_partition, pop1)

                #plt.figure()
                #nx.draw(graph, pos = {x:x for x in graph.nodes()}, node_color = [dict(grid_partition.assignment)[x] for x in graph.nodes()] ,node_size = ns, node_shape ='s',cmap = 'tab20')
                #plt.savefig("./plots/"+str(alignment)+"B"+str(int(100*base))+"P"+str(int(100*pop1))+"start.png")
                #plt.close()

                #########Setup Proposal
                ideal_population = sum(grid_partition["population"].values()
                                       ) / len(grid_partition)

                tree_proposal = partial(recom,
                                        pop_col="population",
                                        pop_target=ideal_population,
                                        epsilon=pop1,
                                        node_repeats=1)

                #######BUILD MARKOV CHAINS

                exp_chain = MarkovChain(
                    slow_reversible_propose_bi,
                    Validator([
                        single_flip_contiguous,
                        popbound  #,boundary_condition
                    ]),
                    accept=cut_accept,
                    initial_state=grid_partition,
                    total_steps=total_run_length)

                #########Run MARKOV CHAINS

                rsw = []
                rmm = []
                reg = []
                rce = []
                rbn = []
                waits = []

                slopes = []
                angles = []
                angles_safe = []
                ends_vectors_normalized = LinkedList()
                ends_vectors_normalized_bloated = LinkedList()
                import time

                st = time.time()

                total_waits = 0
                last_total_waits = 0
                t = 0

                subsequence_timer = 0

                balances = {}
                for b in np.linspace(0, 2, 100001):
                    balances[int(b * 100) / 100] = 0

                #first_partition = True
                for part in exp_chain:
                    rce.append(len(part["cut_edges"]))
                    wait_time_rv = part.geom
                    waits.append(wait_time_rv)
                    total_waits += wait_time_rv
                    rbn.append(len(list(part["b_nodes"])))

                    if total_waits > subsequence_timer + subsequence_step_size:

                        last_total_waits = total_waits

                        ends = boundary_ends(part)
                        if len(ends) == 2:
                            ends_vector = np.asarray(ends[1]) - np.asarray(ends[0])
                            ends_vector_normalized = ends_vector / np.linalg.norm(ends_vector)

                            #if first_partition == True:
                            #    ends_vectors_normalized.last_vector = ends_vector_normalized
                            #    first_partition = False

                            if ends_vectors_normalized.last:
                                # We choose the vector that preserves continuity
                                # previous_angle = ends_vectors_normalized.last_value()
                                previous = ends_vectors_normalized.last_vector

                                d_previous = np.linalg.norm(previous - ends_vector_normalized)
                                d_previous_neg = np.linalg.norm(previous + ends_vector_normalized)
                                if d_previous < d_previous_neg:
                                    continuous_lift = ends_vector_normalized
                                else:
                                    continuous_lift = -1 * ends_vector_normalized
                                    #print(previous, ends_vector_normalized)

                            else:
                                continuous_lift = ends_vector_normalized  # *random.choice([-1,1])
                                # just to debias it, in the regime of very unbalanced partitions
                                # that touch the empty partition frequently

                        else:
                            continuous_lift = [0, 0]


                        # Population balance: each side's population relative to
                        # the ideal (half the total).
                        left_pop, right_pop = part["population"].values()
                        ideal_population = (left_pop + right_pop) / 2
                        left_bal = (left_pop / ideal_population)
                        right_bal = (right_pop / ideal_population)
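                        # Note: left_bal + right_bal == 2 by construction; both
                        # rounded values are bucketed into the balances
                        # histogram below.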

                        while subsequence_timer < total_waits:
                            subsequence_timer += subsequence_step_size
                            if (continuous_lift == np.asarray([0, 0])).all():
                                # Exceptional case: the boundary has vanished, so
                                # there is no angle to record; flag it with False.
                                lifted_angle = False
                                print("boundary vanished (non-simply-connected)")
                                draw_other_plots(balances, graph, alignment,
                                                 "NonSimplyConnected", base,
                                                 pop1, part, ns)
                            else:
                                lifted_angle = np.arctan2(continuous_lift[1],
                                                          continuous_lift[0])
                                ends_vectors_normalized.last_vector = continuous_lift
                            ends_vectors_normalized.append(lifted_angle)

                            if subsequence_timer > balances_burn_in:
                                # Truncate balances to two decimals and bucket
                                # them; wait times are accounted for by the
                                # enclosing while loop.
                                left_bal_rounded = int(left_bal * 100) / 100
                                right_bal_rounded = int(right_bal * 100) / 100
                                balances[left_bal_rounded] += 1
                                balances[right_bal_rounded] += 1

                    for edge in part["cut_edges"]:
                        graph[edge[0]][edge[1]]["cut_times"] += wait_time_rv

                    if part.flips is not None:
                        f = list(part.flips.keys())[0]

                        graph.nodes[f]["part_sum"] -= part.assignment[f] * (
                            total_waits - graph.nodes[f]["last_flipped"])
                        graph.nodes[f]["last_flipped"] = total_waits
                        graph.nodes[f]["num_flips"] += wait_time_rv

                    t += 1

                    if t % time_between_outputs == 0:

                        # Note: the first entry of ends_vectors_normalized
                        # overlaps with the last one of the previous dump.

                        identifier_string = ("state_after_num_steps" + str(t) +
                                             "and_time" + str(time.time() - st))

                        with open(
                                "./plots/" + str(alignment) + "B" +
                                str(int(100 * base)) + "P" +
                                str(int(100 * pop1)) + "wait.txt",
                                'w') as wfile:
                            wfile.write(str(sum(waits)))

                        #with open("./plots/"+str(alignment)+"B"+str(int(100*base))+"P"+str(int(100*pop1)) + "ends_vectors.txt",'w') as wfile:
                        #    wfile.write(str(ends_vectors_normalized))

                        #with open("./plots/"+str(alignment)+"B"+str(int(100*base))+"P"+str(int(100*pop1)) + "ends_vectors.pkl",'wb') as wfile:
                        #    pickle.dump(ends_vectors_normalized, wfile)

                        with open(
                                "./plots/" + str(alignment) + "B" +
                                str(int(100 * base)) + "P" +
                                str(int(100 * pop1)) + "balances.txt",
                                'w') as wfile:
                            wfile.write(str(balances))
                        with open(
                                "./plots/" + str(alignment) + "B" +
                                str(int(100 * base)) + "P" +
                                str(int(100 * pop1)) + "balances.pkl",
                                'wb') as wfile:
                            pickle.dump(balances, wfile)

                        for n in graph.nodes():
                            if graph.nodes[n]["last_flipped"] == 0:
                                graph.nodes[n]["part_sum"] = (
                                    total_waits * part.assignment[n])
                            graph.nodes[n]["lognum_flips"] = math.log(
                                graph.nodes[n]["num_flips"] + 1)

                        total_part_sum = 0
                        for n in graph.nodes():
                            total_part_sum += graph.nodes[n]["part_sum"]

                        for n in graph.nodes():
                            if total_part_sum != 0:
                                graph.nodes[n]["normalized_part_sum"] = (
                                    graph.nodes[n]["part_sum"] / total_part_sum)
                            else:
                                graph.nodes[n]["normalized_part_sum"] = 0


                        print("creating boundary plot, ", time.time())

                        max_time = ends_vectors_normalized.last.end_time
                        non_simply_connected_intervals = [
                            [x.start_time, x.end_time]
                            for x in ends_vectors_normalized
                            if isinstance(x.data, bool)
                        ]
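                        # A boolean data field is the False flag appended when
                        # the boundary vanished; those intervals are shaded red
                        # below.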

                        for x in ends_vectors_normalized:
                            if not isinstance(x.data, bool):
                                times = [x.start_time, x.end_time]
                                angles = [x.data] * len(times)
                                plt.polar(angles, times, lw=.1, color='b')

                                # Connect to the next recorded angle; the final
                                # check skips the big jumps that happen with
                                # small-size subcritical boundaries.
                                next_point = x.next
                                if next_point is not None and not isinstance(
                                        next_point.data, bool):
                                    if np.abs(x.data - next_point.data) % (
                                            2 * np.pi) < .1:
                                        plt.polar(
                                            [x.data, next_point.data],
                                            [x.end_time, next_point.start_time],
                                            lw=.1,
                                            color='b')

                        # Create the regular segments corresponding to time
                        '''
                        for k in range(11):
                            plt.polar ( np.arange(0, (2 * np.pi), 0.01), [int(max_time/10) * k ] * len( np.arange(0, (2 * np.pi), 0.01)), lw = .2, color = 'g' )
                        '''
                        # Create the intervals representing when the partition is null.
                        # Removing these might be just as good, and a little cleaner.
                        theta = np.arange(0, 2 * np.pi, 0.01)
                        for interval in non_simply_connected_intervals:
                            start, end = interval
                            for s in np.linspace(start, end, 10):
                                plt.polar(theta,
                                          s * np.ones(len(theta)),
                                          lw=.3,
                                          color='r')

                        plt.savefig("./plots/" + str(alignment) + "B" +
                                    str(int(100 * base)) + "P" +
                                    str(int(100 * pop1)) + str("proposals_") +
                                    identifier_string + "boundary_slope.png",
                                    dpi=500)

                        # now clear the ends vectors list
                        last = ends_vectors_normalized.last
                        last_non_zero = ends_vectors_normalized.last_non_zero
                        last_vector = ends_vectors_normalized.last_vector
                        # Explicit Garbage collection https://stackoverflow.com/questions/1316767/how-can-i-explicitly-free-memory-in-python

                        print(
                            "finished boundary plot, doing garbage collection",
                            time.time())
                        del ends_vectors_normalized
                        gc.collect()

                        ends_vectors_normalized = LinkedList()
                        ends_vectors_normalized.head = last
                        ends_vectors_normalized.last = last
                        ends_vectors_normalized.last_non_zero = last_non_zero  # can be ahead of head...
                        ends_vectors_normalized.last_vector = last_vector
                        #print(last)

                        print("drawing other plots", time.time())
                        draw_other_plots(balances, graph, alignment,
                                         identifier_string, base, pop1, part,
                                         ns)
                        print("finished drawing other plots, ", time.time())
Example #21
uid = "VTD"

graph = Graph.from_geodataframe(df, ignore_errors=True)
print("made graph")
graph.add_data(df, list(df))
graph = nx.relabel_nodes(graph, df[uid])
counties = set(df[county_col])
countydict = dict(graph.nodes(data=county_col))

starting_partition = GeographicPartition(
    graph,
    assignment="2011_PLA_1",
    updaters={
        "polsby_popper": polsby_popper,
        "cut_edges": cut_edges,
        "population": Tally(pop_col, alias="population"),
    }
)

county_edge_count = {}
for i in counties:
    county_graph = graph.subgraph(
        [n for n, v in graph.nodes(data=True) if v[county_col] == i])
    county_edge_count[i] = len(county_graph.edges())
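
#### A hedged sketch (assumed, not from the source): with county_edge_count in
#### hand, a natural diagnostic is the fraction of each county's internal edges
#### that a plan cuts. The helper below is hypothetical.
def county_cut_fraction(partition):
    # Count cut edges whose endpoints share a county (hypothetical helper).
    cut_within = {c: 0 for c in counties}
    for u, v in partition["cut_edges"]:
        if countydict[u] == countydict[v]:
            cut_within[countydict[u]] += 1
    return {c: cut_within[c] / county_edge_count[c]
            for c in counties if county_edge_count[c] > 0}

print(county_cut_fraction(starting_partition))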

def county_splits_dict(partition):
    """
    From a partition, generates a dictionary of counter dictionaries.
    """


def step_num(partition):
    parent = partition.parent

    if not parent:
        return 0

    return parent["step_num"] + 1


bnodes = [x for x in graph.nodes() if graph.nodes[x]["boundary_node"] == 1]


def bnodes_p(partition):
    return [x for x in graph.nodes() if graph.nodes[x]["boundary_node"] == 1]


updaters = {'population': Tally('population'),
            "boundary": bnodes_p,
            'cut_edges': cut_edges,
            'step_num': step_num,
            'b_nodes': b_nodes_bi,
            'base': new_base,
            'geom': geom_wait,
            # "Pink-Purple": Election("Pink-Purple", {"Pink": "pink", "Purple": "purple"})
            }

######### BUILD PARTITION
# Build the assignment dictionary: blocks [1, 2, 3], [4, 5, 6], ... become
# {1: 0, 2: 0, ..., 5: 1, 6: 1, ...}
partition_dict = {}
partition_block = my_mst_kpartition_tree_random(
    graph, pop_col="population", pop_target=0, epsilon=0.05)
    TOT_WORKERS = args.workers

    manager = Manager()
    results = manager.dict()

    race_matrix = load_data(CITY_NAME, STATE, STATE_FIPS)

    # build chain
    graph = Graph.from_geodataframe(race_matrix, adjacency="queen")
    nx.set_node_attributes(graph,
                           race_matrix["total"].to_dict(),
                           name="population")
    init_partition = Partition(
        graph,
        assignment=race_matrix.to_dict()["partition"],
        updaters={"population": Tally("population")},
    )

    # validators
    def mean_pop(part):
        return np.mean(list(part["population"].values()))

    def min_pop(part):
        return min(list(part["population"].values()))

    def sd_pop(part):
        return np.std(list(part["population"].values()))

    mean_one_sd_up = mean_pop(init_partition) + (2 / 3) * sd_pop(init_partition)
    mean_one_sd_down = mean_pop(init_partition) - (2 / 3) * sd_pop(init_partition)
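
    #### A hedged sketch (assumed, not from the source): bounds like these
    #### typically back a population validator; the helper below is hypothetical.
    def pop_in_band(part):
        # Accept a partition only if every district's population lies within
        # two-thirds of a standard deviation of the mean (hypothetical check).
        pops = list(part["population"].values())
        return mean_one_sd_down <= min(pops) and max(pops) <= mean_one_sd_up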
Example #24
import numpy as np
import math
import matplotlib.pyplot as plt
from sklearn import manifold

map_json_file = "res/PA_VTD.json"
vtd_column = "VTDST10"
population_column = "TOT_POP"
popper_column = "pop_percent"
plans = {"2011": "2011_PLA_1", "GOV": "GOV", "TS": "TS",
         "REM": "REMEDIAL_P", "CPCT": "538CPCT__1",
         "DEM": "538DEM_PL", "REP": "538GOP_PL",
         "8TH": "8THGRADE_1"}
weight_by_population = True

myupdaters = {"population": Tally(population_column, alias="population")}


def rel_entropy(graph, X, Y):
    """Population-weighted conditional entropy of partition X given partition Y."""
    tot_pop = (sum(graph.nodes[n][population_column] for n in graph.nodes())
               if weight_by_population else len(graph.nodes()))
    res = 0
    for j, Yj in Y.parts.items():
        qj_pop = (sum(graph.nodes[n][population_column] for n in Yj)
                  if weight_by_population else len(Yj))
        entropy = 0
        for i, Xi in X.parts.items():
            p = (sum(graph.nodes[n][population_column] for n in Yj & Xi) / qj_pop
                 if weight_by_population else len(Yj & Xi) / qj_pop)
            if p == 0:
                continue
            entropy -= p * math.log(p)
        res += (qj_pop / tot_pop) * entropy
    return res
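
#### A hedged usage sketch (assumed, not from the source): load the dual graph
#### and compare two of the plans defined above. Graph.from_json and Partition
#### come from gerrychain; the variable names mirror those set earlier.
graph = Graph.from_json(map_json_file)
X = Partition(graph, assignment=plans["2011"], updaters=myupdaters)
Y = Partition(graph, assignment=plans["REM"], updaters=myupdaters)
print(rel_entropy(graph, X, Y))
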
def main(config_data, id):
    """Runs a single experiment with the given config file. Loads a graph,
    runs a chain to search for a gerrymander, metamanders around that
    partition, runs another chain, and then saves the generated data.

    Args:
        config_data (Object): configuration of experiment loaded from JSON file
        id (String): id of the experiment, used in tags to differentiate
            between experiments
    """
    try:
        timeBeg = time.time()
        print('Experiment', id, 'has begun')
        # Save configuration into global variable
        global config
        config = config_data

        # Get graph and dual graph
        graph, dual = preprocessing(config["INPUT_GRAPH_FILENAME"])
        # List of districts in the original graph
        parts = list({
            graph.nodes[node][config['ASSIGN_COL']]
            for node in graph.nodes()
        })
        # Ideal population of a district
        ideal_pop = sum(
            graph.nodes[node][config['POP_COL']]
            for node in graph.nodes()) / len(parts)
        # Initialize partition
        election = Election(config['ELECTION_NAME'], {
            'PartyA': config['PARTY_A_COL'],
            'PartyB': config['PARTY_B_COL']
        })

        updaters = {
            'population': Tally(config['POP_COL']),
            'cut_edges': cut_edges,
            config['ELECTION_NAME']: election
        }

        partition = Partition(graph=graph,
                              assignment=config['ASSIGN_COL'],
                              updaters=updaters)
        # Run Chain to search for a gerrymander, and get it
        mander = run_chain(partition, config['CHAIN_TYPE'],
                           config['FIND_GERRY_LENGTH'], ideal_pop, id + 'a',
                           config['ORIG_RUN_STATS_TAG'] + id)
        savePartition(mander, config['LEFT_MANDER_TAG'] + id)
        # Metamanders around the found gerrymander
        metamander_around_partition(mander, dual, config['TARGET_TAG'] + id,
                                    config['SECRET'], config['META_PARAM'])
        # Refresh assignment and election of partition
        updaters[config['ELECTION_NAME']] = Election(
            config['ELECTION_NAME'], {
                'PartyA': config['PARTY_A_COL'],
                'PartyB': config['PARTY_B_COL']
            })
        partition = Partition(graph=graph,
                              assignment=config['ASSIGN_COL'],
                              updaters=updaters)
        # Run chain again
        run_chain(partition, config['CHAIN_TYPE'],
                  config['SAMPLE_META_LENGTH'], ideal_pop, id + 'b',
                  config['GERRY_RUN_STATS_TAG'] + id)
        # Save data from experiment to JSON files
        drawGraph(partition.graph, 'cut_times',
                  config['GRAPH_TAG'] + '_single_raw_' + id)
        drawGraph(partition.graph, 'sibling_cuts',
                  config['GRAPH_TAG'] + '_single_adjusted_' + id)
        drawDoubleGraph(partition.graph, 'cut_times',
                        config['GRAPH_TAG'] + '_double_raw_' + id)
        drawDoubleGraph(partition.graph, 'sibling_cuts',
                        config['GRAPH_TAG'] + '_double_adjusted_' + id)
        saveGraphStatistics(partition.graph,
                            config['GRAPH_STATISTICS_TAG'] + id)

        print('Experiment {} completed in {:.2f} seconds'.format(
            id,
            time.time() - timeBeg))
    except Exception as e:
        # Print notification if any experiment fails to complete
        track = traceback.format_exc()
        print(track)
        print('Experiment {} failed to complete after {:.2f} seconds'.format(
            id,
            time.time() - timeBeg))
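
#### A hedged driver sketch (assumed, not from the source): read a JSON config
#### of the shape main() expects and run a single experiment; the config path
#### and experiment id are placeholders.
if __name__ == '__main__':
    import json
    import sys

    with open(sys.argv[1]) as f:
        config_data = json.load(f)
    main(config_data, 'exp0')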