from distopia.app.agent import VoronoiAgent


class RewardEvaluator:
    objective_ids = []

    def setup_voronoi(self):
        self.voronoi = VoronoiAgent()
        self.voronoi.load_data()

    def setup_objectives(self, objectives):
        self.objective_ids = []
        try:
            for objective in objectives:
                self.objective_ids.append(
                    self.voronoi.metrics.index(objective))
        except ValueError:
            raise ValueError(
                "Trying to optimize on {} but it doesn't exist!".format(
                    objective))

    def evaluate(self, observation):
        try:
            state_metrics, district_metrics = self.voronoi.compute_voronoi_metrics(
                observation)
        except Exception as e:
            print("Couldn't compute Voronoi for {}: {}".format(observation, e))
            return False
        try:
            return self.calculate_reward(state_metrics, district_metrics)
        except ValueError as v:
            print("Problem calculating the metrics: {}".format(v))
            return False

    def calculate_reward_range(self, info):
        raise NotImplementedError

    def calculate_reward(self, state_metrics, district_metrics):
        raise NotImplementedError

    def extract_objectives(self, districts, objectives=None):
        if objectives is None:
            objectives = self.objective_ids
        all_objectives = {}
        if len(districts) < 1:
            raise ValueError("No Districts")
        for obj in objectives:
            objective_vals = []
            for d in districts:
                data = districts[d][obj].get_data()
                if len(data['data']) < 1:
                    raise ValueError("Empty District {}".format(d))
                else:
                    objective_vals.append(data['scalar_value'])
            all_objectives[obj] = objective_vals
        return all_objectives
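# Usage sketch for RewardEvaluator. The subclass name (SumRewardEvaluator)
# and the reward definition are illustrative, not part of the codebase:
# the base class only requires that calculate_reward be overridden.
class SumRewardEvaluator(RewardEvaluator):
    def calculate_reward(self, state_metrics, district_metrics):
        # minimize the first configured objective by negating its
        # district-wise sum
        objectives = self.extract_objectives(district_metrics)
        return -sum(objectives[self.objective_ids[0]])


evaluator = SumRewardEvaluator()
evaluator.setup_voronoi()
evaluator.setup_objectives(['population'])
# reward = evaluator.evaluate(fiducial_dict)  # e.g. {0: [(400, 400)], ...}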
from skopt import forest_minimize  # scikit-optimize's random-forest SMBO

from distopia.app.agent import VoronoiAgent


class RFBO:
    '''Random-forest Bayesian minimization.

    Takes a single objective for now, e.g. 'population'.
    '''
    max_cost = 1e9

    def __init__(self, objective, objective_fn, n_fiducials=4):
        self.init_agent()
        self.objective_fn = objective_fn
        self.objective_name = objective
        self.n_fiducials = n_fiducials
        try:
            self.objective_id = self.agent.metrics.index(objective)
        except ValueError:
            print("Trying to optimize on {} but it doesn't exist!".format(
                objective))
            raise
        self.init_cost_dimensions()
        self.n_calls = 0

    def init_agent(self):
        self.agent = VoronoiAgent()
        self.agent.load_data()
        self.width, self.height = self.agent.screen_size

    def init_cost_dimensions(self):
        self.cost_dimensions = []
        for i in range(self.n_fiducials):
            self.cost_dimensions.append((0, self.width))
            self.cost_dimensions.append((0, self.height))

    def cost_function(self, x):
        '''x is a tuple of fiducial x,y coords, e.g. (x0, y0, x1, y1, ...)
        '''
        assert len(x) == 2 * self.n_fiducials
        self.n_calls += 1
        fids = {}
        for i in range(0, len(x), 2):
            fids[i // 2] = [(x[i], x[i + 1])]
        try:
            state_m, district_m = self.agent.compute_voronoi_metrics(fids)
        except Exception:
            print("Couldn't compute Voronoi for {}".format(fids))
            return self.max_cost
        try:
            objectives = self.extract_objective(district_m)
            cost = self.objective_fn(objectives)
            print("{}:{}".format(self.n_calls, cost))
        except ValueError as v:
            print(v)
            cost = self.max_cost
        return cost

    def extract_objective(self, districts):
        objective_vals = []
        if len(districts) < 1:
            raise ValueError("No Districts")
        for d in districts:
            data = districts[d][self.objective_id].get_data()
            if len(data['data']) < 1:
                raise ValueError("Empty District")
            else:
                objective_vals.append(data['scalar_value'])
        return objective_vals

    def minimize(self, max_iters=100):
        return forest_minimize(self.cost_function,
                               self.cost_dimensions,
                               n_calls=max_iters)
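# Usage sketch for RFBO (assumes scikit-optimize is installed). The
# objective function below, which minimizes the std of per-district
# population, is illustrative; any callable over the objective list works.
import numpy as np

opt = RFBO('population',
           objective_fn=lambda vals: float(np.std(vals)),
           n_fiducials=4)
result = opt.minimize(max_iters=50)
print(result.x, result.fun)  # best fiducial layout found and its cost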
import itertools
import pickle
from copy import deepcopy

import numpy as np

from distopia.app.agent import VoronoiAgent
# ColliderException and the Environment base class come from elsewhere in
# this repo; their import paths are assumed here.


class DistopiaEnvironment(Environment):
    # The VoronoiAgent computes districts and metrics.
    # scalar_value is the mean over districts
    # scalar_std is the standard deviation between districts
    # scalar_maximum is the max over districts
    # s is the state metric object (for this metric)
    # d is the list of district objects (for all metrics)
    metric_extractors = {
        # overall normalization plan: run one-hots in either direction to get
        # rough bounds, then z-normalize and trim on the edges

        # standard deviation of each district's total population (-1)
        # normalization: [0, single-district std]
        'population':
        lambda s, d: np.std(
            [dm.metrics['population'].scalar_value for dm in d]),

        # mean of district margins of victory (-1)
        # normalization: [0, 1]
        'pvi':
        lambda s, d: s.scalar_maximum,

        # mean compactness over districts (maximize compactness,
        # penalize non-compactness) (+1)
        # normalization: [0, 1]
        'compactness':
        lambda s, d: np.mean(
            [dm.metrics['compactness'].scalar_value for dm in d]),

        # mean ratio of Democrats over all voters in each district
        # (could go either way)
        # normalization: [0, 1]
        'projected_votes':
        lambda s, d: np.mean([
            dm.metrics['projected_votes'].scalar_value / dm.metrics[
                'projected_votes'].scalar_maximum for dm in d
        ]),

        # std of the ratio of non-minority to minority voters over districts
        # normalization: [0, ]
        'race':
        lambda s, d: np.std([
            dm.metrics['race'].scalar_value / dm.metrics['race'].scalar_maximum
            for dm in d
        ]),

        # scalar_value is the std of counties within each district; we take
        # the max (-1) to minimize variance within districts
        # (communities of interest)
        'income':
        lambda s, d: np.max([dm.metrics['income'].scalar_value for dm in d]),

        # 'education': lambda s, d: s.scalar_std,

        # maximum-sized district (-1), to minimize difficulty of access
        # normalization: [0, size of Wisconsin]
        'area':
        lambda s, d: s.scalar_maximum
    }

    # The same extractors, but operating on the JSON (dict) representation
    # of the metrics rather than on metric objects.
    json_metric_extractors = {
        'population':
        lambda s, d: np.std([[
            m['scalar_value'] for m in dm['metrics']
            if m['name'] == 'population'
        ][0] for dm in d]),
        'pvi':
        lambda s, d: s['scalar_maximum'],
        # TODO: change compactness from min to avg
        'compactness':
        lambda s, d: np.mean([[
            m['scalar_value'] for m in dm['metrics']
            if m['name'] == 'compactness'
        ][0] for dm in d]),
        'projected_votes':
        lambda s, d: np.mean([[
            m['scalar_value'] / m['scalar_maximum'] for m in dm['metrics']
            if m['name'] == 'projected_votes'
        ][0] for dm in d]),
        'race':
        lambda s, d: np.std(
            [[m['scalar_value'] for m in dm['metrics'] if m['name'] == 'race'
              ][0] for dm in d]),
        'income':
        lambda s, d: np.max(
            [dm['metrics']['income']['scalar_value'] for dm in d]),
        # 'education': lambda s, d: s['scalar_std'],
        'area':
        lambda s, d: s['scalar_maximum']
    }

    def __init__(self, x_lim=(100, 900), y_lim=(100, 900), step_min=10,
                 step_max=100, pop_mean=None, pop_std=None):
        print('initializing DistopiaEnvironment')
        self.x_min, self.x_max = x_lim
        self.y_min, self.y_max = y_lim
        self.step = 1
        self.step_min = step_min
        self.step_max = step_max
        self.pop_mean = pop_mean
        self.pop_std = pop_std
        self.occupied = set()
        self.coord_generator = self.gencoordinates(self.x_min, self.x_max,
                                                   self.y_min, self.y_max)
        self.evaluator = VoronoiAgent()
        self.evaluator.load_data()
        self.state = {}
        self.mean_array = self.std_array = None
        # subsample_scale defaults to 1 -> no subsampling.
        # With a subsample scale of n, block positions (starting from x_min
        # and y_min) can only fall on coordinates at n-pixel intervals.
        self.subsample_scale = 1
        assert self.step % self.subsample_scale == 0
        assert self.step >= self.subsample_scale

    def set_normalization(self, standard_file, st_metrics):
        # The only reason this is a separate method is so that
        # distopia_human_logs_processor can instantiate DistopiaEnvironment
        # and set up normalization without doing anything else.
        with open(standard_file, 'rb') as f:
            self.mean_array, self.std_array = pickle.load(f)
        if type(self.mean_array) is not np.ndarray:
            self.mean_array = np.array(self.mean_array)
        if type(self.std_array) is not np.ndarray:
            self.std_array = np.array(self.std_array)
        # Cut the stats down to the metrics we are using. This will not work
        # if they are out of order.
        # TODO: generalize this, maybe by adding metadata to the data file
        self.mean_array = self.mean_array[:len(st_metrics)]
        self.std_array = self.std_array[:len(st_metrics)]

    def set_params(self, specs_dict):
        metrics = specs_dict['metrics']
        if metrics == []:
            self.set_metrics(self.evaluator.metrics)
        else:
            for m in metrics:
                assert m in self.evaluator.metrics
            self.set_metrics(metrics)
        # hopefully the first condition short-circuits
        if 'standardization_file' in specs_dict and specs_dict[
                'standardization_file'] is not None:
            self.set_normalization(specs_dict['standardization_file'],
                                   self.metrics)
        if 'subsample_scale' in specs_dict:
            self.subsample_scale = specs_dict['subsample_scale']
            self.step = self.subsample_scale
            assert self.step % self.subsample_scale == 0
            assert self.step >= self.subsample_scale
            print("subsample scale environment: " + str(self.subsample_scale))

    def gencoordinates(self, m, n, j, k):
        '''Generate random coordinates in range x: (m,n) y: (j,k).

        Instantiate the generator and call next(g). Based on:
        https://stackoverflow.com/questions/30890434/how-to-generate-random-pairs-of-numbers-in-python-including-pairs-with-one-entr
        MODIFIED to handle the case where the map is subsampled to reduce
        the state space for the SARSA agent.
        '''
        seen = self.occupied
        x_range = np.arange(m, n + 1, self.subsample_scale)
        y_range = np.arange(j, k + 1, self.subsample_scale)
        x, y = np.random.choice(x_range), np.random.choice(y_range)
        while True:
            while (x, y) in seen:
                x, y = np.random.choice(x_range), np.random.choice(y_range)
            seen.add((x, y))
            yield (x, y)

    def set_metrics(self, metrics):
        '''Define an array of metric names.
        '''
        self.metrics = metrics

    def seed(self, seed):
        np.random.seed(seed)

    def take_step(self, new_state):
        self.state = new_state

    def reset(self, initial=None, n_districts=8, max_blocks_per_district=5):
        '''Initialize the state randomly.
        '''
        # the generator must be reset because the environment is reset at
        # every episode
        self.coord_generator = self.gencoordinates(self.x_min, self.x_max,
                                                   self.y_min, self.y_max)
        if initial is not None:
            self.state = initial
            self.occupied = set(itertools.chain(*self.state.values()))
            return self.state
        else:
            while True:
                self.occupied = set()
                self.state = {}
                # place one block for each district, randomly
                for i in range(n_districts):
                    self.state[i] = [next(self.coord_generator)]
                initial_blocks = [p[0] for p in self.state.values()]
                # (a commented-out pass that seeded up to
                # max_blocks_per_district extra blocks per district lived
                # here; the live version is in GreedyAgent.reset)
                if self.get_metrics(self.state) is not None:
                    return self.state

    def get_neighborhood(self, n_steps):
        '''Get all the configs that have one block n_steps away from the
        current config.
        '''
        neighborhood = []
        state = self.state
        for district_id, district in state.items():
            for block_id, block in enumerate(district):
                neighborhood += self.get_neighbors(district_id, block_id)
        return neighborhood

    def get_sampled_neighborhood(self, n_blocks, n_directions,
                                 resample=False):
        '''Sample n_blocks * n_directions neighbors.

        Take n blocks and move each one according to m direction/angle
        pairs. Ignore samples that are prima facie invalid (out of bounds
        or overlapping). If resample is True, sample until we have
        n_blocks * n_directions; otherwise, just try that many times.
        '''
        neighbors = []
        n_districts = len(self.state)
        for i in range(n_blocks):
            # sample from districts, then blocks; this biases toward blocks
            # in districts with fewer blocks, which is arguably similar to
            # how humans work
            district_id = np.random.randint(n_districts)
            district = self.state[district_id]
            block_id = np.random.randint(len(district))
            x, y = district[block_id]
            for j in range(n_directions):
                mx, my = self.get_random_move(x, y)
                valid_move = self.check_boundaries(
                    mx, my) and (mx, my) not in self.occupied
                if valid_move:
                    neighbor = {k: list(val) for k, val in self.state.items()}
                    neighbor[district_id][block_id] = (mx, my)
                    neighbors.append(neighbor)
                elif resample == True:
                    # don't use this yet -- needs a max_tries bound
                    while not valid_move:
                        mx, my = self.get_random_move(x, y)
                        valid_move = self.check_boundaries(mx, my)
        return neighbors

    def make_move(self, block_to_move, direction, is_random):
        """Move the specified block in the specified direction and return
        the new design, or -1 if the move is out of bounds or occupied."""
        # `is_random` used to select between step sizes (see the
        # commented-out 4*step variant); both branches now use the base
        # step, so the flag is currently unused.
        moves = [
            np.array((self.step, 0)),
            np.array((-self.step, 0)),
            np.array((0, self.step)),
            np.array((0, -self.step))
        ]
        constraints = [
            lambda x, y: x < self.x_max, lambda x, y: x > self.x_min,
            lambda x, y: y < self.y_max, lambda x, y: y > self.y_min
        ]
        move = moves[direction]
        # assumes each district holds exactly one block
        x, y = self.state[block_to_move][0]
        mx, my = (x, y) + move
        if constraints[direction](mx, my) and (mx, my) not in self.occupied:
            new_state = deepcopy(self.state)
            new_state[block_to_move][0] = (mx, my)
            return new_state
        else:
            return -1

    def get_boundaries(self):
        return [self.x_min, self.x_max, self.y_min, self.y_max]

    def get_random_move(self, x, y):
        # note: the original called np.random.uniform(2 * np.pi), which
        # samples from [1, 2*pi); sample the full angle range instead
        dist, angle = (np.random.randint(self.step_min, self.step_max),
                       np.random.uniform(0, 2 * np.pi))
        return (int(x + np.cos(angle) * dist), int(y + np.sin(angle) * dist))

    def check_boundaries(self, x, y):
        '''Return True if inside screen boundaries.
        '''
        if x < self.x_min or x > self.x_max:
            return False
        if y < self.y_min or y > self.y_max:
            return False
        return True

    def get_neighbors(self, district, block):
        '''Get all the designs that move "block" by one step.

        Ignores moves to coords that are occupied or out of bounds.
        '''
        neighbors = []
        moves = [
            np.array((self.step, 0)),
            np.array((-self.step, 0)),
            np.array((0, self.step)),
            np.array((0, -self.step))
        ]
        constraints = [
            lambda x, y: x < self.x_max, lambda x, y: x > self.x_min,
            lambda x, y: y < self.y_max, lambda x, y: y > self.y_min
        ]
        x, y = self.state[district][block]
        for i, move in enumerate(moves):
            mx, my = (x, y) + move
            if constraints[i](mx, my) and (mx, my) not in self.occupied:
                new_neighbor = deepcopy(self.state)
                new_neighbor[district][block] = (mx, my)
                neighbors.append(new_neighbor)
        return neighbors

    def check_legal_districts(self, districts):
        if len(districts) == 0:
            return False
        # TODO: consider checking for len == 8 here as well
        for d in districts:
            if len(d.precincts) == 0:
                return False
        return True

    def get_metrics(self, design, exc_logger=None):
        '''Get the vector of metrics associated with a design.

        Returns an m-length np array, or None if the design is illegal.
        '''
        try:
            districts = self.evaluator.get_voronoi_districts(design)
            state_metrics, districts = self.evaluator.compute_voronoi_metrics(
                districts)
        except ColliderException:
            if exc_logger is not None:
                exc_logger.write(str(design) + '\n')
            else:
                print("Collider Exception!")
            return None
        except AssertionError as e:
            if exc_logger is not None:
                exc_logger.write(str(design) + '\n')
            else:
                print("Assertion failed: {}".format(e.args))
            return None
        if not self.check_legal_districts(districts):
            return None
        return self.extract_metrics(self.metrics, state_metrics, districts)

    @staticmethod
    def extract_metrics(metric_names, state_metrics, districts,
                        from_json=False):
        metric_dict = dict()
        for state_metric in state_metrics:
            if from_json:
                metric_name = state_metric["name"]
                if metric_name in metric_names:
                    metric_dict[
                        metric_name] = DistopiaEnvironment.json_metric_extractors[
                            metric_name](state_metric, districts)
            else:
                metric_name = state_metric.name
                if metric_name in metric_names:
                    metric_dict[
                        metric_name] = DistopiaEnvironment.metric_extractors[
                            metric_name](state_metric, districts)
        metrics = np.array([metric_dict[metric] for metric in metric_names])
        return metrics

    def get_reward(self, metrics, reward_weights):
        '''Get the scalar reward associated with metrics.
        '''
        if metrics is None:
            return float("-inf")
        else:
            return np.dot(reward_weights, self.standardize_metrics(metrics))

    def standardize_metrics(self, metrics):
        '''Standardize the metrics if standardization stats have been
        provided.
        '''
        if self.mean_array is None or self.std_array is None:
            return metrics
        else:
            if type(metrics) is not np.ndarray:
                metrics = np.array(metrics)
            return (metrics - self.mean_array) / self.std_array

    def destandardize_metrics(self, metrics):
        '''Undo standardization.
        '''
        if self.mean_array is None or self.std_array is None:
            return metrics
        else:
            if type(metrics) is not np.ndarray:
                metrics = np.array(metrics)
            return metrics * self.std_array + self.mean_array

    def fixed2dict(self, fixed_arr):
        '''Convert a fixed nx8 array into an 8-district dict.

        fixed_arr should be of the form [x0, y0, x1, y1, ...].
        '''
        dist_dict = dict()
        assert len(fixed_arr) % 8 == 0
        blocks_per_dist = len(fixed_arr) // 8
        # note: the original iterated range(len(fixed_arr), 2), which is
        # empty; stepping by 2 over the whole array is what's intended.
        # Zero-padding entries are kept as-is here.
        for i in range(0, len(fixed_arr), 2):
            x = fixed_arr[i]
            y = fixed_arr[i + 1]
            district = i // blocks_per_dist
            if district in dist_dict:
                dist_dict[district].append((x, y))
            else:
                dist_dict[district] = [(x, y)]
        return dist_dict

    def dict2fixed(self, dist_dict, blocks_per_dist):
        '''Convert a district dict into a fixed-width array (zero-padded).
        '''
        fixed = []
        for district, blocks in dist_dict.items():
            n_to_pad = blocks_per_dist - len(blocks)
            assert n_to_pad >= 0
            for block in blocks:
                fixed.append(block[0])
                fixed.append(block[1])
            for i in range(n_to_pad):
                fixed.append(0.0)  # x
                fixed.append(0.0)  # y
        return fixed
import random
import time

import distopia
from distopia.app.agent import VoronoiAgent

if __name__ == '__main__':
    agent = VoronoiAgent()
    agent.load_data()
    print('data loaded')
    w, h = agent.screen_size
    t = [0.0] * 10
    for i in range(len(t)):
        # time.clock() was removed in Python 3.8; use perf_counter()
        ts = time.perf_counter()
        fids = {
            i: [(random.random() * w, random.random() * h)]
            for i in range(4)
        }
        print(fids)
        try:
            state_v, district_v = agent.compute_voronoi_metrics(fids)
            for d in district_v:
                print("District {}:\n".format(d))
                for stat in district_v[d]:
                    print("\t{}\n".format(stat.get_data()))
        except Exception:
            print("Couldn't compute Voronoi for {}".format(fids))
            raise
        t[i] = time.perf_counter() - ts
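    # Sketch of a possible summary at the end of the timing loop above
    # (assumes the loop completed and `t` holds per-call durations):
    print("mean: {:.3f}s, max: {:.3f}s".format(sum(t) / len(t), max(t)))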
import itertools
import time
from copy import deepcopy
from random import randint

import numpy as np

from distopia.app.agent import VoronoiAgent
# The DistopiaAgent base class and ColliderException come from elsewhere
# in this repo; their import paths are assumed here.


class GreedyAgent(DistopiaAgent):
    # scalar_value is the mean over districts
    # scalar_std is the standard deviation between districts
    # scalar_maximum is the max over districts
    # s is the state metric object (for this metric)
    # d is the list of district objects (for all metrics)
    metric_extractors = {
        # overall normalization plan: run one-hots in either direction to get
        # rough bounds, then z-normalize and trim on the edges

        # standard deviation of each district's total population (-1)
        # normalization: [0, single-district std]
        'population':
        lambda s, d: np.std(
            [dm.metrics['population'].scalar_value for dm in d]),

        # mean of district margins of victory (-1)
        # normalization: [0, 1]
        'pvi':
        lambda s, d: s.scalar_maximum,

        # minimum compactness among districts (maximize the minimum
        # compactness, penalize non-compactness) (+1)
        # normalization: [0, 1]
        'compactness':
        lambda s, d: np.min(
            [dm.metrics['compactness'].scalar_value for dm in d]),

        # mean ratio of Democrats over all voters in each district
        # (could go either way)
        # normalization: [0, 1]
        'projected_votes':
        lambda s, d: np.mean([
            dm.metrics['projected_votes'].scalar_value / dm.metrics[
                'projected_votes'].scalar_maximum for dm in d
        ]),

        # std of the ratio of non-minority to minority voters over districts
        # normalization: [0, ]
        'race':
        lambda s, d: np.std([
            dm.metrics['race'].scalar_value / dm.metrics['race'].scalar_maximum
            for dm in d
        ]),

        # scalar_value is the std of counties within each district; we take
        # the max (-1) to minimize variance within districts
        # (communities of interest)
        'income':
        lambda s, d: np.max([dm.metrics['income'].scalar_value for dm in d]),

        # 'education': lambda s, d: s.scalar_std,

        # maximum-sized district (-1), to minimize difficulty of access
        # normalization: [0, size of Wisconsin]
        'area':
        lambda s, d: s.scalar_maximum
    }

    def __init__(self, x_lim=(100, 900), y_lim=(100, 900), step_size=5,
                 step_min=50, step_max=100, metrics=[], task=[],
                 pop_mean=None, pop_std=None):
        self.x_min, self.x_max = x_lim
        self.y_min, self.y_max = y_lim
        self.step = step_size
        self.step_min = step_min
        self.step_max = step_max
        self.pop_mean = pop_mean
        self.pop_std = pop_std
        self.occupied = set()
        self.coord_generator = self.gencoordinates(self.x_min, self.x_max,
                                                   self.y_min, self.y_max)
        self.evaluator = VoronoiAgent()
        self.evaluator.load_data()
        if metrics == []:
            self.set_metrics(self.evaluator.metrics)
        else:
            for m in metrics:
                assert m in self.evaluator.metrics
            self.set_metrics(metrics)
        if task == []:
            self.set_task([1 for i in range(len(self.metrics))])
        else:
            assert len(task) == len(self.metrics)
            self.set_task(task)

    def gencoordinates(self, m, n, j, k):
        '''Generate random coordinates in range x: (m,n) y: (j,k).

        Instantiate the generator and call next(g). Based on:
        https://stackoverflow.com/questions/30890434/how-to-generate-random-pairs-of-numbers-in-python-including-pairs-with-one-entr
        '''
        seen = self.occupied
        x, y = randint(m, n), randint(j, k)
        while True:
            while (x, y) in seen:
                # the original resampled y from the x-range (m, n) here;
                # the y-range (j, k) is what's intended
                x, y = randint(m, n), randint(j, k)
            seen.add((x, y))
            yield (x, y)

    def set_metrics(self, metrics):
        '''Define an array of metric names.
        '''
        self.metrics = metrics

    def set_task(self, task):
        self.reward_weights = task

    def reset(self, initial=None, n_districts=8, max_blocks_per_district=5):
        '''Initialize the state randomly.
        '''
        if initial is not None:
            self.state = initial
            self.occupied = set(itertools.chain(*self.state.values()))
            return self.state
        else:
            self.occupied = set()
            self.state = {}
            # place one block for each district, randomly
            for i in range(n_districts):
                self.state[i] = [next(self.coord_generator)]
            initial_blocks = [p[0] for p in self.state.values()]
            # add more blocks...
            for i in range(n_districts):
                # generate at most max_blocks_per_district new blocks per
                # district, inside a disk that stays clear of the nearest
                # other district's centroid
                district_centroid = self.state[i][0]
                other_blocks = np.array(initial_blocks[:i] +
                                        [(float('inf'), float('inf'))] +
                                        initial_blocks[i + 1:])
                distances = np.linalg.norm(other_blocks - district_centroid,
                                           axis=1)
                assert len(distances) == len(other_blocks)
                closest_pt_idx = np.argmin(distances)
                max_radius = distances[closest_pt_idx] / 2
                for j in range(max(0, randint(0,
                                              max_blocks_per_district - 1))):
                    dist = np.random.uniform(0, max_radius)
                    angle = np.random.uniform(0, 2 * np.pi)
                    new_block = district_centroid + np.array(
                        (dist * np.cos(angle), dist * np.sin(angle)))
                    # cast to int so coords match the resample branch below
                    new_block_coords = (int(new_block[0]), int(new_block[1]))
                    max_tries = 10
                    tries = 0
                    while new_block_coords in self.occupied and tries < max_tries:
                        tries += 1
                        dist = np.random.uniform(0, max_radius)
                        angle = np.random.uniform(0, 2 * np.pi)
                        new_block = district_centroid + (dist * np.cos(angle),
                                                         dist * np.sin(angle))
                        new_block_coords = (int(new_block[0]),
                                            int(new_block[1]))
                    if tries < max_tries:
                        self.state[i].append(new_block_coords)
                        self.occupied.add(new_block_coords)
            return self.state

    def get_neighborhood(self, n_steps):
        '''Get all the configs that have one block n_steps away from the
        current config.
        '''
        neighborhood = []
        state = self.state
        for district_id, district in state.items():
            for block_id, block in enumerate(district):
                neighborhood += self.get_neighbors(district_id, block_id)
        return neighborhood

    def get_sampled_neighborhood(self, n_blocks, n_directions,
                                 resample=False):
        '''Sample n_blocks * n_directions neighbors.

        Take n blocks and move each one according to m direction/angle
        pairs. Ignore samples that are prima facie invalid (out of bounds
        or overlapping). If resample is True, sample until we have
        n_blocks * n_directions; otherwise, just try that many times.
        '''
        neighbors = []
        n_districts = len(self.state)
        for i in range(n_blocks):
            # sample from districts, then blocks; this biases toward blocks
            # in districts with fewer blocks, which is arguably similar to
            # how humans work
            district_id = np.random.randint(n_districts)
            district = self.state[district_id]
            block_id = np.random.randint(len(district))
            x, y = district[block_id]
            for j in range(n_directions):
                mx, my = self.get_random_move(x, y)
                valid_move = self.check_boundaries(
                    mx, my) and (mx, my) not in self.occupied
                if valid_move:
                    neighbor = {k: list(val) for k, val in self.state.items()}
                    neighbor[district_id][block_id] = (mx, my)
                    neighbors.append(neighbor)
                elif resample == True:
                    # don't use this yet -- needs a max_tries bound
                    while not valid_move:
                        mx, my = self.get_random_move(x, y)
                        valid_move = self.check_boundaries(mx, my)
        return neighbors

    def get_random_move(self, x, y):
        # note: the original called np.random.uniform(2 * np.pi), which
        # samples from [1, 2*pi); sample the full angle range instead
        dist, angle = (np.random.randint(self.step_min, self.step_max),
                       np.random.uniform(0, 2 * np.pi))
        return (int(x + np.cos(angle) * dist), int(y + np.sin(angle) * dist))

    def check_boundaries(self, x, y):
        '''Return True if inside screen boundaries.
        '''
        if x < self.x_min or x > self.x_max:
            return False
        if y < self.y_min or y > self.y_max:
            return False
        return True

    def get_neighbors(self, district, block):
        '''Get all the designs that move "block" by one step.

        Ignores moves to coords that are occupied or out of bounds.
        '''
        neighbors = []
        moves = [
            np.array((self.step, 0)),
            np.array((-self.step, 0)),
            np.array((0, self.step)),
            np.array((0, -self.step))
        ]
        constraints = [
            lambda x, y: x < self.x_max, lambda x, y: x > self.x_min,
            lambda x, y: y < self.y_max, lambda x, y: y > self.y_min
        ]
        x, y = self.state[district][block]
        for i, move in enumerate(moves):
            mx, my = (x, y) + move
            if constraints[i](mx, my) and (mx, my) not in self.occupied:
                new_neighbor = deepcopy(self.state)
                new_neighbor[district][block] = (mx, my)
                neighbors.append(new_neighbor)
        return neighbors

    def check_legal_districts(self, districts):
        if len(districts) == 0:
            return False
        # TODO: consider checking for len == 8 here as well
        for d in districts:
            if len(d.precincts) == 0:
                return False
        return True

    def get_metrics(self, design, exc_logger=None):
        '''Get the vector of metrics associated with a design.

        Returns an m-length np array, or None if the design is illegal.
        '''
        try:
            districts = self.evaluator.get_voronoi_districts(design)
            state_metrics, districts = self.evaluator.compute_voronoi_metrics(
                districts)
        except ColliderException:
            if exc_logger is not None:
                exc_logger.write(str(design) + '\n')
            else:
                print("Collider Exception!")
            return None
        if not self.check_legal_districts(districts):
            return None
        metric_dict = {}
        for state_metric in state_metrics:
            metric_name = state_metric.name
            if metric_name in self.metrics:
                metric_dict[metric_name] = self.metric_extractors[metric_name](
                    state_metric, districts)
        metrics = np.array([metric_dict[metric] for metric in self.metrics])
        return metrics

    def get_reward(self, metrics):
        '''Get the scalar reward associated with metrics.
        '''
        if metrics is None:
            return float("-inf")
        else:
            return np.dot(self.reward_weights,
                          self.standardize_metrics(metrics))

    def standardize_metrics(self, metrics):
        '''Standardize the metrics if standardization stats have been
        provided.
        '''
        if self.pop_mean is None or self.pop_std is None:
            return metrics
        else:
            return (metrics - self.pop_mean) / self.pop_std

    def run(self, n_steps, logger=None, exc_logger=None, status=None,
            initial=None, eps=0.8, eps_decay=0.9, eps_min=0.1,
            n_tries_per_step=10):
        '''Run for n_steps and return traces of designs and metrics.
        '''
        self.reset(initial)
        i = 0
        last_reward = float("-inf")
        no_valids = 0
        samples = 0
        resets = 0
        randoms = 0
        if logger is None:
            metric_log = []
            mappend = metric_log.append
            design_log = []
            dappend = design_log.append
        while i < n_steps:
            i += 1
            if i % 50 == 0:
                last_reward = float("-inf")
                self.reset(initial)
            count = 0
            best_reward_this_step = []
            best_metrics_this_step = []
            best_neighborhood_this_step = []
            best_reward_val_this_step = float("-inf")
            for j in range(n_tries_per_step):
                # clear metrics and rewards to prevent the case where we
                # continue on an empty neighborhood and end the loop without
                # clearing the reward (this was causing an index error)
                metrics = []
                rewards = []
                samples += 1
                neighborhood = self.get_sampled_neighborhood(4, 2)
                if len(neighborhood) < 1:
                    continue
                else:
                    metrics = [
                        self.get_metrics(n, exc_logger) for n in neighborhood
                    ]
                    count += len(metrics)
                    rewards = [self.get_reward(m) for m in metrics]
                    best_idx = np.argmax(rewards)
                    # if there are no legal and evaluatable moves, ignore
                    # this try
                    if rewards[best_idx] == float("-inf"):
                        no_valids += 1
                    # if on the other hand there is a move that beats the
                    # last step (on the first step, any legal move), stop
                    elif rewards[best_idx] > last_reward:
                        break
                    # otherwise, record this sample in case we can't find a
                    # better one; skip it if it's worse than the best so far
                    elif len(best_reward_this_step) == 0 or rewards[
                            best_idx] > best_reward_val_this_step:
                        best_reward_this_step = rewards[:]
                        best_metrics_this_step = deepcopy(metrics)
                        best_reward_val_this_step = rewards[best_idx]
                        best_neighborhood_this_step = deepcopy(neighborhood)
                    assert len(rewards) == len(neighborhood)
            # if we ended without finding something better, take the last
            # best legal thing we saw; if there are no legal states, reset
            if len(rewards) == 0 or rewards[best_idx] == float("-inf"):
                if len(best_reward_this_step) > 0:
                    rewards = best_reward_this_step[:]
                    metrics = deepcopy(best_metrics_this_step)
                    neighborhood = deepcopy(best_neighborhood_this_step)
                    best_idx = np.argmax(rewards)
                else:
                    last_reward = float("-inf")
                    # nowhere legal to go: give the step back so we still
                    # take n_steps in total (alternatives: restart and log an
                    # empty row, or just skip this step)
                    i -= 1
                    resets += 1
                    self.reset(initial)
                    continue
            if np.random.rand() < eps:
                randoms += 1
                # mask out the legal options
                legal_mask = np.array(
                    [1 if r > float("-inf") else 0 for r in rewards],
                    dtype=np.float32)
                # convert to a probability distribution
                legal_mask /= np.sum(legal_mask)
                best_idx = np.random.choice(np.arange(len(rewards)),
                                            p=legal_mask)
            if eps > eps_min:
                eps *= eps_decay
                if eps < eps_min:
                    eps = eps_min
            last_reward = rewards[best_idx]
            # TODO: update `occupied` incrementally when changing state
            self.state = neighborhood[best_idx]
            self.occupied = set(itertools.chain(*self.state.values()))
            if status is not None:
                status.put('next')
            if logger is not None:
                logger.write(
                    str([
                        time.perf_counter(), count,
                        list(metrics[best_idx]), self.state
                    ]) + '\n')
            else:
                mappend(metrics[best_idx])
                dappend(self.state)
        if logger is not None:
            return "n_steps: {}, samples: {}, resets: {}, none_valids: {}, randoms: {}".format(
                n_steps, samples, resets, no_valids,
                randoms), self.reward_weights
        else:
            print(
                "n_steps: {}, samples: {}, resets: {}, none_valids: {}, randoms: {}"
                .format(n_steps, samples, resets, no_valids, randoms),
                self.reward_weights)
            return design_log, metric_log
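# Usage sketch for GreedyAgent: a short greedy search that minimizes
# population imbalance while maximizing compactness. The metric list and
# task weights are illustrative.
agent = GreedyAgent(metrics=['population', 'compactness'], task=[-1, 1])
designs, metric_trace = agent.run(n_steps=100, eps=0.5)
print(len(designs), metric_trace[-1])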
import csv
import itertools
import os
import pickle as pkl
from multiprocessing import Manager, Pool
from threading import Thread

import numpy as np
from tqdm import tqdm

from distopia.app.agent import VoronoiAgent
# The Data base class, DistopiaEnvironment, and hierarchical_sort come from
# elsewhere in this repo; their import paths are assumed here.


class DistopiaData(Data):
    master_metric_list = [
        'population', 'pvi', 'compactness', 'projected_votes', 'race'
    ]

    def __init__(self):
        self.voronoi = VoronoiAgent()
        self.voronoi.load_data()

    def set_params(self, specs):
        for param, param_val in specs.items():
            setattr(self, param, param_val)
        self.preprocessors = specs["preprocessors"]
        self.generate_task_dicts(len(self.metric_names))
        # if "n_workers" in specs:
        #     self.n_workers = specs["n_workers"]
        # else:
        #     self.n_workers = 1

    def load_agent_data(self, fname, fmt=None, labels_path=None, append=False,
                        load_designs=False, load_metrics=False,
                        load_rewards=False, norm_file=None, data_dir=None):
        """Load the log file from running an agent.

        Assumes that the log file contains data from multiple tasks.
        """
        # TODO: this is a temporary fix to standardize human metrics
        env = DistopiaEnvironment()
        env.set_normalization(norm_file, self.metric_names)
        logs = self.load_json(fname)
        task_counter = 0
        trajectories = []
        rewards = []
        cur_task = logs[0]["task"]
        print(cur_task)
        cur_trajectory = []
        task_counter += 1
        for episode in logs[0]['episodes']:
            for step in episode['run_log']:
                step_tuple = []
                if load_designs:
                    step_districts = self.jsondistricts2mat(step['design'])
                    step_tuple.append(step_districts)
                if load_metrics:
                    assert hasattr(self, "metric_names")
                    step_metrics = env.standardize_metrics(
                        self.task_str2arr(step['metrics']))
                    step_tuple.append(step_metrics)
                if load_rewards:
                    rewards.append(step['reward'])
                cur_trajectory.append(step_tuple)
        if load_rewards:
            self.rewards = rewards
            if data_dir:
                self.save_rewards(data_dir, fname)
            else:
                self.save_rewards('', fname)
        trajectories.append((cur_trajectory[:], cur_task))
        if append == False or not hasattr(self, 'x') or not hasattr(
                self, 'y'):
            self.y = []
            self.x = []
        else:
            self.y = list(self.y)
            self.x = list(self.x)
        x = []
        y = []
        for trajectory in trajectories:
            samples, task = trajectory
            for sstep in samples:
                x.append(*sstep)  # any sample data
                y.append(task)  # task
        for preprocessor in self.preprocessors:
            x, y = getattr(self, preprocessor)((x, y))
        for i in x:
            self.x.append(i)
        for j in y:
            self.y.append(j)
        self.x = np.array(self.x)
        self.y = np.array(self.y)

    def load_data(self, fname, fmt=None, labels_path=None, append=False,
                  load_designs=False, load_metrics=False, norm_file=None,
                  load_fiducials=False):
        if fmt is None:
            fmt = self.infer_fmt(fname)
        print(fmt)
        if fmt == "pkl" or fmt == "pk":
            metrics, designs = self.load_pickle(fname)
            if not hasattr(self, 'feature_type'):
                raw_data = designs
            elif self.feature_type == "metrics":
                raw_data = self.taskdict2vect(metrics)
            elif self.feature_type == "designs":
                raw_data = designs
            self.x, self.y = raw_data
            for preprocessor in self.preprocessors:
                self.x, self.y = getattr(self, preprocessor)(
                    (self.x, self.y))
            # try to force de-allocation
            raw_data = None
        elif fmt == "npy":
            assert labels_path is not None
            self.x = np.load(fname)
            self.y = np.load(labels_path)
            for preprocessor in self.preprocessors:
                self.x, self.y = getattr(self, preprocessor)(
                    (self.x, self.y))
        elif fmt == "json":  # it's a log file (for now)
            # env = DistopiaEnvironment()  # TODO: temp fix to standardize
            # env.set_normalization(norm_file, self.metric_names)
            logs = self.load_json(fname)
            # tuples of trajectory, focus_trajectory, and label
            trajectories = []
            cur_task = None
            cur_focus = "None"
            cur_trajectory_districts = []
            cur_trajectory_metrics = []
            cur_trajectory = []
            task_counter = 0
            for log in logs:
                keys = log.keys()
                step_tuple = []
                if "task" in keys:
                    trajectories.append((cur_trajectory[:], cur_task))
                    cur_task = log["task"]
                    print(cur_task)
                    cur_trajectory = []
                    task_counter += 1
                elif "focus" in keys and log['focus']['cmd'] == 'focus_state':
                    cur_focus = log['focus']['param']
                elif cur_task is None:
                    continue
                elif "districts" in keys:
                    if len(log['districts']['districts']) < 8:
                        continue
                    district_sizes = [
                        len(d['precincts'])
                        for d in log['districts']['districts']
                    ]
                    if min(district_sizes) < 1:
                        continue
                    if load_designs == True:
                        step_districts = self.jsondistricts2mat(
                            log['districts']['districts'])
                        step_tuple.append(step_districts)
                    if load_metrics == True:
                        assert hasattr(self, "metric_names")
                        # normalization of human data is currently disabled:
                        # step_metrics = env.standardize_metrics(...)
                        step_metrics = DistopiaEnvironment.extract_metrics(
                            self.metric_names, log['districts']['metrics'],
                            log['districts']['districts'], from_json=True)
                        step_tuple.append(step_metrics)
                    cur_trajectory.append(step_tuple)
            trajectories.append((cur_trajectory[:], cur_task))
            if append == False or not hasattr(self, 'x') or not hasattr(
                    self, 'y'):
                self.y = []
                self.x = []
            else:
                self.y = list(self.y)
                self.x = list(self.x)
            x = []
            y = []
            for trajectory in trajectories:
                samples, task = trajectory
                for sstep in samples:
                    x.append(*sstep)  # any sample data
                    y.append(task)  # task
            for preprocessor in self.preprocessors:
                x, y = getattr(self, preprocessor)((x, y))
            # TODO: make this more efficient, probably with np.concatenate
            for i in x:
                self.x.append(i)
            for j in y:
                self.y.append(j)
            self.x = np.array(self.x)
            self.y = np.array(self.y)
        elif fmt == "csv":
            # TODO: no pre-processing for now; fix this later
            assert labels_path is not None
            raw_x = self.load_csv(fname)
            raw_y = self.load_csv(labels_path)
            for preprocessor in self.preprocessors:
                raw_x, raw_y = getattr(self, preprocessor)((raw_x, raw_y))
            if append == False or not hasattr(self, 'x') or not hasattr(
                    self, 'y'):
                self.x = raw_x
                self.y = raw_y
            else:
                self.x = np.concatenate((self.x, raw_x))
                self.y = np.concatenate((self.y, raw_y))

    def generate_task_dicts(self, dim):
        if not hasattr(self, "task_labels"):
            self.task_labels = hierarchical_sort(
                list(map(np.array,
                         itertools.product(*[[-1., 0., 1.]] * dim))))
        else:
            self.task_labels = hierarchical_sort(
                list(map(np.array, self.task_labels)))
        self.task_ids = {
            self.task_arr2str(task): i
            for i, task in enumerate(self.task_labels)
        }
        self.task_dict = {
            self.task_arr2str(task): task
            for task in self.task_labels
        }

    # serialization functions

    def save_csv(self, xfname, yfname):
        print(xfname)
        with open(xfname + ".csv", 'w+', newline='') as samplefile:
            with open(yfname + "_labels.csv", 'w+', newline='') as labelfile:
                samplewriter = csv.writer(samplefile)
                labelwriter = csv.writer(labelfile)
                for i, sample in enumerate(self.x):
                    samplewriter.writerow(sample.flatten())
                    labelwriter.writerow(self.y[i])

    def save_rewards(self, data_dir, fname):
        # note: the original ignored data_dir; join it in so the argument
        # has an effect (os.path.join('', f) is just f)
        out_path = os.path.join(data_dir, str(fname) + '_rewards_pickle.txt')
        with open(out_path, 'wb') as rewardsfile:
            print(rewardsfile)
            pkl.dump(self.rewards, rewardsfile)

    def save_npy(self, xfname, yfname):
        np.save(xfname, self.x)
        np.save(yfname, self.y)

    def standardize(self, data, standardization_file=None):
        env = DistopiaEnvironment()
        if standardization_file is None:
            if self.standardization_file is None:
                raise ValueError("No standardization params are set!")
            else:
                standardization_file = self.standardization_file
        env.set_normalization(standardization_file, self.metric_names)
        x, y = data
        return env.standardize_metrics(x), y

    def destandardize(self, data, standardization_file=None):
        env = DistopiaEnvironment()
        if standardization_file is None:
            if self.standardization_file is None:
                raise ValueError("No standardization params are set!")
            else:
                standardization_file = self.standardization_file
        env.set_normalization(standardization_file, self.metric_names)
        x, y = data
        return env.destandardize_metrics(x), y

    # label pre-processing

    def filter_by_task(self, data):
        x, y = data
        labels = list(self.task_dict.keys())
        mask = [self.task_arr2str(label) in labels for label in y]
        return x[mask], y[mask]

    def slice_metrics_to_3(self, data):
        x, y = data
        return x[:, :3], y

    def slice_metrics_to_4(self, data):
        x, y = data
        return x[:, :4], y

    def onehot2class(self, data):
        x, y = data
        y_out = []
        for label in y:
            y_out.append(self.task_ids[self.task_arr2str(label)])
        print(y_out[:10])
        return x, y_out

    def class2onehot(self, data):
        x, y = data
        y_out = []
        for label in y:
            y_out.append(self.task_labels[label])
        return x, np.array(y_out)

    @staticmethod
    def unflatten_districts(data):
        flattened_arr_list, labels = data
        n, flat_dim = flattened_arr_list.shape
        assert flat_dim == 72 * 8
        return flattened_arr_list.reshape(n, 72, 8), labels

    @staticmethod
    def task_str2arr(task_str):
        '''Convert a stringified np array back to the array.

        :param task_str: the stringified np array
        :return: a np array
        '''
        assert type(task_str) == str
        assert task_str[0] == '['
        assert task_str[-1] == ']'
        if ',' in task_str:
            return np.array(eval(task_str))
        else:
            return np.array(task_str[1:-1].split(), dtype=float)

    @staticmethod
    def task_arr2str(arr):
        return str(np.array(arr, dtype=float))

    # design pre-processing

    def truncate_design_dict(self, design_dict):
        assert (hasattr(self, "slice_lims"))
        start, limit = self.slice_lims
        for key, samples in design_dict.items():
            design_dict[key] = samples[start:limit]
        return design_dict

    def filter_by_metrics(self, data):
        '''Remove all the data with labels that have nonzero weight on
        metrics that are not on our list.
        '''
        x, y = data
        x_out = []
        y_out = []
        metric_indices = [
            self.master_metric_list.index(metric)
            for metric in self.metric_names
        ]
        for i, label in enumerate(y):
            in_scope = True
            for j, weight in enumerate(label):
                if np.abs(weight) == 1 and self.master_metric_list[
                        j] not in self.metric_names:
                    in_scope = False
                    break
            if in_scope == True:
                x_out.append(x[i, :])
                y_out.append(label[metric_indices])
        return np.array(x_out), np.array(y_out)

    def sliding_window(self, data, window_size=40):
        if hasattr(self, "window_size"):
            window_size = self.window_size
        # should probably also check whether these are metrics or designs
        if type(data) == dict:
            # chunk into 50-step episodes and slide the window
            task_dict = dict()
            for key, val in data.items():
                upper_bound = val.shape[0] - val.shape[0] % 50
                truncated = val[:upper_bound]
                n_steps, metric_dim = truncated.shape
                n_chunks = n_steps // 50
                task_dict[key] = val.reshape(n_chunks, 50, metric_dim)
        else:
            x, y = data
            x_out = []
            y_out = []
            task_dict = self.get_task_dict(x, y, merge=False)
            for key, val in task_dict.items():
                for instance in val:
                    # slide across as much as possible
                    i = 0
                    while i + window_size < len(instance):
                        x_out.append(instance[i:i + window_size])
                        y_out.append(self.task_str2arr(key))
                        i += 1
            return np.array(x_out), np.array(y_out)

    def balance_samples(self, data):
        '''Cut samples down to balanced_sample_size for each class.

        Assumes that the order of the data doesn't matter.
        '''
        x, y = data
        labels = set(y)
        x_ = None
        y_ = None
        y = np.array(y)
        for label in labels:
            if x_ is None:
                x_ = x[np.random.choice(
                    np.where(y == label)[0], self.balanced_sample_size)]
                assert y_ is None
                y_ = [label for i in range(self.balanced_sample_size)]
            else:
                assert len(x_) == len(y_)
                x_ = np.concatenate([
                    x_,
                    x[np.random.choice(
                        np.where(y == label)[0], self.balanced_sample_size)]
                ])
                y_ = np.concatenate(
                    [y_, [label for i in range(self.balanced_sample_size)]])
        return x_, y_

    def conv3dreshape(self, data):
        x, y = data
        n, w, h, d = x.shape
        # note: reshape does not permute axes; if axis reordering is the
        # intent here, a transpose may be what's wanted
        return x.reshape(n, h, d, w, 1), y

    def strip_repeats(self, data):
        x, y = data
        x_out = [x[0]]
        y_out = [y[0]]
        last_sample = x[0]
        for i, sample in enumerate(x[1:], 1):
            if not np.array_equal(last_sample, sample):
                x_out.append(sample)
                y_out.append(y[i])
                last_sample = sample
        return np.array(x_out), np.array(y_out)

    @staticmethod
    def window_stack(a, stepsize=1, width=3):
        return np.hstack([
            a[i:1 + i - width or None:stepsize] for i in range(0, width)
        ])

    def sliding_window_arr(self, data):
        print("sliding window")
        assert hasattr(self, "window_step")
        assert hasattr(self, "window_size")
        # this is expensive, but the easiest way to do it is to build a dict
        # keyed on label, convert to windows, and recreate the array
        data_dict = dict()
        x_arr, y_arr = data
        for i, x in enumerate(x_arr):
            y = self.task_arr2str(y_arr[i])
            if y in data_dict:
                data_dict[y].append(x)
            else:
                data_dict[y] = [x]
        windowed_x = None
        windowed_y = None
        for y, xs in data_dict.items():
            wxs = self.window_stack(np.array(xs), self.window_step,
                                    self.window_size)
            if windowed_x is None:
                windowed_x = [wxs]
                windowed_y = [self.task_str2arr(y)]
            else:
                # TODO: this branch looks unfinished -- it appends one label
                # per task rather than one per window (the original left a
                # pdb trace here)
                windowed_x = np.concatenate([windowed_x, wxs])
                windowed_y = np.concatenate(
                    [windowed_y, self.task_str2arr(y)])
        return windowed_x, windowed_y

    def sliding_window_dict(self, design_dict):
        print("sliding window")
        # note: the original asserted hasattr(self, window_step) with bare
        # names (a NameError) and left a pdb trace here
        assert hasattr(self, "window_step")
        assert hasattr(self, "window_size")
        for key, samples in design_dict.items():
            design_dict[key] = self.window_stack(samples, self.window_step,
                                                 self.window_size)
        return design_dict

    def design_dict2mat_labelled(self, design_dict):
        print("Converting designs to matrices.")
        lengths = [len(samples) for samples in design_dict.values()]
        n_samples = np.sum(lengths)
        print("allocating space.")
        print("converting {} designs.".format(n_samples))
        if self.n_workers < 2:
            x = np.zeros((n_samples, 72, 8), dtype=np.uint8)
            y = np.zeros((n_samples, 5), dtype=np.uint8)
            sample_counter = 0
            with tqdm(total=n_samples) as bar:
                for key in design_dict.keys():
                    for sample in design_dict[key]:
                        x[sample_counter, :, :] = self.fiducials2district_mat(
                            sample)
                        y[sample_counter, :] = self.task_str2arr(key)
                        sample_counter += 1
                        bar.update(1)
        else:
            print("Multi-Processing the preprocessor")
            # get the start index for each dict entry processed here
            start_indices = [0]
            for length in lengths:
                start_indices.append(start_indices[-1] + length)
            progress_queue = Manager().Queue()
            progress_thread = Thread(target=self.progress_monitor,
                                     args=(n_samples, progress_queue))
            progress_thread.start()
            queued_tasks = [(design_dict[key], key, progress_queue)
                            for i, key in enumerate(design_dict.keys())]
            with Pool(self.n_workers) as pool:
                # block here until finished
                results = pool.starmap(self.designs2mat_labelled_helper,
                                       queued_tasks)
            x, y = map(np.concatenate, zip(*results))
        # every precinct should be assigned exactly once per sample
        assert np.sum(x) == n_samples * 72
        return x, y

    @staticmethod
    def designs2mat_labelled_helper(designs, task, progress_queue):
        temp_voronoi = VoronoiAgent()
        temp_voronoi.load_data()
        x = np.zeros((len(designs), 72, 8))
        y = np.zeros((len(designs), 5))
        for i, design in enumerate(designs):
            # works because arguments are passed by object reference
            x[i, :, :] = DistopiaData.static_fiducials2district_mat(
                design, voronoi=temp_voronoi)
            y[i, :] = DistopiaData.task_str2arr(task)
            progress_queue.put(1)
        return x, y

    def progress_monitor(self, n_samples, progress_queue):
        for i in tqdm(range(n_samples)):
            progress_queue.get()

    @staticmethod
    def static_fiducials2district_mat(fiducials, voronoi):
        districts = voronoi.get_voronoi_districts(fiducials)
        district_mat = DistopiaData.districts2mat(districts)
        return district_mat

    def fiducials2district_mat(self, fiducials, voronoi=None):
        '''Convert a dict of fiducials into a matrix representation of
        district assignments (a 72x8 one-hot matrix).
        '''
        if voronoi is None:
            voronoi = self.voronoi
        districts = voronoi.get_voronoi_districts(fiducials)
        district_mat = self.districts2mat(districts)
        return district_mat

    @staticmethod
    def districts2mat(district_list):
        '''Take a list of district objects and return an occupancy matrix of
        precincts by district (a 72x8 one-hot matrix where mat[a, b]
        indicates whether precinct a is in district b).
        '''
        mat = np.zeros((72, 8), dtype=int)
        for i, district in enumerate(district_list):
            precincts = district.precincts
            for precinct in precincts:
                mat[int(precinct.identity), i] = 1
        return mat

    @staticmethod
    def jsondistricts2mat(district_list):
        '''Take a list of district dicts and return an occupancy matrix of
        precincts by district (a 72x8 one-hot matrix where mat[a, b]
        indicates whether precinct a is in district b).
        '''
        mat = np.zeros((72, 8), dtype=int)
        for i, district in enumerate(district_list):
            precincts = district['precincts']
            for precinct in precincts:
                mat[int(precinct), i] = 1
        return mat

    def get_task_dict(self, x=None, y=None, merge=False):
        '''Get a dictionary of trajectories keyed on task.

        If merge is True, concat the trajectories.
        '''
        if x is None:
            x = self.x
        if y is None:
            y = self.y
        task_dict = dict()
        task_keys = set()
        # start by getting the list of tasks in the data
        for task in y:
            task_keys.add(self.task_arr2str(task))
        for key in task_keys:
            task_arr = self.task_str2arr(key)
            indices = np.where((y == task_arr).all(axis=1))[0]
            if merge:
                task_dict[key] = x[indices]
            else:
                # otherwise, split the indices into contiguous runs
                task_dict[key] = []
                last_idx = indices[0]
                start_idx = indices[0]
                for idx in indices[1:]:
                    if idx - last_idx > 1:
                        end_idx = last_idx
                        if end_idx - start_idx > 1:
                            task_dict[key].append(x[start_idx:end_idx])
                        start_idx = idx
                    last_idx = idx
                if start_idx < last_idx:
                    # close out the last run
                    task_dict[key].append(x[start_idx:last_idx])
        return task_dict

    def taskdict2vect(self, task_dict):
        # returns x, y from a dict
        x = []
        y = []
        for y_str, x_arr in task_dict.items():
            for x_row in x_arr:
                x.append(x_row)
                y.append(self.task_str2arr(y_str))
        return np.array(x), np.array(y)
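# Usage sketch for DistopiaData: parse a human-play JSON log into metric
# and label arrays. The file name, metric list, and preprocessor choice are
# illustrative; load_json and infer_fmt are assumed from the surrounding
# codebase.
data = DistopiaData()
data.set_params({
    'metric_names': ['population', 'pvi', 'compactness'],
    'preprocessors': ['strip_repeats'],
})
data.load_data('logs/session_0.json', fmt='json', load_metrics=True)
print(data.x.shape, data.y.shape)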