def query_cell(self, event, context):
    """Return the distinct city names stored for an H3 cell.

    The sort-key prefix is the ``#``-joined chain of ancestors of
    ``event['h3_address']`` from ``min_res`` up to the cell's own resolution,
    capped at ``max_res``. That prefix and the cell's base cell number are
    handed to ``self.query_db_table`` as a ``begins_with`` key condition.

    :param event: mapping with an ``'h3_address'`` entry (the cell to look up).
    :param context: not used by this method.
    :return: list of city names, de-duplicated, in the order first seen in
        the query response.
    """
    h3_address = event['h3_address']
    resolution = h3.h3_get_resolution(h3_address)
    base_cell = str(h3.h3_get_base_cell(h3_address))

    # Never request ancestors finer than the cell itself or than max_res.
    max_query_res = min(resolution, max_res)
    range_query = "#".join(
        h3.h3_to_parent(h3_address, res)
        for res in range(min_res, max_query_res + 1)
    )

    key_condition_expression = "ParentCell = :parentcell AND begins_with(CellLocationIndex, :index)"
    expression_values = {
        ":parentcell": {"S": base_cell},
        ":index": {"S": range_query},
    }
    resp = self.query_db_table(key_condition_expression, expression_values)

    # Order-preserving de-duplication; the set keeps membership checks O(1)
    # instead of rescanning the result list for every item.
    cities = []
    seen = set()
    for item in resp['Items']:
        city = item['CityName']['S']
        if city not in seen:
            seen.add(city)
            cities.append(city)
    return cities
def occupied_neighbors(hex, density_tgt, density_max, N, hex_density, method='siblings'):
    """Compute the density clip for ``hex`` from how many nearby hexs are at target.

    Nearby hexs that appear in ``hex_density`` with a ``'clipped'`` value of at
    least ``density_tgt`` are counted. The clip starts at ``density_tgt`` and is
    multiplied by ``count - N + 1`` once the count exceeds ``N``; it never
    exceeds ``density_max``.

    :param hex: hex to query
    :param density_tgt: target density for hexs at this resolution
    :param density_max: maximum density at this resolution
    :param N: number of at-target nearby hexs tolerated before the clip is
        raised above ``density_tgt``
    :param hex_density: dictionary keyed by hex whose values are dicts with a
        ``'clipped'`` entry
    :param method: ``'siblings'`` (children of ``hex``'s parent at ``hex``'s
        resolution) or ``'neighbors'`` (``h3.hex_range(hex, 1)``)
    :return: the clip value
    :raises ValueError: if ``method`` is not one of the two supported values
    """
    # Validate first: previously an unknown method left `neighbors` unbound and
    # failed later with an UnboundLocalError.
    if method not in ('siblings', 'neighbors'):
        raise ValueError(
            "method must be 'siblings' or 'neighbors', got {!r}".format(method))

    res = h3.h3_get_resolution(hex)
    if method == 'siblings':
        neighbors = h3.h3_to_children(h3.h3_to_parent(hex, res - 1), res)
    else:
        neighbors = h3.hex_range(hex, 1)

    # Hexs missing from hex_density are unoccupied and do not count.
    neighbors_above_tgt = sum(
        1 for n in neighbors
        if n in hex_density and hex_density[n]['clipped'] >= density_tgt
    )

    return min(density_max, density_tgt * max(1, (neighbors_above_tgt - N + 1)))
def resolution_downsampling(gdf, hex_col, coarse_resolution, agg):
    '''
    Re-bin hexagon rows onto a coarser H3 resolution and aggregate them.

    Every hex id in ``hex_col`` is mapped to its parent at
    ``coarse_resolution``; rows sharing a parent are combined with ``agg`` and
    each parent gets a geometry built by ``geo_boundary_to_polygon``.

    Parameters
    ----------
    gdf: GeoDataFrame
        GeoDataFrame with hexagon geometries. It is copied, not modified.
    hex_col: str
        Name of the column holding the hex ids.
    coarse_resolution: int
        Target H3 resolution passed to ``h3.h3_to_parent``.
    agg:
        Aggregation specification forwarded unchanged to ``groupby(...).agg``.

    Returns
    -------
    gdfc: GeoDataFrame
        One row per parent hexagon (column ``hex_<coarse_resolution>``), the
        aggregated metrics, and the CRS of ``gdf``.
    '''
    parent_col = 'hex_{}'.format(coarse_resolution)

    def _coarsen(cell):
        return h3.h3_to_parent(cell, coarse_resolution)

    working = gdf.copy()
    working[parent_col] = working[hex_col].apply(_coarsen)

    aggregated = working.groupby([parent_col]).agg(agg)
    aggregated = aggregated.reset_index()

    polygons = aggregated[parent_col].apply(geo_boundary_to_polygon)
    return gpd.GeoDataFrame(aggregated, geometry=polygons, crs=gdf.crs)
def add_h3_ids_to_points(df: pd.DataFrame, h3_max: int, h3_min: int) -> pd.DataFrame:
    """Add Uber H3 ids to the point geometries in a Spatially Enabled DataFrame.

    The id at the highest requested resolution is computed from each
    geometry's centroid; ids for the remaining levels are derived from it with
    ``h3.h3_to_parent``. Columns are added to ``df`` in place.

    :param df: Spatially Enabled DataFrame with point geometries in ``SHAPE``.
    :param h3_max: Integer maximum H3 grid level defining the smallest
        geographic hex area - must be larger than the minimum.
    :param h3_min: Integer minimum H3 grid level defining the largest
        geographic hex area - must be smaller than the maximum.
    :return: The same DataFrame with an H3 id column for every level returned
        by ``_get_h3_range_lst(h3_min, h3_max)``.
    """
    assert h3_max > h3_min

    # highest resolution first, so everything else can be derived from it
    levels = _get_h3_range_lst(h3_min, h3_max)
    levels.sort(reverse=True)
    finest = levels[0]

    def _index_centroid(geom):
        return h3.geo_to_h3(geom.centroid[1], geom.centroid[0], finest)

    df[_h3_col(finest)] = df.SHAPE.swifter.apply(_index_centroid)

    # coarser ids are parents of the finest id
    for level in levels[1:]:
        finest_ids = df[_h3_col(finest)]
        df[_h3_col(level)] = finest_ids.swifter.apply(
            lambda cell: h3.h3_to_parent(cell, level))

    return df
def load_cities(self, event, context):
    """Load city polygons from ``usa_cities.geojson`` into the database table.

    Each named city is filled with H3 cells at ``max_res``. One item is written
    per cell: the partition key is the cell's base cell number, and the sort
    key is the ``#``-joined ancestor chain from ``min_res`` to ``max_res``
    followed by ``#<city id>``. The city id is the feature's position in the
    file. Progress is printed every 1000 features.

    :param event: not used by this method.
    :param context: not used by this method.
    """
    with open('usa_cities.geojson', 'r') as geoj:
        features = json.load(geoj)['features']

    table = dynamodb_resource.Table(db_table.name)
    print("Loading database table")

    with table.batch_writer() as db_batch:
        for idx, city in enumerate(features):
            city_id = str(idx)
            # features without a name are skipped entirely
            if city['properties']['NAME']:
                cells = h3.polyfill(city['geometry'],
                                    max_res,
                                    geo_json_conformant=True)
                # an empty fill simply writes nothing
                for cell in cells:
                    ancestor_chain = "#".join(
                        [h3.h3_to_parent(cell, res)
                         for res in range(min_res, max_res + 1)])
                    db_batch.put_item(Item={
                        'ParentCell': "{}".format(h3.h3_get_base_cell(cell)),
                        'CellLocationIndex': ancestor_chain + "#{}".format(city_id),
                        'CityName': city['properties']['NAME'],
                        'CityID': city_id,
                    })
            if idx % 1000 == 0:
                print("Processed {} cities".format(idx))
def cell_h3_downsampling(df, cell_id_col, metric_col, coarse_resolution, metric_type):
    """Aggregates a given attribute in h3 cells to a given coarser resolution level

    Numeric attributes are averaged per coarse cell. Categorical attributes
    take the most frequent value per coarse cell; ties are broken by the
    smallest value so the result is deterministic.

    Parameters:
    df (pandas dataframe): dataframe with h3 ids and attributes for aggregation
        (copied, not modified)
    cell_id_col (string): name of h3 id column
    metric_col (string): name of a column for aggregation
    coarse_resolution (integer): Coarser h3 resolution for aggregation
    metric_type (string): attribute type: 'numeric' (alias 'numerical') or
        'categorical'

    Returns:
    Pandas dataframe with two columns, named ``cell_id_col`` (now holding the
    coarse cell ids) and ``metric_col``

    Raises:
    ValueError: if ``metric_type`` is not a supported value
    """
    # Fail early with a clear message; an unknown type used to surface as an
    # UnboundLocalError after the aggregation work had been done.
    if metric_type not in ('numeric', 'numerical', 'categorical'):
        raise ValueError(
            "metric_type must be 'numeric' or 'categorical', got {!r}".format(
                metric_type))

    df_coarse = df.copy()
    coarse_id_col = 'cell_id_{}'.format(coarse_resolution)
    df_coarse[coarse_id_col] = df_coarse[cell_id_col].apply(
        lambda x: h3.h3_to_parent(x, coarse_resolution))

    if metric_type == 'categorical':
        counts = (df_coarse.groupby([coarse_id_col, metric_col])
                  .size()
                  .reset_index(name='count'))
        # Highest count first within each coarse cell, so first() picks the
        # majority category. Sorting on the category before the count (as was
        # done previously) always selected the alphabetically smallest value.
        ranked = counts.sort_values(
            by=[coarse_id_col, 'count', metric_col],
            ascending=[True, False, True])
        dfc = ranked.groupby(coarse_id_col, as_index=False, sort=False).first()
        dfc = dfc.drop('count', axis=1)
    else:
        dfc = df_coarse.groupby(coarse_id_col)[[metric_col]].mean().reset_index()

    # Callers get the coarse ids back under the original id column name.
    dfc.columns = [cell_id_col, metric_col]
    return dfc
def downscale_h3(time_win_df, agg_brothers, downscale_size=2, h3_index_col="h3_index"):
    """Merge groups of sibling H3 cells into their parent cell.

    The frame is reset to a positional index and every row is tagged with the
    H3 resolution of its cell (column ``"h3_res"``). Starting at the smallest
    resolution value present and repeating for ``downscale_size`` successively
    smaller resolution values, each row at the current resolution is examined:
    if at least ``n_min_brothers_to_scale`` rows of the frame belong to cells
    that share its parent, those rows (and, on later passes, rows of the
    parent's finer descendants) are handed to ``agg_brothers``. The result
    replaces the examined row, relabelled as the parent cell, and the other
    rows that were aggregated are dropped.

    :param time_win_df: DataFrame containing (after ``reset_index``) a column
        named ``h3_index_col`` with H3 cell ids.
    :param agg_brothers: callable taking the DataFrame of rows to merge. It
        returns ``False`` to leave the group untouched; any other return value
        is item-assigned to and stored with ``.loc`` as the merged row.
    :param downscale_size: number of resolution levels to process.
    :param h3_index_col: name of the column holding the H3 cell ids.
    :return: the modified frame, indexed by ``h3_index_col``.
    """
    # tODO: 2nd for loop should be a while with selected res at top
    # Only with this number of brother the hexagons will be scaled (max = 7)
    n_min_brothers_to_scale = 5
    time_win_h3 = time_win_df.reset_index()
    time_win_h3["h3_res"] = time_win_h3[h3_index_col].apply(
        h3.h3_get_resolution)
    # Resolutions to process: the smallest one present, then one level lower
    # each pass, for downscale_size passes in total.
    downscale_resulutions = range(time_win_h3["h3_res"].min(),
                                  time_win_h3["h3_res"].min() - downscale_size,
                                  -1)
    # child_h3_res_depth is the pass number (0 on the first pass); it controls
    # how many finer levels of descendants are swept into a merge below.
    for child_h3_res_depth, downscale_res in enumerate(downscale_resulutions):
        print("Auto downscale h3 resolution:", downscale_res)
        for idx, row in time_win_h3.iterrows():
            # Once time_win_h3 indexs get changed during the loop and the time_win_h3.iterrows()
            # is a copy of the rows, they might not exist
            if idx not in time_win_h3.index:
                continue
            h3idx = row[h3_index_col]
            cell_res = row["h3_res"]
            # If its a different res than the one we re trying to downscale, skips
            if cell_res != downscale_res:
                continue
            parent = h3.h3_to_parent(h3idx, cell_res - 1)
            # finding all the brother cells
            brother_cells = list(h3.h3_to_children(parent, cell_res))
            # dont scale if there is less than X brothers
            # (this counts rows of the frame, not distinct cells)
            if time_win_h3[h3_index_col].isin(
                    brother_cells).sum() < n_min_brothers_to_scale:
                continue
            # finding all the children cells: on pass k, descendants of the
            # parent up to k levels finer than cell_res are merged as well
            for childh3res in range(1, child_h3_res_depth + 1):
                brother_cells.extend(
                    list(h3.h3_to_children(parent, cell_res + childh3res)))
            brothers_df = time_win_h3[time_win_h3[h3_index_col].isin(
                brother_cells)]
            agg_result = agg_brothers(brothers_df)
            # the aggregator signals "do not merge" by returning False
            if agg_result is False:
                continue
            # set the cols to the parent values
            agg_result[h3_index_col] = parent
            agg_result["h3_res"] = cell_res - 1
            # the current row is reused to hold the merged parent row
            time_win_h3.loc[idx] = agg_result
            # drop the rest of the brothers
            time_win_h3.drop([i for i in brothers_df.index if i != idx],
                             inplace=True)
    return time_win_h3.set_index(h3_index_col)
def hex_occupancy(hexs, level=None):
    """
    Count how many times each hex occurs, optionally after converting every
    hex to its parent at ``level``.

    :param hexs: iterable of hexs
    :param level: desired level (all provided hexs should be below or equal to
        level); ``None`` counts the hexs as given. Level ``0`` is a valid H3
        resolution and is honoured.
    :return: dictionary with keys of hexs and values of counts
    """
    res = dict()
    for h in hexs:
        # Compare against None explicitly: a truthiness test would silently
        # skip the conversion for level 0.
        if level is not None:
            h = h3.h3_to_parent(h, level)
        res[h] = res.get(h, 0) + 1
    return res
def to_parent(self) -> Tile:
    """Maps current tile to parent Tile object.

    The parent id is computed by the helper matching ``self.grid_type`` and
    converted back to a tile with ``self.id_to_tile``.

    Returns
    -------
    Tile

    Raises
    ------
    ValueError
        If ``self.grid_type`` is not "s2", "h3", "bing" or "quadtree".
    """
    if self.grid_type == "s2":
        parent_id = s2.s2_to_parent(self.tile_id)
    elif self.grid_type == "h3":
        parent_id = h3.h3_to_parent(self.tile_id)
    elif self.grid_type in ("bing", "quadtree"):
        parent_id = quadtree.tile_to_parent(self.tile_id)
    else:
        # Without this branch an unknown grid type left parent_id unbound and
        # surfaced as an UnboundLocalError on the return line.
        raise ValueError(
            "unsupported grid_type: {!r}".format(self.grid_type))
    return self.id_to_tile(parent_id)
def _get_cell(gjson, resolution, parent_resolution=1, keep_wkt=False):
    """Build a frame of H3 cells covering the first geometry in ``gjson``.

    The geometry is filled with cells at ``resolution``. When the fill yields
    nothing, the single cell containing the first vertex of the geometry's
    first ring is used instead.

    :param gjson: object whose ``.values[0]`` is a GeoJSON-like geometry dict
        with a ``"coordinates"`` entry.
    :param resolution: H3 resolution of the generated cells.
    :param parent_resolution: resolution used for the ``id_parent`` column.
    :param keep_wkt: when true, every row carries the WKT of the input ring;
        otherwise each row carries the WKT of its own cell boundary.
    :return: DataFrame with columns ``id``, ``resolution``, ``id_parent``,
        ``group`` (row position integer-divided by 100) and ``wkt``.
    """
    inverse_coords = True
    geometry = gjson.values[0]

    temp = pd.Series(list(h3.polyfill(geometry, resolution, inverse_coords)),
                     name="id").to_frame()
    if len(temp) == 0:
        # Vertices are stored as (lng, lat). Arguments are passed positionally
        # because the keyword name of geo_to_h3's resolution parameter is not
        # the same in every h3-py 3.x release.
        lng, lat = geometry["coordinates"][0][0][:2]
        temp = pd.Series([h3.geo_to_h3(lat, lng, resolution)],
                         name="id").to_frame()

    temp["resolution"] = resolution
    temp["id_parent"] = temp["id"].apply(
        lambda x: h3.h3_to_parent(x, parent_resolution))
    temp["group"] = (temp.index / 100).astype(int)

    if keep_wkt:
        temp["wkt"] = _to_wkt(geometry["coordinates"][0])
    else:
        temp["wkt"] = temp["id"].apply(
            lambda x: _to_wkt(h3.h3_to_geo_boundary(x, inverse_coords)))
    return temp
def test_h3_to_parent(self):
    """``h3_to_parent`` at resolution 8 returns the expected parent id."""
    child = '89283082813ffff'
    expected_parent = '8828308281fffff'
    actual_parent = h3.h3_to_parent(child, 8)
    self.assertEqual(actual_parent, expected_parent, 'got the parent back')
def sample(hotspots, density_tgt, density_max, R, N):
    """Compute clipped hex densities for interactive hotspots and write two CSV reports.

    Interactive hotspots are counted per resolution-``R`` hex. The counts are
    then rolled up one resolution at a time down to resolution 1: at each step
    the children of every occupied parent are clipped and the parent receives
    the sum of its children's clipped values. Finally a per-hex occupancy CSV
    and a per-hotspot targeting-probability CSV are written to the current
    directory and summary lines are printed.

    :param hotspots: sized iterable of dicts; the code reads ``'location'``,
        ``'name'``, ``'address'`` and ``'geocode'`` (with ``'short_city'`` and
        ``'short_state'``) and passes each entry to ``is_interactive``.
    :param density_tgt: clip applied to a resolution-``R`` hex by default;
        multiplied by 7 for every coarser resolution.
    :param density_max: upper bound for a raised clip at resolution ``R``;
        multiplied by 7 for every coarser resolution.
    :param R: resolution at which hotspots are initially binned.
    :param N: number of at-target children a parent may have before its
        children's clip is raised above the target.
    :return: None.

    NOTE(review): ``lone_wolfs`` is not assigned in this function; presumably
    it is a module-level name - confirm.
    NOTE(review): when ``R`` is 1 the roll-up loop does not run, so
    ``occupied_hexs`` is never assigned before Part 3 reads it.
    """
    # ==============================================================
    # Part 1, find hexs and density of hexs containing interactive
    # hotspots at highest resolution
    # ==============================================================

    # determine density of occupied "tgt_resolution" hexs. This sets our initial conditions. I also track "actual" vs
    # clipped density to find discrepancies
    # hex_density will be keys of hexs (all resolutions) with a value of dict(clipped=0, actual=0, unclipped=0)
    hex_density = dict()
    interactive = 0
    for h in hotspots:
        if is_interactive(h):
            hex = h3.h3_to_parent(h['location'], R)
            interactive += 1
            # initialize the hex if not in dictionary
            if hex not in hex_density:
                hex_density[hex] = dict(clipped=0, actual=0, unclipped=0)
            hex_density[hex]['clipped'] += 1
            hex_density[hex]['actual'] += 1
            hex_density[hex]['unclipped'] += 1

    print(f"{len(hotspots)} hotspots")
    print(f"{len(hex_density)} unique res {R} hexs")
    print(f"{lone_wolfs} lone wolfs")
    print(f"{interactive} interactive hotspots")
    # build a set of R resolution hexs, occupied child hexs are how we build occupied hexs for parent levels
    child_hexs = set(hex_density.keys())

    # ==============================================================
    # Part 2, go from high to low res, clipping density and determining
    # densities of parent hexs
    # ==============================================================

    # iterate through resolutions from just above target to 1, clipping child densities and calculating appropriate hex
    # densities at "resolution"
    for resolution in range(R - 1, 0, -1):
        # hold set of hex's to evaluate
        occupied_hexs = dict()  # key = parent hex, values = list of child hexs
        # density target and limit at child's resolution. This is simply scaled up by increased area
        density = density_tgt * 7**(R - resolution - 1)
        density_limit = density_max * 7**(R - resolution - 1)
        # print(f"res: {resolution+1}, density: {density}, limit: {density_limit}")

        # 1. find all occupied hexs at this resolution based on child hexs
        for h in child_hexs:
            occupied_hexs.setdefault(h3.h3_to_parent(h, resolution), [])
            occupied_hexs[h3.h3_to_parent(h, resolution)].append(h)

        # for each occupied hex at this level, evaluate its children
        for h in occupied_hexs:
            children = occupied_hexs[h]
            # 1. find count of children >= target density, to possibly raise the clipping value if the N threshold is met.
            above_density_cnt = 0
            for c in children:
                # NOTE(review): the fallback dict spells its third key 'uncipped'; only 'clipped' is read from it here.
                if hex_density.get(c, dict(clipped=0, actual=0, uncipped=0))['clipped'] >= density:
                    above_density_cnt += 1

            hex_raw_density = 0
            hex_unclipped_density = 0
            # clip children at density_tgt unless above_density_cnt meets threshold, then calculate appropriate clipping
            clip = density
            if above_density_cnt > N:
                clip = min(density_limit, density * (above_density_cnt - N + 1))

            # iterate through all children clipping density and calculating density for this hex. Note this may not be
            # appropriately clipped since we need to evaluate all this hex's siblings (will be done in next iteration)
            # of outer loop
            for c in children:
                hex_density[c]['clipped'] = min(clip, hex_density[c]['clipped'])
                hex_unclipped_density += hex_density[c]['actual']
                hex_raw_density += hex_density[c]['clipped']

            # set this hex raw density unclipped (will be clipped at parent)
            hex_density[h] = dict(clipped=hex_raw_density, actual=hex_unclipped_density, unclipped=hex_raw_density)

        print(
            f"total of {len(occupied_hexs)} occupied hexes at resolution {resolution}"
        )
        # occupied hex's at this resolution are child hexs in next resolution
        # (iterating the dict yields its keys, i.e. the parent hexs)
        child_hexs = occupied_hexs

    # ==============================================================
    # Part 3, print / store analysis
    # ==============================================================

    # occupied_hex's is now the top level hex evaluated. Start here for descending to target a hotspot
    top_count = 0
    for h in occupied_hexs:
        # print(f"hex {h} has density {hex_density[h]}")
        top_count += hex_density[h]['clipped']

    print(f"total density of all top level hexs = {top_count}")
    # track max/min hex for gut check
    interactive_hspots = 0

    # one row per hex at every resolution that was evaluated
    with open(f'hex_occupancy_R{R}_N{N}_tgt{density_tgt}_max{density_max}.csv', 'w', newline='') as csvfile:
        hex_writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        hex_writer.writerow(
            ['hex', 'resolution', 'density_clipped', 'density_actual'])
        for k in hex_density:
            hex_writer.writerow([
                k,
                h3.h3_get_resolution(k), hex_density[k]['clipped'],
                hex_density[k]['actual']
            ])

    with open(
            f'hotspot_tgting_prob_R{R}_N{N}_tgt{density_tgt}_max{density_max}.csv',
            'w',
            newline='') as csvfile:
        hspot_writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        hspot_writer.writerow(['address', 'name', 'city', 'state', 'prob'])
        # iterate through all interactive hotspots and evaluate probability of targeting. this will be outputted to CSV
        for hspot in hotspots:
            # start at top level and iterate through determining odds of selection
            if not is_interactive(hspot):
                continue
            interactive_hspots += 1
            # at resolution 1 the hex competes against the total of all top-level hexs
            sibling_total = top_count
            sibling_unclipped = 0
            probability = 1
            scale = 1
            for res in range(1, R + 1):
                # for res in range(R, 0, -1):
                hex = h3.h3_to_parent(hspot['location'], res)
                prob_orig = probability
                # share of this hex's clipped density within the enclosing total
                probability *= hex_density[hex]['clipped'] / sibling_total
                scale_orig = scale
                scale *= hex_density[hex]['clipped'] / hex_density[hex][
                    'unclipped']
                # debug trace for one hard-coded hotspot name
                if hspot['name'] == 'blunt-clay-puppy':
                    print(
                        f"{hex} h3res:{res} has density clipped/unclipped of {hex_density[hex]['clipped']:3d}/{hex_density[hex]['unclipped']:3d}, prob reduced: {prob_orig:.3f} to {probability:.3f}"
                    )
                sibling_total = hex_density[hex]['clipped']
                sibling_unclipped = hex_density[hex]['actual']

            # divide by the 'actual' count of the resolution-R hex reached last
            probability *= 1 / sibling_unclipped

            hspot_writer.writerow([
                hspot['address'], hspot['name'], hspot['geocode']['short_city'],
                hspot['geocode']['short_state'], f"{probability:.6f}"
            ])
            # print(f"hotspot {hspot['name']:30} has {sibling_unclipped} hotspots in res8 cell, probability {probability*100:.8f}%")

    print(f"total of {interactive_hspots} interactive hotspots")
def sample_neighbor(hotspots, density_tgt, density_max, R, N):
    """Compute neighbor-clipped hex densities and write occupancy and reward-scale CSVs.

    Interactive hotspots are counted per resolution-``R`` hex and each hex is
    clipped with ``occupied_neighbors(..., method='neighbors')``. For every
    coarser resolution down to 1, each occupied hex receives the sum of its
    children's clipped values and is then clipped the same way. Finally a
    per-hex occupancy CSV and a per-hotspot reward-scale CSV are written to
    the current directory and summary lines are printed.

    :param hotspots: sized iterable of dicts; the code reads ``'location'``,
        ``'name'``, ``'address'`` and ``'geocode'`` (with ``'short_city'`` and
        ``'short_state'``) and passes each entry to ``is_interactive``.
    :param density_tgt: target density at resolution ``R``; multiplied by 7
        for every coarser resolution.
    :param density_max: maximum density at resolution ``R``; multiplied by 7
        for every coarser resolution.
    :param R: resolution at which hotspots are initially binned.
    :param N: forwarded to ``occupied_neighbors``.
    :return: None.

    NOTE(review): ``lone_wolfs`` is not assigned in this function; presumably
    it is a module-level name - confirm.
    NOTE(review): when ``R`` is 1 the roll-up loop does not run, so
    ``occupied_hexs`` is never assigned before Part 3 reads it.
    """
    # ==============================================================
    # Part 1, find hexs and density of hexs containing interactive
    # hotspots at target resolution
    # ==============================================================

    # determine density of occupied "tgt_resolution" hexs. This sets our initial conditions. I also track "actual" vs
    # clipped density to find discrepancies
    # hex_density will be keys of hexs (all resolutions) with a value of dict(clipped=0, actual=0, unclipped=0)
    hex_density = dict()
    interactive = 0
    for h in hotspots:
        if is_interactive(h):
            hex = h3.h3_to_parent(h['location'], R)
            interactive += 1
            # initialize the hex if not in dictionary
            if hex not in hex_density:
                hex_density[hex] = dict(clipped=0, actual=0, unclipped=0)
            hex_density[hex]['clipped'] += 1
            hex_density[hex]['actual'] += 1
            hex_density[hex]['unclipped'] += 1

    # clip every resolution-R hex; occupied_neighbors reads the 'clipped' values that this same
    # loop is updating, so the outcome can depend on iteration order
    for h in hex_density.keys():
        clip = occupied_neighbors(h, density_tgt, density_max, N, hex_density, method='neighbors')
        hex_density[h]['clipped'] = min(hex_density[h]['clipped'], clip)
        hex_density[h]['limit'] = clip

    print(f"{len(hotspots)} hotspots")
    print(f"{len(hex_density)} unique res {R} hexs")
    print(f"{lone_wolfs} lone wolfs")
    print(f"{interactive} interactive hotspots")
    # build a set of R resolution hexs, occupied child hexs are how we build occupied hexs for parent levels
    occupied_higher_res = set(hex_density.keys())

    # ==============================================================
    # Part 2, go from high to low res, clipping density and determining
    # densities of parent hexs
    # ==============================================================

    # iterate through resolutions from just above target to 1, clipping child densities and calculating appropriate hex
    # densities at "resolution"
    for resolution in range(R - 1, 0, -1):
        # hold set of hex's to evaluate
        occupied_hexs = set(
            [])  # occupied parent hexs at this resolution
        # density target and limit at this resolution. This is simply scaled up by increased area
        density_res_tgt = density_tgt * 7**(R - resolution)
        density_res_max = density_max * 7**(R - resolution)

        # 1. find all occupied hexs at this resolution based on child hexs
        for h in occupied_higher_res:
            occupied_hexs.add(h3.h3_to_parent(h, resolution))

        for h in occupied_hexs:
            children = h3.h3_to_children(h, resolution + 1)
            # calculate density of this hex by summing the clipped density of its children
            hex_raw_density = 0
            hex_unclipped_density = 0
            for c in children:
                # children absent from hex_density are unoccupied and contribute nothing
                if c in hex_density:
                    hex_raw_density += hex_density[c]['clipped']
                    hex_unclipped_density += hex_density[c]['actual']
            hex_density[h] = dict(clipped=hex_raw_density, actual=hex_unclipped_density, unclipped=hex_raw_density)

        # now that we have unclipped densities of each occupied hex at this resolution, iterate through all occupied
        # hexs again and apply clipping by looking at neighbors:
        for h in occupied_hexs:
            # neigbhors = h3.hex_range(h, 1)
            # neigbhors = h3.h3_to_children(h3.h3_to_parent(h, resolution - 1), resolution)
            clip = occupied_neighbors(h, density_res_tgt, density_res_max, N, hex_density, method='neighbors')
            hex_density[h]['clipped'] = min(hex_density[h]['clipped'], clip)
            hex_density[h]['limit'] = clip

        # the hexs handled in this pass become the children of the next, coarser pass
        occupied_higher_res = list(occupied_hexs)
        print(
            f"total of {len(occupied_hexs)} occupied hexes at resolution {resolution}"
        )
        # occupied hex's at this resolution are child hexs in next resolution
        # (child_hexs is not read again in this function)
        child_hexs = occupied_hexs

    # ==============================================================
    # Part 3, print / store analysis
    # ==============================================================

    # occupied_hex's is now the top level hex evaluated. Start here for descending to target a hotspot
    top_count = 0
    for h in occupied_hexs:
        # print(f"hex {h} has density {hex_density[h]}")
        top_count += hex_density[h]['clipped']

    print(f"total density of all top level hexs = {top_count}")

    # for k in hex_density.keys():
    #     hex_density[k]['border'] = h3.h3_to_geo_boundary(k, False)

    interactive_hspots = 0

    # one row per hex at every resolution that was evaluated
    with open(f'hex_occupancy_R{R}_N{N}_tgt{density_tgt}_max{density_max}.csv', 'w', newline='') as csvfile:
        hex_writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        hex_writer.writerow([
            'hex', 'resolution', 'density_clipped', 'density_actual',
            'density_limit'
        ])
        for k in hex_density:
            hex_writer.writerow([
                k,
                h3.h3_get_resolution(k), hex_density[k]['clipped'],
                hex_density[k]['actual'], hex_density[k]['limit']
            ])

    with open(
            f'hotspot_RewardScale_R{R}_N{N}_tgt{density_tgt}_max{density_max}.csv',
            'w',
            newline='') as csvfile:
        hspot_writer = csv.writer(csvfile, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        hspot_writer.writerow(
            ['address', 'name', 'city', 'state', 'reward_scale'])
        # iterate through all interactive hotspots and evaluate the reward scale. this will be outputted to CSV
        for hspot in hotspots:
            # walk from resolution R up to resolution 1, accumulating the clipped/unclipped ratio
            if not is_interactive(hspot):
                continue
            interactive_hspots += 1
            scale = 1
            probability = 1
            # for res in range(1, R+1):
            for res in range(R, 0, -1):
                hex = h3.h3_to_parent(hspot['location'], res)
                scale_orig = scale
                scale *= hex_density[hex]['clipped'] / hex_density[hex][
                    'unclipped']
                # debug trace for one hard-coded hotspot name
                if hspot['name'] == 'daring-carmine-penguin':
                    print(
                        f"{hex} h3res:{res} has density clipped/unclipped of {hex_density[hex]['clipped']:3d}/{hex_density[hex]['unclipped']:3d}, scale reduced: {scale_orig:.3f} to {scale:.3f}"
                    )
                sibling_total = hex_density[hex]['clipped']
                sibling_unclipped = hex_density[hex]['actual']

            hspot_writer.writerow([
                hspot['address'], hspot['name'], hspot['geocode']['short_city'],
                hspot['geocode']['short_state'], f"{scale:.5f}"
            ])
            # print(f"hotspot {hspot['name']:30} has {sibling_unclipped} hotspots in res8 cell, probability {probability*100:.8f}%")

    print(f"total of {interactive_hspots} interactive hotspots")
def safe_h3_to_parent(h3_address):
    """Return the resolution-1 parent of ``h3_address`` via ``h3.h3_to_parent``."""
    parent_resolution = 1
    return h3.h3_to_parent(h3_address, parent_resolution)