def occupied_neighbors(hex, density_tgt, density_max, N, hex_density, method='siblings'):
    """
    Compute the clipped density limit for a hex from how many of its
    neighboring (or sibling) hexs already meet the target density.

    :param hex: hex to query
    :param density_tgt: target density for hexs at this resolution
    :param density_max: maximum density at this resolution
    :param N: neighbor-count threshold offset used when scaling the clip value
    :param hex_density: dictionary of densities at each hex
    :param method: either 'siblings' or 'neighbors'
    :return: clip value -- density_tgt scaled by the number of qualifying
        neighbors beyond N, capped at density_max
    :raises ValueError: if method is not 'siblings' or 'neighbors'
    """
    # Validate up front: previously an unknown method fell through and raised
    # a NameError when `neighbors` was first used.
    if method not in ('siblings', 'neighbors'):
        raise ValueError(f"unknown method {method!r}, expected 'siblings' or 'neighbors'")

    res = h3.h3_get_resolution(hex)
    if method == 'siblings':
        # all children of this hex's parent (includes the hex itself)
        neighbors = h3.h3_to_children(h3.h3_to_parent(hex, res - 1), res)
    else:
        # ring of adjacent cells (includes the hex itself)
        neighbors = h3.hex_range(hex, 1)

    # count occupied neighbors whose clipped density already meets the target
    neighbors_above_tgt = 0
    for n in neighbors:
        if n not in hex_density:
            continue
        if hex_density[n]['clipped'] >= density_tgt:
            neighbors_above_tgt += 1

    # each qualifying neighbor beyond N raises the clip by one density_tgt,
    # never exceeding density_max
    clip = min(density_max, density_tgt * max(1, (neighbors_above_tgt - N + 1)))
    return clip
def query_cell(self, event, context):
    """Return the distinct city names whose indexed cells cover the queried
    H3 address.

    Builds a '#'-joined chain of parent cells from min_res up to the query
    resolution (capped at max_res) and uses it as a begins_with prefix
    against the CellLocationIndex sort key, with the base cell as the
    partition key.
    """
    address = event['h3_address']
    resolution = h3.h3_get_resolution(address)
    base_cell = str(h3.h3_get_base_cell(address))

    # never query deeper than the maximum indexed resolution
    # NOTE(review): min_res / max_res are module-level values defined
    # elsewhere in the file.
    max_query_res = min(resolution, max_res)

    range_query = "#".join(
        h3.h3_to_parent(address, r) for r in range(min_res, max_query_res + 1)
    )

    key_condition_expression = "ParentCell = :parentcell AND begins_with(CellLocationIndex, :index)"
    expression_values = {
        ":parentcell": {"S": base_cell},
        ":index": {"S": range_query},
    }

    resp = self.query_db_table(key_condition_expression, expression_values)

    # de-duplicate city names while preserving first-seen order
    cities = []
    for item in resp['Items']:
        city = item['CityName']['S']
        if city not in cities:
            cities.append(city)
    return cities
def test_h3_get_resolution(self):
    """Round-trip check: an address built at resolution r reports resolution r."""
    lat, lng = 37.3615593, -122.0553238
    for expected_res in range(16):
        address = h3.geo_to_h3(lat, lng, expected_res)
        actual_res = h3.h3_get_resolution(address)
        self.assertEqual(actual_res, expected_res,
                         'Got the expected H3 resolution back')
def choropleth_map(df_agg, value_col, name, border_color='black', fill_opacity=0.7, initial_map=None, with_legend=False, kind='linear'):
    """Render the H3 hexagons in df_agg as a folium GeoJson choropleth layer.

    :param df_agg: DataFrame with a 'hex_id' column and the value column
    :param value_col: column used to scale the fill color of each hexagon
    :param name: layer name passed to folium GeoJson
    :param border_color: hexagon outline color
    :param fill_opacity: hexagon fill opacity (0..1)
    :param initial_map: existing folium Map to add the layer to; a new map
        (centered on the hard-coded default location) is created when None
    :param with_legend: when True, add the colormap legend to the map
    :param kind: 'linear' (green->yellow->red) or 'outlier' (blue->white->red)
    :return: the folium Map with the layer added
    :raises ValueError: if kind is not 'linear' or 'outlier'
    """
    # Validate up front: an unknown kind previously fell through and raised a
    # NameError when custom_cm was first used.
    if kind not in ('linear', 'outlier'):
        raise ValueError(f"unknown kind {kind!r}, expected 'linear' or 'outlier'")

    min_value = df_agg[value_col].min()
    max_value = df_agg[value_col].max()

    if initial_map is None:
        initial_map = Map(
            location=[39.970208, -83.000645],
            zoom_start=13,
            tiles='cartodbpositron',
            attr=(
                '© <a href="http://www.openstreetmap.org/copyright">' +
                'OpenStreetMap</a> contributors © <a href="http://cartodb.' +
                'com/attributions#basemaps">CartoDB</a>'
            )
        )

    if kind == 'linear':
        custom_cm = cm.LinearColormap(
            ['green', 'yellow', 'red'], vmin=min_value, vmax=max_value)
    else:  # 'outlier'
        custom_cm = cm.LinearColormap(
            ['blue', 'white', 'red'], vmin=min_value, vmax=max_value)

    geojson_data = hexagons_dataframe_to_geojson(
        df_hex=df_agg, value_col=value_col)

    GeoJson(
        geojson_data,
        style_function=lambda feature: {
            'fillColor': custom_cm(feature['properties']['value']),
            'color': border_color,
            'weight': 1,
            'fillOpacity': fill_opacity
        },
        name=name
    ).add_to(initial_map)

    if with_legend is True:
        custom_cm.add_to(initial_map)

    return initial_map
def random_location(candidate_hex_addresses, resolution):
    """Pick a random candidate hex, then descend to `resolution` by choosing a
    uniformly random child one level at a time; return the final cell's
    (lat, lng) centroid."""
    cell = random.choice(candidate_hex_addresses)
    start_res = h3.h3_get_resolution(cell)
    # walk down one resolution per step, sampling a child each time
    for res in range(start_res + 1, resolution + 1):
        cell = random.choice(list(h3.h3_to_children(cell, res)))
    return h3.h3_to_geo(cell)
def resolution(self) -> int:
    """Maps tile id to resolution/zoom/size

    Parameters
    ----------
    tile_id : str

    Returns
    -------
    int
        Resolution/zoom/size

    Raises
    ------
    ValueError
        If grid_type is not one of "s2", "h3", "bing" or "quadtree".
    """
    if self.grid_type == "s2":
        return s2.s2_get_resolution(self.tile_id)
    elif self.grid_type == "h3":
        return h3.h3_get_resolution(self.tile_id)
    elif self.grid_type in ("bing", "quadtree"):
        return quadtree.tile_get_resolution(self.tile_id)
    # Previously an unknown grid type silently fell through and returned None,
    # violating the declared `int` return type.
    raise ValueError(f"unsupported grid_type: {self.grid_type!r}")
def sample(hotspots, density_tgt, density_max, R, N):
    """Simulate per-hotspot targeting probability under hierarchical
    (parent/child) density clipping.

    Starting at the finest resolution R, occupied hex densities are clipped
    against sibling counts and rolled up one resolution at a time; each
    interactive hotspot's selection probability is then computed by walking
    back down from the top-level hexs.

    :param hotspots: iterable of hotspot dicts; reads 'location', 'name',
        'address' and 'geocode' keys
    :param density_tgt: target density for occupied hexs at resolution R
    :param density_max: maximum clipped density for hexs at resolution R
    :param R: finest H3 resolution evaluated
    :param N: sibling-count threshold before the clip value is scaled up
    :return: None; side effects are printed stats and two CSV output files
        (hex occupancy and per-hotspot targeting probability)
    """
    # ==============================================================
    # Part 1, find hexs and density of hexs containing interactive
    # hotspots at highest resolution
    # ==============================================================

    # determine density of occupied "tgt_resolution" hexs. This sets our
    # initial conditions. I also track "actual" vs clipped density to find
    # discrepancies.
    # hex_density will be keys of hexs (all resolutions) with a value of
    # dict(clipped=0, actual=0, unclipped=0)
    hex_density = dict()
    interactive = 0
    for h in hotspots:
        if is_interactive(h):
            hex = h3.h3_to_parent(h['location'], R)
            interactive += 1
            # initialize the hex if not in dictionary
            if hex not in hex_density:
                hex_density[hex] = dict(clipped=0, actual=0, unclipped=0)
            hex_density[hex]['clipped'] += 1
            hex_density[hex]['actual'] += 1
            hex_density[hex]['unclipped'] += 1

    print(f"{len(hotspots)} hotspots")
    print(f"{len(hex_density)} unique res {R} hexs")
    # NOTE(review): lone_wolfs is not defined in this function -- presumably a
    # module-level counter maintained elsewhere in the file; confirm it is
    # initialized before sample() runs.
    print(f"{lone_wolfs} lone wolfs")
    print(f"{interactive} interactive hotspots")

    # build a set of R resolution hexs, occupied child hexs are how we build
    # occupied hexs for parent levels
    child_hexs = set(hex_density.keys())

    # ==============================================================
    # Part 2, go from high to low res, clipping density and determining
    # densities of parent hexs
    # ==============================================================

    # iterate through resolution from just above target to 1, clipping child
    # densities and calculating appropriate hex densities at "resolution"
    for resolution in range(R - 1, 0, -1):
        # hold set of hex's to evaluate
        occupied_hexs = dict()  # key = parent hex, values = list of child hexs

        # density target and limit at child's resolution. This is simply
        # scaled up by increased area
        density = density_tgt * 7**(R - resolution - 1)
        density_limit = density_max * 7**(R - resolution - 1)
        # print(f"res: {resolution+1}, density: {density}, limit: {density_limit}")

        # 1. find all occupied hexs at this resolution based on child hexs
        for h in child_hexs:
            occupied_hexs.setdefault(h3.h3_to_parent(h, resolution), [])
            occupied_hexs[h3.h3_to_parent(h, resolution)].append(h)

        # for each occupied hex at this level, evaluate its children
        for h in occupied_hexs:
            children = occupied_hexs[h]

            # 1. find count of children > tgt_density to possibly elevate
            # clipping value if N threshold met.
            above_density_cnt = 0
            for c in children:
                # NOTE(review): 'uncipped' looks like a typo for 'unclipped';
                # harmless here since only 'clipped' is read from the default.
                if hex_density.get(c, dict(clipped=0, actual=0, uncipped=0))['clipped'] >= density:
                    above_density_cnt += 1

            hex_raw_density = 0
            hex_unclipped_density = 0

            # clip children at density_tgt unless above_density_cnt meets
            # threshold, then calculate appropriate clipping
            clip = density
            if above_density_cnt > N:
                clip = min(density_limit, density * (above_density_cnt - N + 1))

            # iterate through all children clipping density and calculating
            # density for this hex. Note this may not be appropriately clipped
            # since we need to evaluate all this hex's siblings (will be done
            # in next iteration of outer loop)
            for c in children:
                hex_density[c]['clipped'] = min(clip, hex_density[c]['clipped'])
                hex_unclipped_density += hex_density[c]['actual']
                hex_raw_density += hex_density[c]['clipped']

            # set this hex raw density unclipped (will be clipped at parent)
            hex_density[h] = dict(clipped=hex_raw_density,
                                  actual=hex_unclipped_density,
                                  unclipped=hex_raw_density)

        print(
            f"total of {len(occupied_hexs)} occupied hexes at resolution {resolution}"
        )
        # occupied hex's at this resolution are child hexs in next resolution
        child_hexs = occupied_hexs

    # ==============================================================
    # Part 3, print / store analysis
    # ==============================================================

    # occupied_hex's is now the top level hex evaluated. Start here for
    # descending to target a hotspot
    top_count = 0
    for h in occupied_hexs:
        #print(f"hex {h} has density {hex_density[h]}")
        top_count += hex_density[h]['clipped']
    print(f"total density of all top level hexs = {top_count}")

    # track max/min hex for gut check
    interactive_hspots = 0

    # dump every hex's clipped vs actual density for offline inspection
    with open(f'hex_occupancy_R{R}_N{N}_tgt{density_tgt}_max{density_max}.csv',
              'w', newline='') as csvfile:
        hex_writer = csv.writer(csvfile,
                                delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        hex_writer.writerow(
            ['hex', 'resolution', 'density_clipped', 'density_actual'])
        for k in hex_density:
            hex_writer.writerow([
                k,
                h3.h3_get_resolution(k), hex_density[k]['clipped'],
                hex_density[k]['actual']
            ])

    with open(
            f'hotspot_tgting_prob_R{R}_N{N}_tgt{density_tgt}_max{density_max}.csv',
            'w', newline='') as csvfile:
        hspot_writer = csv.writer(csvfile,
                                  delimiter=',',
                                  quotechar='"',
                                  quoting=csv.QUOTE_MINIMAL)
        hspot_writer.writerow(['address', 'name', 'city', 'state', 'prob'])

        # iterate through all interactive hotspots and evaluate probability of
        # targeting; each result is written to the CSV.
        for hspot in hotspots:
            # start at top level and iterate through determining odds of
            # selection
            if not is_interactive(hspot):
                continue
            interactive_hspots += 1
            sibling_total = top_count
            sibling_unclipped = 0
            probability = 1
            scale = 1
            for res in range(1, R + 1):
                #for res in range(R, 0, -1):
                hex = h3.h3_to_parent(hspot['location'], res)
                prob_orig = probability
                # chance of selecting this hex among its siblings at this level
                probability *= hex_density[hex]['clipped'] / sibling_total
                scale_orig = scale
                scale *= hex_density[hex]['clipped'] / hex_density[hex][
                    'unclipped']
                if hspot['name'] == 'blunt-clay-puppy':
                    # debug trace for one specific hotspot
                    print(
                        f"{hex} h3res:{res} has density clipped/unclipped of {hex_density[hex]['clipped']:3d}/{hex_density[hex]['unclipped']:3d}, prob reduced: {prob_orig:.3f} to {probability:.3f}"
                    )
                sibling_total = hex_density[hex]['clipped']
                sibling_unclipped = hex_density[hex]['actual']

            # finally, a uniform choice among the hotspots in the res-R hex
            probability *= 1 / sibling_unclipped
            hspot_writer.writerow([
                hspot['address'], hspot['name'], hspot['geocode']['short_city'],
                hspot['geocode']['short_state'], f"{probability:.6f}"
            ])
            # print(f"hotspot {hspot['name']:30} has {sibling_unclipped} hotspots in res8 cell, probability {probability*100:.8f}%")

    print(f"total of {interactive_hspots} interactive hotspots")
def sample_neighbor(hotspots, density_tgt, density_max, R, N):
    """Compute per-hotspot reward scales under neighbor-based density clipping.

    Like sample(), but the clip value for each occupied hex is derived from
    its adjacent neighbors (via occupied_neighbors with method='neighbors')
    rather than only its siblings.

    :param hotspots: iterable of hotspot dicts; reads 'location', 'name',
        'address' and 'geocode' keys
    :param density_tgt: target density for occupied hexs at resolution R
    :param density_max: maximum clipped density for hexs at resolution R
    :param R: finest H3 resolution evaluated
    :param N: neighbor-count threshold passed through to occupied_neighbors
    :return: None; side effects are printed stats and two CSV output files
        (hex occupancy and per-hotspot reward scale)
    """
    # ==============================================================
    # Part 1, find hexs and density of hexs containing interactive
    # hotspots at target resolution
    # ==============================================================

    # determine density of occupied "tgt_resolution" hexs. This sets our
    # initial conditions. I also track "actual" vs clipped density to find
    # discrepancies.
    # hex_density will be keys of hexs (all resolutions) with a value of
    # dict(clipped=0, actual=0, unclipped=0)
    hex_density = dict()
    interactive = 0
    for h in hotspots:
        if is_interactive(h):
            hex = h3.h3_to_parent(h['location'], R)
            interactive += 1
            # initialize the hex if not in dictionary
            if hex not in hex_density:
                hex_density[hex] = dict(clipped=0, actual=0, unclipped=0)
            hex_density[hex]['clipped'] += 1
            hex_density[hex]['actual'] += 1
            hex_density[hex]['unclipped'] += 1

    # clip each occupied res-R hex based on its occupied neighbors, recording
    # the limit used so it can be written to the CSV later
    for h in hex_density.keys():
        clip = occupied_neighbors(h, density_tgt, density_max, N, hex_density, method='neighbors')
        hex_density[h]['clipped'] = min(hex_density[h]['clipped'], clip)
        hex_density[h]['limit'] = clip

    print(f"{len(hotspots)} hotspots")
    print(f"{len(hex_density)} unique res {R} hexs")
    # NOTE(review): lone_wolfs is not defined in this function -- presumably a
    # module-level counter maintained elsewhere in the file; confirm it is
    # initialized before sample_neighbor() runs.
    print(f"{lone_wolfs} lone wolfs")
    print(f"{interactive} interactive hotspots")

    # build a set of R resolution hexs, occupied child hexs are how we build
    # occupied hexs for parent levels
    occupied_higher_res = set(hex_density.keys())

    # ==============================================================
    # Part 2, go from high to low res, clipping density and determining
    # densities of parent hexs
    # ==============================================================

    # iterate through resolution from just above target to 1, clipping child
    # densities and calculating appropriate hex densities at "resolution"
    for resolution in range(R - 1, 0, -1):
        # hold set of hex's to evaluate
        occupied_hexs = set(
            [])  # key = parent hex, values = list of child hexs

        # density target and limit at child's resolution. This is simply
        # scaled up by increased area
        density_res_tgt = density_tgt * 7**(R - resolution)
        density_res_max = density_max * 7**(R - resolution)

        # 1. find all occupied hexs at this resolution based on child hexs
        for h in occupied_higher_res:
            occupied_hexs.add(h3.h3_to_parent(h, resolution))

        for h in occupied_hexs:
            children = h3.h3_to_children(h, resolution + 1)

            # calculate density of this hex by summing the clipped density of
            # its children
            hex_raw_density = 0
            hex_unclipped_density = 0
            for c in children:
                if c in hex_density:
                    hex_raw_density += hex_density[c]['clipped']
                    hex_unclipped_density += hex_density[c]['actual']

            hex_density[h] = dict(clipped=hex_raw_density,
                                  actual=hex_unclipped_density,
                                  unclipped=hex_raw_density)

        # now that we have unclipped densities of each occupied hex at this
        # resolution, iterate through all occupied hexs again and apply
        # clipping by looking at neighbors:
        for h in occupied_hexs:
            #neigbhors = h3.hex_range(h, 1)
            #neigbhors = h3.h3_to_children(h3.h3_to_parent(h, resolution - 1), resolution)
            clip = occupied_neighbors(h, density_res_tgt, density_res_max, N, hex_density, method='neighbors')
            hex_density[h]['clipped'] = min(hex_density[h]['clipped'], clip)
            hex_density[h]['limit'] = clip

        occupied_higher_res = list(occupied_hexs)

        print(
            f"total of {len(occupied_hexs)} occupied hexes at resolution {resolution}"
        )
        # occupied hex's at this resolution are child hexs in next resolution
        child_hexs = occupied_hexs

    # ==============================================================
    # Part 3, print / store analysis
    # ==============================================================

    # occupied_hex's is now the top level hex evaluated. Start here for
    # descending to target a hotspot
    top_count = 0
    for h in occupied_hexs:
        #print(f"hex {h} has density {hex_density[h]}")
        top_count += hex_density[h]['clipped']
    print(f"total density of all top level hexs = {top_count}")

    # for k in hex_density.keys():
    #     hex_density[k]['border'] = h3.h3_to_geo_boundary(k, False)

    interactive_hspots = 0

    # dump every hex's clipped/actual density and the limit applied
    with open(f'hex_occupancy_R{R}_N{N}_tgt{density_tgt}_max{density_max}.csv',
              'w', newline='') as csvfile:
        hex_writer = csv.writer(csvfile,
                                delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_MINIMAL)
        hex_writer.writerow([
            'hex', 'resolution', 'density_clipped', 'density_actual',
            'density_limit'
        ])
        for k in hex_density:
            hex_writer.writerow([
                k,
                h3.h3_get_resolution(k), hex_density[k]['clipped'],
                hex_density[k]['actual'], hex_density[k]['limit']
            ])

    with open(
            f'hotspot_RewardScale_R{R}_N{N}_tgt{density_tgt}_max{density_max}.csv',
            'w', newline='') as csvfile:
        hspot_writer = csv.writer(csvfile,
                                  delimiter=',',
                                  quotechar='"',
                                  quoting=csv.QUOTE_MINIMAL)
        hspot_writer.writerow(
            ['address', 'name', 'city', 'state', 'reward_scale'])

        # iterate through all interactive hotspots and evaluate reward scale;
        # each result is written to the CSV.
        for hspot in hotspots:
            # start at top level and iterate through determining odds of
            # selection
            if not is_interactive(hspot):
                continue
            interactive_hspots += 1
            scale = 1
            probability = 1
            #for res in range(1, R+1):
            for res in range(R, 0, -1):
                hex = h3.h3_to_parent(hspot['location'], res)
                scale_orig = scale
                # accumulate the clipping ratio at each ancestor resolution
                scale *= hex_density[hex]['clipped'] / hex_density[hex][
                    'unclipped']
                if hspot['name'] == 'daring-carmine-penguin':
                    # debug trace for one specific hotspot
                    print(
                        f"{hex} h3res:{res} has density clipped/unclipped of {hex_density[hex]['clipped']:3d}/{hex_density[hex]['unclipped']:3d}, scale reduced: {scale_orig:.3f} to {scale:.3f}"
                    )
                sibling_total = hex_density[hex]['clipped']
                sibling_unclipped = hex_density[hex]['actual']

            hspot_writer.writerow([
                hspot['address'], hspot['name'], hspot['geocode']['short_city'],
                hspot['geocode']['short_state'], f"{scale:.5f}"
            ])
            # print(f"hotspot {hspot['name']:30} has {sibling_unclipped} hotspots in res8 cell, probability {probability*100:.8f}%")

    print(f"total of {interactive_hspots} interactive hotspots")
def choropleth_map(df_aggreg, border_color='black', fill_opacity=0.7, initial_map=None,
                   with_legend=False, kind="linear", coords=None, zoom_start=13):
    """Render the H3 hexagons in df_aggreg ('hex_id' + 'value' columns) as a
    folium GeoJson choropleth layer.

    :param df_aggreg: DataFrame with 'hex_id' and 'value' columns
    :param border_color: hexagon outline color
    :param fill_opacity: hexagon fill opacity (0..1)
    :param initial_map: existing folium Map to add the layer to; created when None
    :param with_legend: when True, add the colormap legend to the map
    :param kind: 'linear', 'outlier', or 'filled_nulls' colormap
    :param coords: [lat, lon] center for a newly created map; defaults to an
        empty list (matching the previous behavior of the old [] default)
    :param zoom_start: initial zoom for a newly created map
    :return: the folium Map with the layer added
    :raises ValueError: if kind is not a supported value
    """
    # Validate up front: an unknown kind previously fell through and raised a
    # NameError when custom_bcm was first used.
    if kind not in ("linear", "outlier", "filled_nulls"):
        raise ValueError(f"unknown kind {kind!r}, expected 'linear', 'outlier' or 'filled_nulls'")
    # avoid the mutable default argument; None means "no explicit center"
    if coords is None:
        coords = []

    #colormap
    min_value = df_aggreg["value"].min()
    max_value = df_aggreg["value"].max()
    m = round((min_value + max_value) / 2, 0)  # midpoint used by 'filled_nulls'

    #take resolution from the first row
    res = h3.h3_get_resolution(df_aggreg.loc[0, 'hex_id'])

    if initial_map is None:
        initial_map = Map(
            location=coords,
            zoom_start=zoom_start,
            tiles="cartodbpositron",
            attr=
            '© <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors © <a href="http://cartodb.com/attributions#basemaps">CartoDB</a>'
        )

    #the colormap
    #color names accepted https://github.com/python-visualization/branca/blob/master/branca/_cnames.json
    if kind == "linear":
        custom_bcm = bcm.LinearColormap(['green', 'yellow', 'red'],
                                        vmin=min_value,
                                        vmax=max_value)
    elif kind == "outlier":
        #for outliers, values would be -11,0,1
        custom_bcm = bcm.LinearColormap(['blue', 'white', 'red'],
                                        vmin=min_value,
                                        vmax=max_value)
    else:  # "filled_nulls"
        custom_bcm = bcm.LinearColormap(['sienna', 'green', 'yellow', 'red'],
                                        index=[0, min_value, m, max_value],
                                        vmin=min_value,
                                        vmax=max_value)

    #create geojson data from dataframe
    geojson_data = hexagons_dataframe_to_geojson(df_hex=df_aggreg)

    #plot on map
    name_layer = "Choropleth " + str(res)
    if kind != "linear":
        name_layer = name_layer + kind

    GeoJson(geojson_data,
            style_function=lambda feature: {
                'fillColor': custom_bcm(feature['properties']['value']),
                'color': border_color,
                'weight': 1,
                'fillOpacity': fill_opacity
            },
            name=name_layer).add_to(initial_map)

    #add legend (not recommended if multiple layers)
    if with_legend == True:
        custom_bcm.add_to(initial_map)

    return initial_map
def h3_choropleth_map(df_aggreg: pd.DataFrame, value_to_map: str, kind: str, hour: int,
                      border_color='black', fill_opacity=0.7, initial_map=None,
                      map_center=None, with_legend=True):
    """
    Builds a folium choropleth map from a df containing H3 hex cells and some cell
    value such as 'count'.

    parameters
    ----------
    df_aggreg:pd.DataFrame - df with H3 hex cells in col ['hex_id'] and at least one col
        ['value_to_map'] for cell color.
    value_to_map:str - column name in df to scale and color cells by; falsy means draw
        every cell in a flat blue fill.
    kind:str - 'linear', 'outlier', or 'filled_nulls' colormap.
    hour:int - when not None, only rows matching this hour are drawn; otherwise
        value_to_map is summed over all hours per hex.
    map_center:[lat, lon] - center for a newly created map (defaults to Los Angeles).

    returns
    ----------
    initial_map:folium.Map

    raises
    ----------
    ValueError - if value_to_map is set and kind is not a supported colormap.
    """
    # Validate early: an unsupported kind previously fell through and raised a
    # NameError when custom_cm was first used.
    if value_to_map and kind not in ("linear", "outlier", "filled_nulls"):
        raise ValueError(f"unknown kind {kind!r}, expected 'linear', 'outlier' or 'filled_nulls'")
    if map_center is None:
        # avoid a mutable default argument; previous hard-coded default was LA
        map_center = [34.0522, -118.2437]

    # take resolution from the first row
    res = h3.h3_get_resolution(df_aggreg.loc[0, 'hex_id'])

    if hour is not None:
        df_aggreg = df_aggreg[df_aggreg.hour == hour]
    else:
        # no hour filter: aggregate the value over all hours for each hex
        df_aggreg = df_aggreg.groupby(['hex_id']).agg({value_to_map: 'sum', 'geometry': 'first', 'hex_id': 'first'})

    # create geojson data from dataframe
    geojson_data = hexagons_dataframe_to_geojson(df_hex=df_aggreg)

    if initial_map is None:
        # BUG FIX: previously the map_center parameter was ignored and the LA
        # coordinates were hard-coded here.
        initial_map = Map(location=map_center,
                          zoom_start=11,
                          tiles="cartodbpositron",
                          attr='© <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors © <a href="http://cartodb.com/attributions#basemaps">CartoDB</a>'
                          )

    if value_to_map:
        # colormap bounds
        min_value = df_aggreg[value_to_map].min()
        max_value = df_aggreg[value_to_map].max()
        m = round((min_value + max_value) / 2, 0)  # midpoint for 'filled_nulls'

        # color names accepted https://github.com/python-visualization/branca/blob/master/branca/_cnames.json
        if kind == "linear":
            custom_cm = cm.LinearColormap(['green', 'yellow', 'red'], vmin=min_value, vmax=max_value)
        elif kind == "outlier":
            # for outliers, values would be -11,0,1
            custom_cm = cm.LinearColormap(['blue', 'white', 'red'], vmin=min_value, vmax=max_value)
        else:  # "filled_nulls"
            custom_cm = cm.LinearColormap(['sienna', 'green', 'yellow', 'red'],
                                          index=[0, min_value, m, max_value],
                                          vmin=min_value, vmax=max_value)

        # plot on map
        name_layer = "Choropleth " + str(res)
        if kind != "linear":
            name_layer = name_layer + kind

        GeoJson(
            geojson_data,
            style_function=lambda feature: {
                'fillColor': custom_cm(feature['properties'][value_to_map]),
                'color': border_color,
                'weight': 1,
                'fillOpacity': fill_opacity
            },
            name=name_layer
        ).add_to(initial_map)

        # add legend (not recommended if multiple layers)
        if with_legend == True:
            custom_cm.add_to(initial_map)
    else:
        # no value column: draw every cell with a flat blue fill
        name_layer = "Choropleth " + str(res)
        if kind != "linear":
            name_layer = name_layer + kind

        GeoJson(
            geojson_data,
            style_function=lambda feature: {
                'fillColor': 'blue',
                # BUG FIX: was the string literal 'border_color', which is not
                # a valid color and ignored the parameter
                'color': border_color,
                'weight': 1,
                'fillOpacity': fill_opacity
            },
            name=name_layer
        ).add_to(initial_map)

    return initial_map
def main(us_hexagons, historical_sightings, model_file, debug, output_file):
    """End-to-end squatchcast pipeline: read US hexagons and historical
    sightings, fetch weather per hexagon from the Dark Sky API, score each
    (hexagon, day) with a loaded model, and write one merged visualization
    CSV.

    :param us_hexagons: open file of hexagons with a 'hex_address' column
    :param historical_sightings: open file of sightings with lat/lon and a
        'number' column
    :param model_file: path to a serialized model with predict_proba
    :param debug: when truthy, only the first five hexagons are processed
    :param output_file: destination path for the final CSV
    """
    logger.info(f"Reading hexagons from {us_hexagons.name}.")
    squatchcast_locations = pd.read_csv(us_hexagons)
    logger.info(f"Read {squatchcast_locations.shape[0]} hexagons.")
    logger.info(
        f"Reading historical sightings from {historical_sightings.name}.")
    # drop sightings with no latitude up front
    historical_sightings_frame = pd.read_csv(historical_sightings).query(
        "~latitude.isnull()")
    logger.info(
        f"Read {historical_sightings_frame.shape[0]} historical_sightings.")
    if debug:
        logger.warning("Debug selected, pulling top five records.")
        squatchcast_locations = squatchcast_locations.head()

    num_locations = squatchcast_locations.shape[0]
    lats = []
    lons = []
    logger.info("Extracting hexagon lat / lon values.")
    # hex centroid -> (lat, lon) for each location
    for _, row in tqdm(squatchcast_locations.iterrows(), total=num_locations):
        lat, lon = h3.h3_to_geo(row.hex_address)
        lats.append(lat)
        lons.append(lon)
    squatchcast_locations.loc[:, "latitude"] = lats
    squatchcast_locations.loc[:, "longitude"] = lons

    # one HTTP session reused across all weather requests
    session = requests.Session()
    logger.info(f"Retrieving the weather for {num_locations} " "locations.")
    weather_conditions = []
    failed = 0
    for _, row in tqdm(squatchcast_locations.iterrows(), total=num_locations):
        request = create_weather_request(row.latitude, row.longitude,
                                         DARK_SKY_KEY)
        try:
            weather_response = session.get(request)
            # Make sure the response worked.
            weather_response.raise_for_status()
            # Now parse the json.
            weather_conditions.append(weather_response.json())
        except requests.HTTPError:
            # best-effort: failed locations are counted and skipped
            failed += 1
    logger.info(f"{failed} requests to Dark Sky failed.")

    # Extract the features a list of dicts. Plan is to turn that into a
    # data frame and concatenate them to the squatchcast_locations.
    logger.info("Unpacking weather results.")
    squatchcast_features = []
    for weather in tqdm(weather_conditions, total=num_locations):
        # Append the current features. Missing fields default to NaN.
        daily = get_in(["daily", "data"], weather, [])
        latitude = get("latitude", weather, np.nan)
        longitude = get("longitude", weather, np.nan)
        for conditions in daily:
            # partially-applied getter over this day's conditions
            get_condition = curry(get)(seq=conditions, default=np.nan)
            squatchcast_features.append({
                "date":
                datetime.utcfromtimestamp(
                    get_condition("time")).strftime("%Y-%m-%d"),
                "latitude": latitude,
                "longitude": longitude,
                "precip_type":
                get("precipType", conditions, "no_precipitation"),
                "temperature_high": get_condition("temperatureHigh"),
                "temperature_low": get_condition("temperatureLow"),
                "dew_point": get_condition("dewPoint"),
                "humidity": get_condition("humidity"),
                "cloud_cover": get_condition("cloudCover"),
                "moon_phase": get_condition("moonPhase"),
                "precip_intensity": get_condition("precipIntensity"),
                "precip_probability": get_condition("precipProbability"),
                "pressure": get_condition("pressure"),
                "uv_index": get_condition("uvIndex"),
                "visibility": get_condition("visibility"),
                "wind_bearing": get_condition("windBearing"),
                "wind_speed": get_condition("windSpeed"),
            })
    squatchcast_frame = pd.DataFrame.from_records(squatchcast_features)

    logger.info(f"Loading model from {model_file}.")
    model = load(model_file)
    logger.info(
        f"Getting predictions for {squatchcast_frame.shape[0]} locations.")
    with yaspin(text="👣 Calculating squatchcast. 👣", color="cyan"):
        # probability of the positive class becomes the squatchcast score
        squatchcast_frame.loc[:, "squatchcast"] = model.predict_proba(
            squatchcast_frame[RAW_FEATURES])[:, 1]

    # Get the resoluton the US hexagon file is at and index the squatchcast
    # results by that resolution.
    us_resolution = h3.h3_get_resolution(
        squatchcast_locations.head(1).hex_address[0])
    squatchcast_frame.loc[:, "hex_address"] = np.apply_along_axis(
        lambda x: h3.geo_to_h3(x[0], x[1], us_resolution),
        axis=1,
        arr=squatchcast_frame[["latitude", "longitude"]].values,
    )
    historical_sightings_frame.loc[:, "hex_address"] = np.apply_along_axis(
        lambda x: h3.geo_to_h3(x[0], x[1], us_resolution),
        axis=1,
        arr=historical_sightings_frame[["latitude", "longitude"]].values,
    )
    # count historical sightings per hexagon
    historical_sightings_agg = (
        historical_sightings_frame.groupby("hex_address").agg({
            "number": "count"
        }).reset_index())

    # Now we need, for each day, a complete hexagonification of the US. We'll
    # do this in a groupby and concatenate.
    visualization_frames = []
    for date, frame in squatchcast_frame.groupby("date"):
        # Merge weather and US hexagons.
        weather_location_merge = pd.merge(
            squatchcast_locations.drop(columns=["latitude", "longitude"]),
            frame,
            on="hex_address",
            how="left",
        )
        # Merge historical sightings.
        visualization_frames.append(
            pd.merge(
                weather_location_merge,
                historical_sightings_agg,
                on="hex_address",
                how="left",
            ).fillna(0).astype({
                "number": "int"
            }).rename(columns={"number": "historical_sightings"}))
    pd.concat(visualization_frames).to_csv(output_file, index=False)