def convert_gazetteer_files_to_dataframe(
        self, gazetteer_files: List[GazetteerFile]) -> Optional[DataFrame]:
    """Convert one or more Gazetteer files to a dataframe.

    Skips over null values produced by invalid responses from the
    Gazetteer file endpoint. Returns None when every entry was null.
    """
    nad_83_epsg = 4269  # NAD83 geographic CRS used by Census Gazetteer files
    frames = []
    for table in (t for t in gazetteer_files if t is not None):
        frame = GeoDataFrame(
            table,
            geometry=gpd.points_from_xy(table['INTPTLONG'], table['INTPTLAT']),
        )
        frame.crs = f'EPSG:{nad_83_epsg}'
        # Normalize each row's GEOID against its geography type.
        frame['gazetteer_geo_id'] = frame.apply(
            lambda row: normalize_geo_id(row['GEOID'], row['gazetteer_geo_type']),
            axis=1,
        )
        frames.append(frame)

    # Return null value if no dataframes were obtained
    if not frames:
        return None
    return pd.concat(frames)
def plot_metric(ax: Axes, metric: PlotMetric, gdf: GeoDataFrame) -> Axes:  # type: ignore
    """Draw one metric from `gdf` as a choropleth on `ax` with a slim colorbar."""
    # Reserve a narrow axis on the right of the map for the colorbar.
    colorbar_ax = make_axes_locatable(ax).append_axes("right", size="5%", pad=0.1)
    gdf.plot(  # type: ignore
        metric.metric,
        ax=ax,
        legend=True,
        cmap=metric.cmap,
        vmin=metric.vmin,
        vmax=metric.vmax,
        cax=colorbar_ax,
    )
    ax.set_title(f'{metric.metric.upper()}')
    return ax
def plot_all_regional_error_metrics(
    self,
    gdf: GeoDataFrame,  # type: ignore
    title: str = "",
    **kwargs: Dict,
) -> Tuple[Figure, List[Axes]]:
    """Plot area-based maps of the scores"""
    score_names = ["rmse", "mae", "r2"]
    assert np.isin(score_names, gdf.columns).all()  # type: ignore
    # Regions missing any score would render as gaps; drop them up front.
    gdf = gdf.dropna(subset=score_names)  # type: ignore

    # get the PlotMetric objects, one per score
    metrics = [self.get_metric(name, gdf, **kwargs) for name in score_names]

    # build multi-axis plot: one map per metric, side by side
    fig, axs = plt.subplots(1, 3, figsize=(12, 8))
    for ax, metric in zip(axs, metrics):
        ax = self.plot_metric(gdf=gdf, ax=ax, metric=metric)
        ax.axis("off")
    fig.suptitle(title)
    return fig, axs
def main():
    """Tag each row of a user-supplied CSV with the US state containing its point."""
    filenames = getFileNames()

    print("Loading Shapefiles from " + filenames["shapefile"])
    # Read the state boundary polygons and key them by state name.
    states = GeoDataFrame.from_file(filenames["shapefile"])
    states.set_index("NAME", inplace=True)
    print("Done!")

    print("Loading data from " + filenames["data"])
    data = LoadCSV(filenames["data"])
    print("Done!")

    # Show the available columns, then ask which ones hold the coordinates.
    # Cast to int because they are referenced as numeric indices later on.
    PickColumns(data)
    lat_col = int(prompt.query("Please enter the column number for Latitude: ",
                               default="7", validators=[]))
    lon_col = int(prompt.query("Please enter the column number for Longitude: ",
                               default="8", validators=[]))

    # Fill a fresh 'State' column via a point-in-polygon lookup per row.
    data = AddColumn(data, "State")
    data["State"] = data.apply(
        lambda row: GetStateFromPoint(row, lat_col, lon_col, states), axis=1)

    print("Writing file to " + filenames["output"])
    data.to_csv(filenames["output"])
    print("Done!")
def join_model_performances_to_geometry(
        self, model_performance_df: pd.DataFrame,
        admin_name: str) -> GeoDataFrame:  # type: ignore
    """Join the `geometry` column from the shapefile read in as GeoDataFrame
    to the model performance metrics in model_performance_df.

    Required to make spatial plots of data.

    Arguments:
    ---------
    model_performance_df: pd.DataFrame
        the data showing the model performance for each
    admin_name: str
        the name of the administrative units (shapefile name) stored in
        `self.region_gdfs.keys()`
    """
    valid_names = list(self.region_gdfs.keys())
    assert admin_name in valid_names, (
        "Invalid "
        f"`admin_name`. Expected one of: {valid_names}"
        f" Got: {admin_name}")

    gdf = self.region_gdfs[admin_name].gdf
    gdf_colname = self.region_gdfs[admin_name].gdf_colname
    # str.strip removes whitespace from both ends in one pass — equivalent
    # to the previous chained .apply(str.rstrip).apply(str.lstrip).
    gdf[gdf_colname] = gdf[gdf_colname].apply(str.strip)

    # Inner-join the metrics to the region geometries on the region name.
    df_colname = "region_name"
    out_gdf = GeoDataFrame(  # type: ignore
        pd.merge(
            model_performance_df,
            gdf[[gdf_colname, "geometry"]],
            left_on=df_colname,
            right_on=gdf_colname,
        ))
    return out_gdf
def get_choices(self, layer):
    """Yield (column, column) choice pairs for a WFS layer's attributes."""
    # NOTE: get_schema (DescribeFeatureType) returns mangled property names,
    # so instead fetch a single feature as GeoJSON and read its columns.
    # properties = get_schema(layer.server.url, layer.layername, version=layer.server.version).get('properties',{})
    # return ((prop,prop) for prop in properties.keys())
    response = layer.server.service.getfeature(
        typename=layer.layername, maxfeatures=1, outputFormat='GeoJSON')
    feature_frame = GeoDataFrame.from_features(json.loads(response.read()))
    return ((name, name) for name in feature_frame.columns)
def get_average_prices(ad_type, asset_type):
    """Return a GeoDataFrame of (geometry, price, postcode) for the average
    prices of the given ad/asset type; postcodes without geometry are skipped.
    """
    qfilter = {'ad_type': ad_type, 'asset_type': asset_type}
    prices = session.execute(
        'SELECT postcode, avg_price FROM inmosig_average_prices WHERE ad_type = :ad_type AND '
        'asset_type = :asset_type', qfilter)

    # Collect rows first and build the frame once: DataFrame.append is
    # deprecated (removed in pandas >= 2.0) and re-appending copies the
    # whole frame on every iteration (O(n^2)).
    records = []
    for price in prices.fetchall():
        postcode = postcode_dao.search_by_postcode(price[0])
        if postcode is not None:
            records.append({
                'geometry': to_shape(postcode.geom),
                'price': float(price[1]),
                'postcode': price[0],
            })
    return GeoDataFrame(records, columns=['geometry', 'price', 'postcode'])
def extract_footprint_from_prism(path):
    """Extract ground-floor (height 0.0) building footprints from a CityGML
    prism file and return them as a GeoDataFrame with one polygon per row.
    """
    citygml = etree.parse(path)
    root = citygml.getroot()

    # CityGML 1.0 and 2.0 use different namespace URIs; detect by root tag.
    if root.tag == "{http://www.opengis.net/citygml/1.0}CityModel":
        ns_citygml = "http://www.opengis.net/citygml/1.0"
        ns_bldg = "http://www.opengis.net/citygml/building/1.0"
    else:
        ns_citygml = "http://www.opengis.net/citygml/2.0"
        ns_bldg = "http://www.opengis.net/citygml/building/2.0"

    # `iter()` replaces `getiterator()` (removed from ElementTree in
    # Python 3.9, deprecated in lxml); iterating an element directly
    # replaces the likewise-deprecated `getchildren()`.
    buildings = []
    for city_object in root.iter('{%s}cityObjectMember' % ns_citygml):
        for child in city_object:
            if child.tag == '{%s}Building' % ns_bldg:
                buildings.append(child)

    # Group every room footprint polygon by its floor height.
    footprints_by_floor = {}
    for b in buildings:
        for room in room_finder(b):
            polys = polygon_finder(room)
            footprint, height = extract_footprint(polys)
            footprints_by_floor.setdefault(height, []).append(Polygon(footprint))

    # Only the ground floor (height 0.0) is returned; .get guards against
    # files that contain no ground-floor rooms at all.
    footprints_of_buildings = GeoDataFrame()
    for i, poly in enumerate(footprints_by_floor.get(0.0, [])):
        footprints_of_buildings.loc[i, 'geometry'] = poly
    return footprints_of_buildings
def _plot_single_gdf(ax: Axes,
                     gdf: GeoDataFrame,
                     column_to_plot: str,
                     title: Optional[str] = None,
                     cmap: Optional[str] = 'viridis',
                     vmin: Optional[float] = None,
                     vmax: Optional[float] = None) -> Axes:
    """Plot one column of `gdf` on `ax` as a choropleth with a slim colorbar."""
    # Carve out a narrow axis on the right of the map for the colorbar.
    colorbar_ax = make_axes_locatable(ax).append_axes("right", size="5%", pad=0.1)
    gdf.plot(
        column_to_plot,
        ax=ax,
        legend=True,
        cmap=cmap,
        vmin=vmin,
        vmax=vmax,
        cax=colorbar_ax,
    )
    ax.set_title(title)
    return ax
def plot_regional_error_metric(
        self, gdf: GeoDataFrame,  # type: ignore
        selection: str) -> Tuple[Figure, Axes]:
    """Plot one regional error metric ('rmse', 'mae' or 'r2') as a map."""
    valid_metrics = ['rmse', 'mae', 'r2']
    assert selection in valid_metrics, 'Expecting selection' \
        f' to be one of: {valid_metrics}'

    # Drop regions missing any score so the choropleth has complete data.
    gdf = gdf.dropna(subset=valid_metrics)  # type: ignore

    fig, ax = plt.subplots()
    ax = self.plot_metric(gdf=gdf, ax=ax, metric=self.get_metric(selection))
    return fig, ax
def plot_regional_error_metric(
    self,
    gdf: GeoDataFrame,  # type: ignore
    selection: str,
    **kwargs: Dict,
) -> Tuple[Figure, Axes]:
    """Plot one regional error metric ('rmse', 'mae' or 'r2') as a map."""
    valid_metrics = ["rmse", "mae", "r2"]
    assert selection in valid_metrics, ("Expecting selection"
                                        f" to be one of: {valid_metrics}")

    # Drop regions missing any score so the choropleth has complete data.
    gdf = gdf.dropna(subset=valid_metrics)  # type: ignore

    fig, ax = plt.subplots()
    chosen_metric = self.get_metric(selection, gdf, **kwargs)
    ax = self.plot_metric(gdf=gdf, ax=ax, metric=chosen_metric)
    return fig, ax
def slice_polys(imgf, size=(512, 512), overlap=6):
    """ Get Polygons Corresponding to Slices """
    n_rows, n_cols = imgf.meta["height"], imgf.meta["width"]
    # Pixel indices where each tile starts; consecutive tiles share
    # `overlap` pixels along each axis.
    row_starts = np.arange(0, n_rows, size[0] - overlap)
    col_starts = np.arange(0, n_cols, size[1] - overlap)
    # Map pixel indices to geographic coordinates.
    lats = np.linspace(imgf.bounds.bottom, imgf.bounds.top, n_rows)
    longs = np.linspace(imgf.bounds.left, imgf.bounds.right, n_cols)

    polys = [
        shapely.geometry.box(longs[col_starts[j]], lats[row_starts[i]],
                             longs[col_starts[j + 1]], lats[row_starts[i + 1]])
        for i in range(len(row_starts) - 1)
        for j in range(len(col_starts) - 1)
    ]
    return GeoDataFrame(geometry=polys, crs=imgf.meta["crs"].to_string())
def plot_all_regional_error_metrics(
        self, gdf: GeoDataFrame,  # type: ignore
        title: str = '') -> Tuple[Figure, List[Axes]]:
    """Plot area-based maps of the scores"""
    assert np.isin(['rmse', 'mae', 'r2'], gdf.columns).all()  # type: ignore
    gdf = gdf.dropna(subset=['rmse', 'mae', 'r2'])  # type: ignore

    # get the PlotMetric objects
    rmse = self.get_metric('rmse')
    mae = self.get_metric('mae')
    r2 = self.get_metric('r2')

    # build multi-axis plot
    fig, axs = plt.subplots(1, 3, figsize=(24, 6))
    for i, metric in enumerate([rmse, mae, r2]):
        ax = axs[i]
        ax = self.plot_metric(gdf=gdf, ax=ax, metric=metric)

    # BUG FIX: Figure has no `set_suptitle` method — the correct matplotlib
    # API is `Figure.suptitle`; the original line raised AttributeError.
    fig.suptitle(title)
    return fig, axs
def merge_all_model_performances_gdfs(
        self, all_models_df: pd.DataFrame) -> GeoDataFrame:  # type: ignore
    """Join geometry onto the per-admin-level model performance rows, store
    the concatenated result on `self.gdf`, and return it as a GeoDataFrame.
    """
    assert "admin_level_name" in all_models_df.columns, (
        f"Expect to find admin_region"
        f"in {all_models_df.columns}")

    # Join the geometry columns to make one GeoDataFrame per admin level.
    all_gdfs: List[GeoDataFrame] = []  # type: ignore
    for admin_name in all_models_df.admin_level_name.unique():
        admin_subset = all_models_df.loc[
            all_models_df.admin_level_name == admin_name]
        all_gdfs.append(
            self.join_model_performances_to_geometry(
                model_performance_df=admin_subset, admin_name=admin_name))
    self.gdf = pd.concat(all_gdfs)

    # Convert mean model outputs to float; fall back to the error-metric
    # columns when the mean-value columns are absent (KeyError).
    try:
        self.gdf = self.gdf.astype(  # type: ignore
            {
                "predicted_mean_value": "float64",
                "true_mean_value": "float64"
            })
    except KeyError:
        self.gdf = self.gdf.astype(  # type: ignore
            {
                "rmse": "float64",
                "mae": "float64",
                "r2": "float64"
            })

    print("* Assigned the complete GeoDataFrame to `RegionGeoPlotter.gdf`")
    # pd.concat may return a plain DataFrame; coerce back to GeoDataFrame.
    if not isinstance(self.gdf, GeoDataFrame):  # type: ignore
        self.gdf = GeoDataFrame(self.gdf)  # type: ignore
    return self.gdf
def simplify_and_mapping(data_source):
    """Compare Douglas-Peucker vs. prism ("indoor") simplification on a set of
    building footprints, print summary statistics, and save an overlay map as
    `../examples/<data_source>.pdf`.

    data_source: 'lwm' loads a local CityGML prism file; anything else is
    treated as a Manhattan, New York place name fetched via osmnx.
    """
    if data_source == 'lwm':
        tau = 2
        buildings = extract_footprint_from_prism('../data/lwm-prism.gml')
    else:
        # Only for Manhattan, New York
        tau = 0.00003
        buildings = ox.footprints_from_place(
            '{}, Manhattan, New York City'.format(data_source))
    douglas_peucker_buildings = GeoDataFrame()
    simplified_buildings = GeoDataFrame()
    # Accumulators indexed [0]=Douglas-Peucker, [1]=indoor simplification.
    sum_haus = [0.0, 0.0]
    total_points = [0, 0]
    tolerance = tau * 3 / 5

    def comparison(footprint, i):
        # Simplify one polygon with both methods, store results at row i,
        # and accumulate Hausdorff distance + vertex counts per method.
        new_footprint = prism.simplify(footprint, tau=tau, epsilon=math.pi / 30)
        if new_footprint is not None:
            simplified_buildings.loc[i, 'geometry'] = new_footprint
            haus = footprint.hausdorff_distance(new_footprint)
            sum_haus[1] += haus
            total_points[1] += len(new_footprint.exterior.coords)
        dp_footprint = footprint.simplify(tolerance)
        douglas_peucker_buildings.loc[i, 'geometry'] = dp_footprint
        haus = footprint.hausdorff_distance(dp_footprint)
        sum_haus[0] += haus
        total_points[0] += len(dp_footprint.exterior.coords)

    # Walk every geometry, flattening MultiPolygons into their members.
    count = 0
    for geom in buildings['geometry']:
        if geom.geom_type == 'Polygon':
            comparison(geom, count)
            count += 1
        if geom.geom_type == 'MultiPolygon':
            for poly in geom:
                comparison(poly, count)
                count += 1
    print("Average Hausdorff Distance (Douglas Peucker):", sum_haus[0] / count)
    print("Average Hausdorff Distance (Indoor Simplification):",
          sum_haus[1] / count)
    print("Total Number of Points (Douglas Peucker):", total_points[0])
    print("Total Number of Points (Indoor Simplification):", total_points[1])
    cell_text = [[tolerance, tau],
                 [sum_haus[0] / count, sum_haus[1] / count],
                 [total_points[0], total_points[1]]]

    # mapping: figure sized to the data's aspect ratio
    minx, miny, maxx, maxy = buildings.total_bounds
    map_scale = 50
    width = maxx - minx
    height = maxy - miny
    ratio = width / height
    mbr = (ratio * map_scale, map_scale)
    fig, ax = plt.subplots(figsize=mbr)
    # Overlay the three layers with low alpha so differences are visible.
    buildings.plot(ax=ax, facecolor='green', edgecolor='grey',
                   linewidth=0.2, alpha=0.1)
    douglas_peucker_buildings.plot(ax=ax, facecolor='blue', alpha=0.1)
    simplified_buildings.plot(ax=ax, facecolor='red', alpha=0.1)
    ax.table(cellText=cell_text,
             rowLabels=[
                 "Distance Tolerance", "Average Hausdorff Distance",
                 "Total Number of Points"
             ],
             colLabels=["Douglas Peucker", "Indoor Simplification"],
             colWidths=[0.05 / ratio, 0.05 / ratio],
             loc='lower right')
    legend_elements = [
        Patch(facecolor='green',
              edgecolor='grey',
              linewidth=0.2,
              alpha=0.1,
              label='Original'),
        Patch(facecolor='blue', alpha=0.1, label='Douglas Peucker'),
        Patch(facecolor='red', alpha=0.1, label='Indoor Simplification')
    ]
    ax.legend(handles=legend_elements,
              loc='upper right',
              title='Simplification Method',
              fontsize=map_scale,
              title_fontsize=map_scale)
    plt.tight_layout()
    plt.savefig('../examples/{}.pdf'.format(data_source), format='pdf')
postcode_dao = PostcodeDAO(session)
way_dao = WayDAO(session)

# Street representation with GeoSeries.
# Extract all streets and draw them as the base layer.
ways = [to_shape(way.geom) for way in way_dao.getAll()]
wgs = GeoSeries(ways)
base = wgs.plot(color="blue")

qfilter = {'ad_type': 'RENT', 'asset_type': 'GARAGE'}
prices = session.execute(
    'SELECT postcode, avg_price FROM inmosig_average_prices WHERE ad_type = :ad_type AND '
    'asset_type = :asset_type', qfilter)

# Build the rows first and construct the GeoDataFrame once:
# `GeoDataFrame.append` is deprecated (removed in pandas >= 2.0) and
# re-appending copies the whole frame on every iteration (O(n^2)).
# (Also dropped a dead `gdf.head()` call whose result was discarded.)
records = []
for price in prices.fetchall():
    postcode = postcode_dao.search_by_postcode(price[0])
    if postcode is not None:
        records.append({
            'geometry': to_shape(postcode.geom),
            'price': float(price[1]),
            'postcode': price[0],
        })
gdf = GeoDataFrame(records, columns=['geometry', 'price', 'postcode'])

# Choropleth of average price per postcode over the street base map.
gdf.plot(ax=base,
         column='price',
         cmap='OrRd',
         scheme="quantiles",
         legend=True)
plt.show()
def to_crs(self, epsg_to: str) -> BaseGeometry:
    """Reproject this object's geometry to `epsg_to`; no-op when already there."""
    if self.epsg_code == epsg_to:
        return self.geo
    # Wrap the geometry in a one-row GeoDataFrame to reuse geopandas'
    # reprojection machinery, then unwrap the single result.
    wrapper = GeoDataFrame(crs=self.epsg_code, geometry=[self.geo])
    reprojected = wrapper.to_crs(epsg_to).geometry.values
    return reprojected[0]