def connect_points(start, end):
    """Build two-point lines that join each start point to its end point.

    Parameters
    ----------
    start : Series or ndarray
        Start points. When a Series is passed, its index is reused for the
        returned Series.
    end : Series or ndarray
        End points, paired positionally with ``start``.

    Returns
    -------
    Series or ndarray
        A Series (indexed like ``start``) when ``start`` is a Series,
        otherwise the array returned by ``pg.linestrings``.
    """
    # Only the type of `start` decides whether a Series is returned.
    series_index = start.index if isinstance(start, pd.Series) else None
    start_points = start if series_index is None else start.values
    end_points = end.values if isinstance(end, pd.Series) else end

    start_x = pg.get_x(start_points)
    start_y = pg.get_y(start_points)
    end_x = pg.get_x(end_points)
    end_y = pg.get_y(end_points)

    # Stacked as (coordinate, vertex, line); the transpose yields
    # (line, vertex, coordinate), one two-vertex coordinate set per line.
    coords = np.array([[start_x, end_x], [start_y, end_y]]).T
    lines = pg.linestrings(coords)

    if series_index is None:
        return lines

    return pd.Series(lines, index=series_index)
def mark_duplicates(df, tolerance):
    """Mark points that are within tolerance of each other.

    Points are snapped to a grid of size ``tolerance``; every point that
    lands on the same grid node belongs to the same duplicate group.

    WARNING: no evaluation of the underlying attribute values is performed,
    only spatial de-duplication.

    Parameters
    ----------
    df : GeoDataFrame
        Point geometries. The input frame is not modified.
    tolerance : number
        Distance (in projection units) within which all points are treated
        as duplicates of the first. Must be non-zero.

    Returns
    -------
    GeoDataFrame
        Copy of ``df`` with these columns added:

        "dup_group": id of each set of duplicates INCLUDING the first record
        "dup_count": number of records in the duplicate group
        "duplicate": True for every record of a group EXCEPT the first

    Raises
    ------
    ValueError
        If ``tolerance`` is zero (the grid snapping divides by it).
    """
    if tolerance == 0:
        raise ValueError("tolerance must be non-zero")

    # Work on a copy so the temporary grid columns never leak into the
    # caller's frame.
    df = df.copy()

    geometries = df.geometry.values.data
    df["temp_x"] = (pg.get_x(geometries) / tolerance).round().astype(
        "int"
    ) * tolerance
    df["temp_y"] = (pg.get_y(geometries) / tolerance).round().astype(
        "int"
    ) * tolerance

    # assign duplicate group ids (public API instead of grouper internals);
    # assigned positionally so a non-unique index cannot misalign the values
    grouped = df.groupby(["temp_x", "temp_y"])
    df["dup_group"] = grouped.ngroup().to_numpy()
    df = df.join(grouped.size().rename("dup_count"), on=["temp_x", "temp_y"])

    # Positional duplicate detection: correct even when index labels repeat.
    df["duplicate"] = df.duplicated(subset=["dup_group"], keep="first")

    return df.drop(columns=["temp_x", "temp_y"])
def get_x(data):
    """Return the x values for ``data``.

    Delegates to ``pygeos.get_x`` when ``compat.USE_PYGEOS`` is truthy;
    otherwise falls back to ``_unary_op`` with ``np.nan`` as the null value.
    """
    if not compat.USE_PYGEOS:
        return _unary_op("x", data, null_value=np.nan)

    return pygeos.get_x(data)
df.loc[ (df.LowheadDam == -1) & (df.ImpoundmentType == 1) & (df.Height <= 25) & (~df.is_estimated) & (df.snapped) & (df.sizeclass != "1a"), "LowheadDam", ] = 2 ### Add lat / lon (must be done after snapping!) print("Adding lat / lon fields") geo = df[["geometry"]].to_crs(GEO_CRS) geo["lat"] = pg.get_y(geo.geometry.values.data).astype("float32") geo["lon"] = pg.get_x(geo.geometry.values.data).astype("float32") df = df.join(geo[["lat", "lon"]]) ### All done processing! print("\n--------------\n") df = df.reset_index(drop=True) print("Serializing {:,} dams to master file".format(len(df))) df.to_feather(master_dir / "dams.feather") write_dataframe(df, qa_dir / "dams.fgb") # Extract out only the snapped ones df = df.loc[df.snapped & (~(df.duplicate | df.dropped | df.excluded))].reset_index(
def x(self):
    """Return the result of ``pygeos.get_x`` applied to this object."""
    x_value = pygeos.get_x(self)
    return x_value
def test_get_x():
    """``pygeos.get_x`` on ``[point, point_z]`` yields ``[2.0, 1.0]``."""
    geometries = [point, point_z]
    expected = [2.0, 1.0]

    actual = pygeos.get_x(geometries).tolist()

    assert actual == expected
def _pandas(cls, column, **kwargs): column_shape_format = kwargs.get("column_shape_format") place = kwargs.get("place") geocoder = kwargs.get("geocoder") geocoder_config = kwargs.get("geocoder_config") min_value = kwargs.get("min_value") max_value = kwargs.get("max_value") strict_min = kwargs.get("strict_min") strict_max = kwargs.get("strict_max") units = kwargs.get("units") if min_value is None and max_value is None: raise ValueError("min_value and max_value cannot both be None") if min_value is not None and max_value is not None and min_value > max_value: raise ValueError("min_value cannot be greater than max_value") if geocoder not in ["nominatim", "pickpoint", "openmapquest"]: raise NotImplementedError( "The geocoder is not implemented for this method.") # find the reference shape with the geocoder. if geocoder is not None: try: # Specify the default parameters for Nominatim and run query. User is responsible for config and query params otherwise. query_params = dict(exactly_one=True, geometry="wkt") location = cls.geocode(geocoder, geocoder_config, place, query_params) except: raise Exception( "Geocoding configuration and query failed to produce a valid result." ) else: raise Exception( "A valid geocoder must be provided for this method. See GeoPy for reference." ) # Load the column into a pygeos Geometry vector from numpy array (Series not supported). 
if column_shape_format == "wkt": shape_test = geos.from_wkt(column.to_numpy(), on_invalid="ignore") elif column_shape_format == "wkb": shape_test = geos.from_wkb(column.to_numpy(), on_invalid="ignore") elif column_shape_format == "lonlat": shape_df = pd.DataFrame(column.to_list(), columns=("lon", "lat")) shape_test = geos.points(shape_df.lon, y=shape_df.lat) elif column_shape_format == "latlon": shape_df = pd.DataFrame(column.to_list(), columns=("lat", "lon")) shape_test = geos.points(shape_df.lon, y=shape_df.lat) else: raise NotImplementedError( "Column values shape format not implemented.") # verify that all shapes are points and if not, convert to centroid point. points_test = pd.Series(shape_test) if not points_test.apply(lambda x: geos.get_type_id(x) == 0).all(): points_test = points_test.map(geos.centroid) # convert the geos point to a geopy point. points_test = points_test.apply( lambda x: lonlat(geos.get_x(x), geos.get_y(x))) if location is None: raise Exception("Geocoding failed to return a result.") else: point_ref = lonlat(location.longitude, location.latitude) # calculate the distance between the points using geopy if units in [ "km", "kilometers", "kilometres", "kilometer", "kilometre" ]: column_dist = points_test.apply( lambda p: distance(p, point_ref).km) elif units in ["m", "meters", "metres", "meter", "metre"]: column_dist = points_test.apply(lambda p: distance(p, point_ref).m) elif units in ["mi", "miles", "mile"]: column_dist = points_test.apply( lambda p: distance(p, point_ref).mi) elif units in ["ft", "feet", "foot"]: column_dist = points_test.apply( lambda p: distance(p, point_ref).ft) else: raise NotImplementedError( "Unit conversion has not yet been implemented. 
Please use one of km, m, mi, ft" ) # Evaluate the between statement (from column_values_between.py) if min_value is None: if strict_max: return column_dist < max_value else: return column_dist <= max_value elif max_value is None: if strict_min: return min_value < column_dist else: return min_value <= column_dist else: if strict_min and strict_max: return (min_value < column_dist) & (column_dist < max_value) elif strict_min: return (min_value < column_dist) & (column_dist <= max_value) elif strict_max: return (min_value <= column_dist) & (column_dist < max_value) else: return (min_value <= column_dist) & (column_dist <= max_value)