Exemplo n.º 1
0
def connect_points(start, end):
    """Build one two-vertex line per (start, end) pair of points.

    Parameters
    ----------
    start : Series or ndarray
        Points supplying the first vertex of each line.
    end : Series or ndarray
        Points supplying the second vertex of each line.

    Returns
    -------
    Series or ndarray
        A Series carrying the index of ``start`` when ``start`` is a Series;
        otherwise the value returned by ``pg.linestrings``.
    """
    # Only the type of `start` decides whether the result is wrapped in a Series.
    wrap_in_series = isinstance(start, pd.Series)
    if wrap_in_series:
        series_index = start.index
        start = start.values

    if isinstance(end, pd.Series):
        end = end.values

    xs = [pg.get_x(start), pg.get_x(end)]
    ys = [pg.get_y(start), pg.get_y(end)]

    # Stack as [coordinate, vertex, point] and transpose to
    # [point, vertex, coordinate] before handing it to linestrings.
    vertices = np.transpose(np.array([xs, ys]))
    lines = pg.linestrings(vertices)

    return pd.Series(lines, index=series_index) if wrap_in_series else lines
Exemplo n.º 2
0
def mark_duplicates(df, tolerance):
    """Mark points that snap to the same tolerance-sized grid cell as duplicates.

    Each coordinate is divided by ``tolerance``, rounded to the nearest
    integer and multiplied back by ``tolerance``.  Points that share the
    resulting (x, y) pair form one duplicate group; every member of a group
    except the first (in row order) is flagged as a duplicate.  Because this
    is grid snapping, two points closer than ``tolerance`` can still land in
    different groups when they straddle a cell boundary.

    WARNING: no evaluation of the underlying attribute values is performed,
    only spatial de-duplication.

    Parameters
    ----------
    df : GeoDataFrame
        Frame with point geometries.  The input frame is left unmodified; no
        temporary columns are written to it.
    tolerance : number
        Grid size (in projection units) used to snap coordinates.

    Returns
    -------
    GeoDataFrame
        New frame with the columns of ``df`` plus:
        "dup_group" id of each set of duplicates INCLUDING the first of each duplicate
        "dup_count" number of records per duplicate group
        "duplicate" (True if a duplicate EXCEPT first of each duplicate)
    """

    geometries = df.geometry.values.data
    snapped_x = (pg.get_x(geometries) / tolerance).round().astype("int") * tolerance
    snapped_y = (pg.get_y(geometries) / tolerance).round().astype("int") * tolerance

    # Group directly on the snapped coordinate arrays so that no scratch
    # columns have to be attached to (and later dropped from) the frame.
    grouped = df.groupby([snapped_x, snapped_y])

    # ngroup() is the public accessor for the per-row group codes that were
    # previously read from the private grouper.group_info[0].
    dup_group = grouped.ngroup()
    dup_count = dup_group.groupby(dup_group.values).transform("size")

    # duplicated() works positionally, so the first row of every group is
    # kept unflagged even when the index contains repeated labels.
    return df.assign(
        dup_group=dup_group,
        dup_count=dup_count,
        duplicate=dup_group.duplicated(keep="first"),
    )
Exemplo n.º 3
0
def get_y(data):
    """Return the y values for ``data``.

    Delegates to ``pygeos.get_y`` when ``compat.USE_PYGEOS`` is truthy;
    otherwise falls back to ``_unary_op`` on the "y" attribute, with NaN as
    the null value.
    """
    if not compat.USE_PYGEOS:
        return _unary_op("y", data, null_value=np.nan)
    return pygeos.get_y(data)
Exemplo n.º 4
0
# ignore estimated dams or ones on very small streams
# Sets LowheadDam to 2 for records that currently have LowheadDam == -1 and
# meet every condition below (ImpoundmentType 1, Height <= 25, not estimated,
# snapped, and sizeclass other than "1a").
# NOTE(review): the meaning of the codes -1 / 2 / ImpoundmentType 1 is not
# visible here - confirm against the field definitions.
df.loc[
    (df.LowheadDam == -1)
    & (df.ImpoundmentType == 1)
    & (df.Height <= 25)
    & (~df.is_estimated)
    & (df.snapped)
    & (df.sizeclass != "1a"),
    "LowheadDam",
] = 2


### Add lat / lon (must be done after snapping!)
print("Adding lat / lon fields")
# Reproject only the geometry column to GEO_CRS, then read y as lat and x as
# lon (stored as float32) and join them back onto df by index.
geo = df[["geometry"]].to_crs(GEO_CRS)
geo["lat"] = pg.get_y(geo.geometry.values.data).astype("float32")
geo["lon"] = pg.get_x(geo.geometry.values.data).astype("float32")
df = df.join(geo[["lat", "lon"]])


### All done processing!

print("\n--------------\n")
# Discard the existing index in favor of a fresh default one before writing.
df = df.reset_index(drop=True)

print("Serializing {:,} dams to master file".format(len(df)))
# Write the full dataset twice: a feather file in master_dir and a
# FlatGeobuf (.fgb) copy in qa_dir.
df.to_feather(master_dir / "dams.feather")
write_dataframe(df, qa_dir / "dams.fgb")


# Extract out only the snapped ones
Exemplo n.º 5
0
 def y(self):
     """Return the result of ``pygeos.get_y`` applied to this object."""
     return pygeos.get_y(self)
Exemplo n.º 6
0
def test_get_y():
    """Check that ``pygeos.get_y`` over ``[point, point_z]`` yields ``[3.0, 1.0]``."""
    expected = [3.0, 1.0]
    actual = pygeos.get_y([point, point_z])
    assert actual.tolist() == expected
    def _pandas(cls, column, **kwargs):

        column_shape_format = kwargs.get("column_shape_format")
        place = kwargs.get("place")
        geocoder = kwargs.get("geocoder")
        geocoder_config = kwargs.get("geocoder_config")
        min_value = kwargs.get("min_value")
        max_value = kwargs.get("max_value")
        strict_min = kwargs.get("strict_min")
        strict_max = kwargs.get("strict_max")
        units = kwargs.get("units")

        if min_value is None and max_value is None:
            raise ValueError("min_value and max_value cannot both be None")
        if min_value is not None and max_value is not None and min_value > max_value:
            raise ValueError("min_value cannot be greater than max_value")

        if geocoder not in ["nominatim", "pickpoint", "openmapquest"]:
            raise NotImplementedError(
                "The geocoder is not implemented for this method.")

        # find the reference shape with the geocoder.
        if geocoder is not None:
            try:
                # Specify the default parameters for Nominatim and run query. User is responsible for config and query params otherwise.
                query_params = dict(exactly_one=True, geometry="wkt")
                location = cls.geocode(geocoder, geocoder_config, place,
                                       query_params)
            except:
                raise Exception(
                    "Geocoding configuration and query failed to produce a valid result."
                )
        else:
            raise Exception(
                "A valid geocoder must be provided for this method. See GeoPy for reference."
            )

        # Load the column into a pygeos Geometry vector from numpy array (Series not supported).
        if column_shape_format == "wkt":
            shape_test = geos.from_wkt(column.to_numpy(), on_invalid="ignore")
        elif column_shape_format == "wkb":
            shape_test = geos.from_wkb(column.to_numpy(), on_invalid="ignore")
        elif column_shape_format == "lonlat":
            shape_df = pd.DataFrame(column.to_list(), columns=("lon", "lat"))
            shape_test = geos.points(shape_df.lon, y=shape_df.lat)
        elif column_shape_format == "latlon":
            shape_df = pd.DataFrame(column.to_list(), columns=("lat", "lon"))
            shape_test = geos.points(shape_df.lon, y=shape_df.lat)
        else:
            raise NotImplementedError(
                "Column values shape format not implemented.")

        # verify that all shapes are points and if not, convert to centroid point.
        points_test = pd.Series(shape_test)
        if not points_test.apply(lambda x: geos.get_type_id(x) == 0).all():
            points_test = points_test.map(geos.centroid)

        # convert the geos point to a geopy point.
        points_test = points_test.apply(
            lambda x: lonlat(geos.get_x(x), geos.get_y(x)))

        if location is None:
            raise Exception("Geocoding failed to return a result.")
        else:
            point_ref = lonlat(location.longitude, location.latitude)

        # calculate the distance between the points using geopy
        if units in [
                "km", "kilometers", "kilometres", "kilometer", "kilometre"
        ]:
            column_dist = points_test.apply(
                lambda p: distance(p, point_ref).km)
        elif units in ["m", "meters", "metres", "meter", "metre"]:
            column_dist = points_test.apply(lambda p: distance(p, point_ref).m)
        elif units in ["mi", "miles", "mile"]:
            column_dist = points_test.apply(
                lambda p: distance(p, point_ref).mi)
        elif units in ["ft", "feet", "foot"]:
            column_dist = points_test.apply(
                lambda p: distance(p, point_ref).ft)
        else:
            raise NotImplementedError(
                "Unit conversion has not yet been implemented. Please use one of km, m, mi, ft"
            )

        # Evaluate the between statement (from column_values_between.py)
        if min_value is None:
            if strict_max:
                return column_dist < max_value
            else:
                return column_dist <= max_value

        elif max_value is None:
            if strict_min:
                return min_value < column_dist
            else:
                return min_value <= column_dist

        else:
            if strict_min and strict_max:
                return (min_value < column_dist) & (column_dist < max_value)
            elif strict_min:
                return (min_value < column_dist) & (column_dist <= max_value)
            elif strict_max:
                return (min_value <= column_dist) & (column_dist < max_value)
            else:
                return (min_value <= column_dist) & (column_dist <= max_value)