Example #1
def export_duplicate_areas(dups, path):
    """Export duplicate barriers to a geopackage for QA.

    Parameters
    ----------
    dups : DataFrame
        contains pygeos geometries in the "geometry" column, a "dup_group"
        column identifying each group of duplicates, and a "dup_tolerance"
        column with the buffer distance for each barrier
    path : str or Path
        output path
    """
    dups["geometry"] = pg.buffer(dups.geometry, dups.dup_tolerance)
    dissolved = dissolve(dups[["geometry", "dup_group"]], by="dup_group")
    groups = (dups[["id", "SARPID", "dup_group"
                    ]].join(dissolved.geometry,
                            on="dup_group").groupby("dup_group").agg({
                                "geometry":
                                "first",
                                "SARPID":
                                "unique",
                                "id":
                                "unique"
                            }))
    groups["id"] = groups.id.apply(lambda x: ", ".join([str(s) for s in x]))
    groups["SARPID"] = groups.SARPID.apply(
        lambda x: ", ".join([str(s) for s in x]))
    to_gpkg(groups, path, crs=CRS)
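
# Hypothetical usage sketch (not from the source): pg, dissolve, to_gpkg, and
# CRS are this project's pygeos/serialization helpers, assumed to be in scope.
# The frame below supplies the columns the docstring requires.
import pandas as pd
import pygeos as pg

barriers = pd.DataFrame(
    {
        "id": [1, 2],
        "SARPID": ["s1", "s2"],
        "geometry": pg.points([0.0, 1.0], [0.0, 1.0]),
        "dup_group": [0, 0],
        "dup_tolerance": [10.0, 10.0],
    }
)
export_duplicate_areas(barriers, "/tmp/duplicate_areas")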
Example #2
def export_snap_dist_lines(df, original_locations, out_dir, prefix=""):
    """Creates lines from the original coordinate to the snapped coordinate
    to help QA/QC snapping operation.

    Creates geopackages in out_dir:
    - pre_snap_to_post_snap: line between snapped and unsnapped coordinate
    - pre_snap: unsnapped points
    - post_snap: snapped points

    Parameters
    ----------
    df : DataFrame
        contains pygeos geometries in the "geometry" column
    original_locations : DataFrame
        contains pygeos geometries in the "geometry" column, aligned to df
        on the same index
    out_dir : Path
        output directory
    prefix : str
        prefix to add to each filename
    """
    tmp = df.loc[
        df.snapped, ["geometry", "Name", "SARPID", "snapped", "snap_dist", "snap_log"]
    ].join(original_locations.geometry.rename("orig_pt"))
    tmp["new_pt"] = tmp.geometry.copy()
    tmp["geometry"] = connect_points(tmp.new_pt, tmp.orig_pt)

    to_gpkg(
        tmp.drop(columns=["new_pt", "orig_pt"]).reset_index(drop=True),
        out_dir / "{}pre_snap_to_post_snap".format(prefix),
        crs=CRS,
    )
    to_gpkg(
        tmp.drop(columns=["geometry", "new_pt"])
        .rename(columns={"orig_pt": "geometry"})
        .reset_index(drop=True),
        out_dir / "{}pre_snap".format(prefix),
        crs=CRS,
    )
    to_gpkg(
        tmp.drop(columns=["geometry", "orig_pt"])
        .rename(columns={"new_pt": "geometry"})
        .reset_index(drop=True),
        out_dir / "{}post_snap".format(prefix),
        crs=CRS,
    )
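
# Hypothetical usage sketch (not from the source): connect_points and to_gpkg
# are project helpers assumed to be in scope; df and original_locations share
# the same index, per the docstring above.
import pandas as pd
import pygeos as pg
from pathlib import Path

snapped = pd.DataFrame(
    {
        "geometry": pg.points([0.0], [1.0]),
        "Name": ["Example crossing"],
        "SARPID": ["s1"],
        "snapped": [True],
        "snap_dist": [1.0],
        "snap_log": ["snapped to nearest flowline"],
    }
)
originals = pd.DataFrame({"geometry": pg.points([0.0], [0.0])})
export_snap_dist_lines(snapped, originals, Path("/tmp"), prefix="qa_")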
Example #3
# The source begins this example mid-statement; the call below is reconstructed
# by inference from the surviving closing fragment. The input paths are
# truncated in the source and left elided here.
flowlines = deserialize_dfs(
    [],  # per-region flowline feather paths, truncated in the source
).set_index("lineID")

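# Attach flowline attributes to each waterfall; rows that did not match a
# flowline have no loop flag, so default it to False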
df = df.join(flowlines, on="lineID")
df["loop"] = df.loop.fillna(False)

print(df.groupby("loop").size())

### All done processing!
print("\n--------------\n")
df = df.reset_index(drop=True)

to_geofeather(df, master_dir / "waterfalls.feather")

print("writing GIS for QA/QC")
to_gpkg(df, qa_dir / "waterfalls")
# to_shp(df, qa_dir / "waterfalls.shp")

# Extract out only the snapped ones
df = df.loc[df.snapped & ~(df.duplicate | df.dropped | df.excluded)].reset_index(
    drop=True
)
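# Cast IDs to compact unsigned types; snapped records are expected to always
# have lineID and NHDPlusID set after the filter above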
df.lineID = df.lineID.astype("uint32")
df.NHDPlusID = df.NHDPlusID.astype("uint64")

print("Serializing {0} snapped waterfalls".format(len(df)))
to_geofeather(
    df[["geometry", "id", "HUC2", "lineID", "NHDPlusID", "loop", "waterbody"]],
    snapped_dir / "waterfalls.feather",
)
Example #4
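# Combine the per-region small barrier network results into one DataFrame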
df = deserialize_dfs(
    [
        networks_dir / region / "small_barriers/barriers_network.feather"
        for region in REGION_GROUPS
    ]
)

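# upNetID identifies the functional network upstream of each barrier; collect
# the network IDs that originate at small barriers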
networkIDs = df.loc[df.kind == "small_barrier"].upNetID.unique()

for region in list(REGION_GROUPS.keys()):
    print("\n----------------\n processing {}".format(region))

    networks = from_geofeather(networks_dir / region / "small_barriers" /
                               "network.feather")

    # Extract only the networks associated with small barriers, the rest are dams
    networks = networks.loc[networks.networkID.isin(networkIDs),
                            ["networkID", "geometry"]]

    if len(networks) == 0:
        print("No small barriers in this region, skipping")
        continue

    print("Writing to GPKG")
    to_gpkg(
        networks.reset_index(drop=True),
        data_dir / "tiles" / "small_barriers_network{}".format(region),
        index=False,
        name="networks",
        crs=CRS_WKT,
    )
Example #5
    # This snippet begins mid-loop in the source, inside a per-region
    # processing loop; the steps that build flowlines, joins, waterbodies,
    # wb_joins, and drains are truncated.

    # fix index data type issues
    waterbodies.index = waterbodies.index.astype("uint32")

    print("------------------")

    print("Serializing {:,} flowlines".format(len(flowlines)))
    flowlines = flowlines.reset_index()
    to_geofeather(flowlines, out_dir / "flowlines.feather", crs=CRS)
    serialize_df(joins.reset_index(drop=True),
                 out_dir / "flowline_joins.feather")

    print("Serializing {:,} waterbodies".format(len(waterbodies)))
    to_geofeather(waterbodies.reset_index(),
                  out_dir / "waterbodies.feather",
                  crs=CRS)
    serialize_df(wb_joins.reset_index(drop=True),
                 out_dir / "waterbody_flowline_joins.feather")

    print("Serializing {:,} drain points".format(len(drains)))
    to_geofeather(drains, out_dir / "waterbody_drain_points.feather", crs=CRS)

    # Serialize to GIS files
    print("Serializing to GIS files")
    to_gpkg(flowlines.reset_index(), out_dir / "flowlines", crs=CRS)
    to_gpkg(waterbodies.reset_index(), out_dir / "waterbodies", crs=CRS)
    to_gpkg(drains, out_dir / "waterbody_drain_points", crs=CRS)

    print("Region done in {:.2f}s".format(time() - region_start))

print("==============\nAll done in {:.2f}s".format(time() - start))
Example #6
df = df.join(flowlines, on="lineID")
df["loop"] = df.loop.fillna(False)

print(df.groupby("loop").size())

print("\n--------------\n")

df = df.reset_index(drop=True)

print("Serializing {:,} small barriers".format(len(df)))
to_geofeather(df, master_dir / "small_barriers.feather")


print("writing GIS for QA/QC")
to_gpkg(df, qa_dir / "small_barriers")


# Extract out only the snapped ones
df = df.loc[df.snapped & ~(df.duplicate | df.dropped | df.excluded)].reset_index(
    drop=True
)
df.lineID = df.lineID.astype("uint32")
df.NHDPlusID = df.NHDPlusID.astype("uint64")

print("Serializing {:,} snapped small barriers".format(len(df)))
to_geofeather(
    df[["geometry", "id", "HUC2", "lineID", "NHDPlusID", "loop", "waterbody"]],
    snapped_dir / "small_barriers.feather",
)
Example #7
df = df.join(flowlines, on="lineID")
df["loop"] = df.loop.fillna(False)

print(df.groupby("loop").size())

### All done processing!

print("\n--------------\n")
df = df.reset_index(drop=True)

print("Serializing {:,} dams to master file".format(len(df)))
to_geofeather(df, master_dir / "dams.feather", crs=CRS)

print("writing GIS for QA/QC")
to_gpkg(df, qa_dir / "dams", crs=CRS)


# Extract out only the snapped ones
df = df.loc[df.snapped & ~(df.duplicate | df.dropped | df.excluded)].reset_index(
    drop=True
)
df.lineID = df.lineID.astype("uint32")
df.NHDPlusID = df.NHDPlusID.astype("uint64")

print("Serializing {:,} snapped dams".format(len(df)))
to_geofeather(
    df[["geometry", "id", "HUC2", "lineID", "NHDPlusID", "loop", "waterbody"]],
    snapped_dir / "dams.feather",
    crs=CRS,
)
                         drains.set_index("wbID").geometry, 250)

dams.loc[nearest_drains.index, "wbID"] = nearest_drains
print("Found {:,} nearest neighbors in {:.2f}s".format(len(nearest_drains),
                                                       time() - nearest_start))

print("{:,} dams not associated with waterbodies".format(
    dams.wbID.isnull().sum()))

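# -1 is used as a sentinel for dams not associated with any waterbody
# (presumably so wbID can later be stored as an integer type)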
dams.wbID = dams.wbID.fillna(-1)

print("Serializing...")
# dams = to_gdf(dams, crs=CRS).reset_index(drop=True)
dams = dams.reset_index()
to_geofeather(dams, out_dir / "nhd_dams_pt.feather", crs=CRS)
to_gpkg(dams, out_dir / "nhd_dams_pt")

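# Keep only the dam polygons that still have at least one associated dam point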
nhd_dams = nhd_dams.loc[nhd_dams.index.isin(dams.id.unique())].reset_index()
to_geofeather(nhd_dams, out_dir / "nhd_dams_poly.feather", crs=CRS)
to_gpkg(nhd_dams, out_dir / "nhd_dams_poly")

# TODO: exporting to shapefile segfaults, not sure why.  Issue likely with conversion to shapely geoms

print("==============\nAll done in {:.2f}s".format(time() - start))

#### Old code - tries to use network topology to reduce set of lines per dam:
# counts = downstreams.apply(len)
# ix = dams.loc[dams.id.isin(counts.loc[counts == 1].index)].index
# dams.loc[ix, "newLineID"] = dams.loc[ix].id.apply(lambda id: downstreams.loc[id][0])
# dams.loc[dams.id.isin(ids)].id.apply(lambda id: downstreams.loc[id][0])
# ix = lines_by_dam.loc[counts == 1]
Example #9
# The source begins this example mid-loop; the loop header below is
# reconstructed by analogy with Example #4 (assumed, not verified), and the
# truncated setup presumably defined networkIDs from the dams barriers file.
for region in list(REGION_GROUPS.keys()):
    print("\n----------------\n processing {}".format(region))

    networks = from_geofeather(networks_dir / region / "dams" /
                               "network.feather")

    # Extract only the networks associated with dams
    networks = networks.loc[networks.networkID.isin(networkIDs),
                            ["networkID", "geometry"]]

    if len(networks) == 0:
        print("No small barriers in this region, skipping")
        continue

    print("Writing to GPKG")
    to_gpkg(
        networks.reset_index(drop=True),
        data_dir / "tiles" / "dam_networks{}".format(region),
        index=False,
        name="networks",
        crs=CRS_WKT,
    )

### Region 21 is a special case
region = "21"
print("\n----------------\n processing {}".format(region))
df = from_geofeather(networks_dir / region / "dams" / "barriers.feather")
networkIDs = df.upNetID.unique()

networks = from_geofeather(networks_dir / region / "dams" / "network.feather")
networks = networks.loc[networks.networkID.isin(networkIDs)]

print("Writing to GPKG")
to_gpkg(
    # arguments truncated in the source; presumably the same as the per-region
    # dam networks export above:
    networks.reset_index(drop=True),
    data_dir / "tiles" / "dam_networks{}".format(region),
    index=False,
    name="networks",
    crs=CRS_WKT,
)