Example #1
def test_line_locate_point_empty(normalized):
    assert np.isnan(
        pygeos.line_locate_point(line_string, empty_point, normalized=normalized)
    )
    assert np.isnan(
        pygeos.line_locate_point(empty_line_string, point, normalized=normalized)
    )
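This and the other pygeos tests below rely on shared module-level fixtures. A minimal sketch of plausible definitions follows; the geometries are assumptions inferred from the expected results, not the test suite's actual values:

import pygeos
import pytest

# hypothetical fixture geometries (WKT chosen to be consistent with the asserts)
line_string = pygeos.Geometry("LINESTRING (0 0, 1 0, 1 1)")
linear_ring = pygeos.Geometry("LINEARRING (0 0, 1 0, 1 1, 0 1, 0 0)")
polygon = pygeos.Geometry("POLYGON ((0 0, 2 0, 2 2, 0 2, 0 0))")
empty_line_string = pygeos.Geometry("LINESTRING EMPTY")
point = pygeos.Geometry("POINT (2 2)")
empty_point = pygeos.Geometry("POINT EMPTY")

@pytest.fixture(params=[False, True])
def normalized(request):
    # run each test with both absolute and fraction-of-length positions
    return request.param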
Example #2
def cut_line_at_points(line, cut_points, tolerance=1e-6):
    """Cut a pygeos line geometry at points.
    If there are no interior points, the original line will be returned.

    Parameters
    ----------
    line : pygeos Linestring
    cut_points : list-like of pygeos Points
        will be projected onto the line; those interior to the line will be
        used to cut the line into new segments.
    tolerance : float, optional (default: 1e-6)
        minimum distance from endpoints to consider the points interior
        to the line.

    Returns
    -------
    MultiLineString (or LineString, if unchanged)
    """
    if pg.get_type_id(line) != 1:
        raise ValueError("line is not a single linestring")

    vertices = pg.get_point(line, range(pg.get_num_points(line)))
    offsets = pg.line_locate_point(line, vertices)
    cut_offsets = pg.line_locate_point(line, cut_points)
    # only keep those that are interior to the line and ignore those very close
    # to endpoints or beyond endpoints
    cut_offsets = cut_offsets[(cut_offsets > tolerance)
                              & (cut_offsets < offsets[-1] - tolerance)]

    if len(cut_offsets) == 0:
        # nothing to cut, return original
        return line

    # get coordinates of new vertices from the cut points (interpolated onto the line)
    cut_offsets.sort()

    # add in the last coordinate of the line
    cut_offsets = np.append(cut_offsets, offsets[-1])

    # TODO: convert this to a pygeos ufunc
    coords = pg.get_coordinates(line)
    cut_coords = pg.get_coordinates(
        pg.line_interpolate_point(line, cut_offsets))
    lines = []
    orig_ix = 0
    for cut_ix in range(len(cut_offsets)):
        offset = cut_offsets[cut_ix]

        segment = []
        if cut_ix > 0:
            segment = [cut_coords[cut_ix - 1]]
        while offsets[orig_ix] < offset:
            segment.append(coords[orig_ix])
            orig_ix += 1

        segment.append(cut_coords[cut_ix])
        lines.append(pg.linestrings(segment))

    return pg.multilinestrings(lines)
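A minimal usage sketch for this helper (coordinates are arbitrary; assumes numpy as np and pygeos as pg, as in the function body):

import numpy as np
import pygeos as pg

line = pg.linestrings([[0, 0], [10, 0]])
cut_points = pg.points([[2, 1], [7, -1]])  # project onto the line at x=2 and x=7

result = cut_line_at_points(line, cut_points)
print(pg.get_type_id(result))  # 5 -> MultiLineString (three segments)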
Example #3
def project(data, other, normalized=False):
    if compat.USE_PYGEOS:
        try:
            return pygeos.line_locate_point(data, other, normalized=normalized)
        except TypeError:  # support for pygeos<0.9
            return pygeos.line_locate_point(data, other, normalize=normalized)
    else:
        return _binary_op("project", data, other, normalized=normalized)
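Note: pygeos 0.9 renamed the normalize keyword to normalized. The try/except lets this compatibility shim pass the new keyword first and fall back to the old spelling on older pygeos releases.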
Example #4
def test_line_locate_point_invalid_geometry(normalized):
    with pytest.raises(pygeos.GEOSException):
        pygeos.line_locate_point(line_string,
                                 line_string,
                                 normalized=normalized)

    with pytest.raises(pygeos.GEOSException):
        pygeos.line_locate_point(polygon, point, normalized=normalized)
Example #5
def test_line_locate_point_empty():
    assert np.isnan(pygeos.line_locate_point(line_string, empty_point))
    assert np.isnan(pygeos.line_locate_point(empty_line_string, point))
Example #6
def test_line_locate_point_none():
    assert np.isnan(pygeos.line_locate_point(line_string, None))
    assert np.isnan(pygeos.line_locate_point(None, point))
Example #7
def test_line_locate_point_geom_array2():
    points = pygeos.points([[0, 0], [1, 0]])
    actual = pygeos.line_locate_point(line_string, points)
    np.testing.assert_allclose(actual, [0.0, 1.0])
Example #8
def test_line_locate_point_geom_array():
    point = pygeos.points(0, 1)
    actual = pygeos.line_locate_point([line_string, linear_ring], point)
    np.testing.assert_allclose(actual, [0.0, 3.0])
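Assuming the unit-square fixtures sketched under Example #1, the point (0, 1) projects onto the start of line_string (position 0.0) and lies three units along linear_ring, hence the expected [0.0, 3.0].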
Example #9
def project(data, other, normalized=False):
    if compat.USE_PYGEOS:
        return pygeos.line_locate_point(data, other, normalize=normalized)
    else:
        return _binary_op("project", data, other, normalized=normalized)
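Note: this is an older variant of the shim in Example #3; it passes the pre-0.9 normalize keyword unconditionally, so it only works with pygeos releases that predate the rename to normalized.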
Example #10
def find_dam_face_from_waterbody(waterbody, drain_pt):
    total_area = pg.area(waterbody)
    ring = pg.get_exterior_ring(pg.normalize(waterbody))
    total_length = pg.length(ring)
    num_pts = pg.get_num_points(ring) - 1  # drop closing coordinate
    vertices = pg.get_point(ring, range(num_pts))

    ### Extract line segments spanning no more than 1/3 of the polygon's coordinates
    # starting from the vertex nearest the drain
    # note: lower numbers are to the right
    tree = pg.STRtree(vertices)
    ix = tree.nearest(drain_pt)[1][0]
    side_width = min(num_pts // 3, MAX_SIDE_PTS)
    left_ix = ix + side_width
    right_ix = ix - side_width

    # extract these as a left-to-right line
    pts = vertices[max(right_ix, 0):min(num_pts, left_ix)][::-1]
    if left_ix >= num_pts:
        pts = np.append(vertices[0:left_ix - num_pts][::-1], pts)

    if right_ix < 0:
        pts = np.append(pts, vertices[num_pts + right_ix:num_pts][::-1])

    coords = pg.get_coordinates(pts)

    if len(coords) > 2:
        # first run a simplification process to extract the major shape and bends
        # then run the straight line algorithm
        simp_coords, simp_ix = simplify_vw(
            coords, min(MAX_SIMPLIFY_AREA, total_area / 100))

        if len(simp_coords) > 2:
            keep_coords, ix = extract_straight_segments(
                simp_coords, max_angle=MAX_STRAIGHT_ANGLE, loops=5)
            keep_ix = simp_ix.take(ix)

        else:
            keep_coords = simp_coords
            keep_ix = simp_ix

    else:
        keep_coords = coords
        keep_ix = np.arange(len(coords))

    ### Calculate the length of each run and drop any that are not sufficiently long
    lengths = segment_length(keep_coords)
    ix = (lengths >= MIN_DAM_WIDTH) & (lengths / total_length < MAX_WIDTH_RATIO)

    pairs = np.dstack([keep_ix[:-1][ix], keep_ix[1:][ix]])[0]

    # since ranges are ragged, we have to do this in a loop instead of vectorized
    segments = []
    for start, end in pairs:
        segments.append(pg.linestrings(coords[start:end + 1]))

    segments = np.array(segments)

    # only keep the segments that are close to the drain
    segments = segments[pg.intersects(segments, pg.buffer(drain_pt, MAX_DRAIN_DIST))]

    if not len(segments):
        return segments

    # only keep those where the drain is interior to the line
    pos = pg.line_locate_point(segments, drain_pt)
    lengths = pg.length(segments)

    ix = (pos >= MIN_INTERIOR_DIST) & (pos <= (lengths - MIN_INTERIOR_DIST))

    return segments[ix]
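The final filter keeps only segments where the drain sits well inside the line. A small standalone sketch of that test (MIN_INTERIOR_DIST value assumed):

import pygeos as pg

MIN_INTERIOR_DIST = 5.0  # hypothetical value

segments = pg.linestrings([[[0, 0], [100, 0]], [[0, 0], [8, 0]]])
drain_pt = pg.points(10, 0)

pos = pg.line_locate_point(segments, drain_pt)   # [10.0, 8.0]
lengths = pg.length(segments)                    # [100.0, 8.0]
keep = (pos >= MIN_INTERIOR_DIST) & (pos <= lengths - MIN_INTERIOR_DIST)
print(segments[keep])  # only the long segment; the drain falls on the end of the other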
Example #11
def snap_to_flowlines(df, to_snap):
    """Snap to nearest flowline, within tolerance

    Updates df with snapping results, and returns to_snap as set of dams still
    needing to be snapped after this operation.

    Parameters
    ----------
    df : GeoDataFrame
        master dataset, this is where all snapping gets recorded
    to_snap : DataFrame
        data frame containing pygeos geometries to snap ("geometry")
        and snapping tolerance ("snap_tolerance")

    Returns
    -------
    tuple of (GeoDataFrame, DataFrame)
        (df, to_snap)
    """

    for region, HUC2s in list(REGION_GROUPS.items()):
        region_start = time()

        print("\n----- {} ------\n".format(region))

        print("Reading flowlines...")
        flowlines = from_geofeather(
            nhd_dir / "clean" / region / "flowlines.feather"
        ).set_index("lineID")

        in_region = to_snap.loc[to_snap.HUC2.isin(HUC2s)]
        print(
            "Selected {:,} barriers in region to snap against {:,} flowlines".format(
                len(in_region), len(flowlines)
            )
        )

        if len(in_region) == 0:
            print("No barriers in region to snap")
            continue

        print("Finding nearest flowlines...")
        # TODO: can use near instead of nearest, and persist list of near lineIDs per barrier
        # so that we can construct subnetworks with just those
        lines = nearest(
            in_region.geometry, flowlines.geometry, in_region.snap_tolerance
        )
        lines = lines.join(in_region.geometry).join(
            flowlines.geometry.rename("line"), on="lineID",
        )

        # project the point to the line,
        # find out its distance on the line,
        # then interpolate its new coordinates
        lines["geometry"] = pg.line_interpolate_point(
            lines.line, pg.line_locate_point(lines.line, lines.geometry)
        )

        ix = lines.index
        df.loc[ix, "snapped"] = True
        df.loc[ix, "geometry"] = lines.geometry
        df.loc[ix, "snap_dist"] = lines.distance
        df.loc[ix, "snap_ref_id"] = lines.lineID
        df.loc[ix, "lineID"] = lines.lineID
        df.loc[ix, "snap_log"] = ndarray_append_strings(
            "snapped: within ",
            to_snap.loc[ix].snap_tolerance,
            "m tolerance of flowline",
        )

        to_snap = to_snap.loc[~to_snap.index.isin(ix)].copy()

        print(
            "{:,} barriers snapped in region in {:.2f}s".format(
                len(ix), time() - region_start
            )
        )

    # TODO: flag those that joined to loops

    return df, to_snap
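The locate-then-interpolate step above is the core snapping trick: find the position of the closest point on the line, then resolve that position back to coordinates. A minimal standalone sketch:

import pygeos as pg

line = pg.linestrings([[0, 0], [10, 0]])
barrier = pg.points(3, 2)

# distance along the line of the closest point, then that point's coordinates
snapped = pg.line_interpolate_point(line, pg.line_locate_point(line, barrier))
print(pg.get_coordinates(snapped))  # [[3. 0.]]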
Example #12
def snap_to_flowlines(df, to_snap):
    """Snap to nearest flowline, within tolerance

    Updates df with snapping results, and returns to_snap as set of dams still
    needing to be snapped after this operation.

    If dams are within SNAP_ENDPOINT_TOLERANCE of the endpoints of the line, they
    will be snapped to the endpoint instead of closest point on line.

    Parameters
    ----------
    df : GeoDataFrame
        master dataset, this is where all snapping gets recorded
    to_snap : DataFrame
        data frame containing pygeos geometries to snap ("geometry")
        and snapping tolerance ("snap_tolerance")

    Returns
    -------
    tuple of (GeoDataFrame, DataFrame)
        (df, to_snap)
    """

    print("=================\nSnapping to flowlines...")

    for huc2 in sorted(to_snap.HUC2.unique()):
        region_start = time()

        print(f"\n----- {huc2} ------")
        in_huc2 = to_snap.loc[to_snap.HUC2 == huc2].copy()
        flowlines = gp.read_feather(
            nhd_dir / "clean" / huc2 / "flowlines.feather",
            columns=["geometry", "lineID"],
        ).set_index("lineID")

        print(
            f"HUC {huc2} selected {len(in_huc2):,} barriers in region to snap against {len(flowlines):,} flowlines"
        )

        lines = nearest(
            pd.Series(in_huc2.geometry.values.data, index=in_huc2.index),
            pd.Series(flowlines.geometry.values.data, index=flowlines.index),
            in_huc2.snap_tolerance.values,
        )
        lines = lines.join(in_huc2.geometry).join(
            flowlines.geometry.rename("line"),
            on="lineID",
        )

        # project the point to the line,
        # find out its distance on the line,
        lines["line_pos"] = pg.line_locate_point(lines.line.values.data,
                                                 lines.geometry.values.data)

        # if within tolerance of start point, snap to start
        ix = lines["line_pos"] <= SNAP_ENDPOINT_TOLERANCE
        lines.loc[ix, "line_pos"] = 0

        # if within tolerance of endpoint, snap to end
        end = pg.length(lines.line.values.data)
        ix = lines["line_pos"] >= end - SNAP_ENDPOINT_TOLERANCE
        lines.loc[ix, "line_pos"] = end[ix]

        # then interpolate its new coordinates
        lines["geometry"] = pg.line_interpolate_point(lines.line.values.data,
                                                      lines["line_pos"])

        ix = lines.index
        df.loc[ix, "snapped"] = True
        df.loc[ix, "geometry"] = lines.geometry
        df.loc[ix, "snap_dist"] = lines.distance
        df.loc[ix, "snap_ref_id"] = lines.lineID
        df.loc[ix, "lineID"] = lines.lineID
        df.loc[ix, "snap_log"] = ndarray_append_strings(
            "snapped: within ",
            to_snap.loc[ix].snap_tolerance,
            "m tolerance of flowline",
        )

        to_snap = to_snap.loc[~to_snap.index.isin(ix)].copy()

        print("{:,} barriers snapped in region in {:.2f}s".format(
            len(ix),
            time() - region_start))

    # TODO: flag those that joined to loops

    return df, to_snap
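A small sketch of the endpoint-tolerance logic above, scalar rather than vectorized (SNAP_ENDPOINT_TOLERANCE value assumed):

import pygeos as pg

SNAP_ENDPOINT_TOLERANCE = 1.0  # hypothetical value

line = pg.linestrings([[0, 0], [10, 0]])
pos = pg.line_locate_point(line, pg.points(9.5, 1))  # 9.5, within tolerance of the end
if pos >= pg.length(line) - SNAP_ENDPOINT_TOLERANCE:
    pos = pg.length(line)  # snap to the downstream endpoint instead
print(pg.get_coordinates(pg.line_interpolate_point(line, pos)))  # [[10. 0.]]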
Example #13
def test_line_locate_point_none(normalized):
    assert np.isnan(
        pygeos.line_locate_point(line_string, None, normalized=normalized))
    assert np.isnan(
        pygeos.line_locate_point(None, point, normalized=normalized))
Example #14
def cut_flowlines_at_points(flowlines, joins, points, next_lineID):
    """General method for cutting flowlines at points and updating joins.

    Only points at least SNAP_ENDPOINT_TOLERANCE from a line's endpoints are used to cut lines.

    Lines are cut starting at the upstream end; the original ordering of points
    per line is not preserved.

    Parameters
    ----------
    flowlines : GeoDataFrame
    joins : DataFrame
        flowline joins
    points : GeoSeries
        points to cut flowlines, must be indexed to join against flowlines;
        one record per singular Point.
    next_lineID : int
        id of next flowline to be created

    Returns
    -------
    (GeoDataFrame, DataFrame)
        Updated flowlines and joins.
        Note: flowlines have a "new" column to identify new flowlines created here.
    """

    df = flowlines.join(points.rename("point"), how="inner")
    df["pos"] = pg.line_locate_point(df.geometry.values.data,
                                     df.point.values.data)

    # only keep cut points that are sufficiently interior to the line
    # (i.e., not too close to endpoints)
    ix = (df.pos >= SNAP_ENDPOINT_TOLERANCE) & (
        (df["length"] - df.pos).abs() >= SNAP_ENDPOINT_TOLERANCE)

    # sort remaining cut points in ascending order on their lines
    df = df.loc[ix].sort_values(by=["lineID", "pos"])
    # convert to plain DataFrame so that we can extract coords
    grouped = pd.DataFrame(
        df.groupby("lineID").agg({"geometry": "first", "pos": list}))
    grouped["geometry"] = grouped.geometry.values.data
    outer_ix, inner_ix, lines = cut_lines_at_points(
        grouped.geometry.apply(lambda x: pg.get_coordinates(x)).values,
        grouped.pos.apply(np.array).values,
    )
    lines = np.asarray(lines)
    new_flowlines = gp.GeoDataFrame({
        "lineID": (next_lineID + np.arange(len(outer_ix))).astype("uint32"),
        "origLineID": grouped.index.take(outer_ix),
        "geometry": lines,
        "length": pg.length(lines).astype("float32"),
        "sinuosity": calculate_sinuosity(lines).astype("float32"),
    }).join(
        flowlines.drop(
            columns=[
                "geometry",
                "lineID",
                "xmin",
                "ymin",
                "xmax",
                "ymax",
                "length",
                "sinuosity",
            ],
            errors="ignore",
        ),
        on="origLineID",
    )

    ### Update flowline joins
    # transform new lines to create new joins at the upstream / downstream most
    # points of the original line
    l = new_flowlines.groupby("origLineID").lineID
    # the first new line per original line is the furthest upstream, so use its
    # ID as the new downstream ID for anything that had this origLineID as its downstream
    first = l.first().rename("new_downstream_id")
    # the last new line per original line is the furthest downstream...
    last = l.last().rename("new_upstream_id")

    # Update existing joins with the new lineIDs we created at the upstream or downstream
    # ends of segments we just created
    joins = update_joins(
        joins,
        first,
        last,
        downstream_col="downstream_id",
        upstream_col="upstream_id",
    )

    ### Create new line joins for any that weren't inserted above
    # Transform all groups of new line IDs per original lineID
    # into joins structure
    atts = (new_flowlines.groupby("origLineID")[[
        "NHDPlusID", "loop", "HUC4"
    ]].first().rename(columns={"NHDPlusID": "upstream"}))

    # function to make upstream / downstream side of join
    pairs = lambda a: pd.Series(zip(a[:-1], a[1:]))
    new_joins = (l.apply(pairs).apply(
        pd.Series).reset_index().rename(columns={
            0: "upstream_id",
            1: "downstream_id"
        }).join(atts, on="origLineID"))

    # NHDPlusID is same for both sides
    new_joins["downstream"] = new_joins.upstream
    new_joins["type"] = "internal"
    # new joins do not terminate in marine, so marine should always be false
    new_joins["marine"] = False
    new_joins = new_joins[[
        "upstream",
        "downstream",
        "upstream_id",
        "downstream_id",
        "type",
        "loop",
        "marine",
        "HUC4",
    ]]

    joins = (joins.append(new_joins, ignore_index=True, sort=False)
             .sort_values(["downstream", "upstream", "downstream_id", "upstream_id"])
             .reset_index(drop=True))

    remove_ids = new_flowlines.origLineID.unique()
    flowlines["new"] = False
    new_flowlines["new"] = True
    flowlines = (
        flowlines.loc[~flowlines.index.isin(remove_ids)].reset_index().append(
            new_flowlines.drop(columns=["origLineID"]),
            ignore_index=True,
            sort=False).set_index("lineID"))

    return flowlines, joins
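The pairs lambda above turns each ordered run of new line IDs into consecutive upstream/downstream join pairs; a tiny sketch:

import pandas as pd

ids = pd.Series([101, 102, 103]).values  # new lineIDs for one original line
pairs = lambda a: pd.Series(zip(a[:-1], a[1:]))
print(pairs(ids).tolist())  # [(101, 102), (102, 103)]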
Example #15
def cut_flowlines_at_barriers(flowlines,
                              joins,
                              barriers,
                              next_segment_id=None):
    """Cut flowlines by barriers.

    Parameters
    ----------
    flowlines : GeoDataFrame
        ALL flowlines for region.
    barriers : GeoDataFrame
        Barriers that will be used to cut flowlines.
    joins : DataFrame
        Joins between flowlines (upstream, downstream pairs).
    next_segment_id : int, optional
        Used as starting point for IDs of new segments created by cutting flowlines.

    Returns
    -------
    GeoDataFrame, DataFrame, DataFrame
        updated flowlines, updated joins, barrier joins (upstream / downstream flowline ID per barrier)
    """

    start = time()
    print(f"Starting number of segments: {len(flowlines):,}")
    print(f"Cutting in {len(barriers):,} barriers")

    # Our segment ids are ints, so just increment from the last one we had from NHD
    if next_segment_id is None:
        next_segment_id = int(flowlines.index.max() + 1)

    # join barriers to lines and extract those that have segments (via inner join)
    segments = (
        flowlines[["lineID", "NHDPlusID", "geometry"]]
        .rename(columns={"geometry": "flowline"})
        .join(
            barriers[["geometry", "barrierID", "lineID"]]
            .set_index("lineID")
            .rename(columns={"geometry": "barrier"}),
            how="inner",
        ))

    # Calculate the position of each barrier on each segment.
    # Barriers are on upstream or downstream end of segment if they are within
    # SNAP_ENDPOINT_TOLERANCE of the ends.  Otherwise, they are splits
    segments["linepos"] = pg.line_locate_point(segments.flowline.values.data,
                                               segments.barrier.values.data)

    ### Upstream and downstream endpoint barriers
    segments["on_upstream"] = segments.linepos <= SNAP_ENDPOINT_TOLERANCE
    segments["on_downstream"] = (
        segments.linepos >=
        pg.length(segments.flowline.values.data) - SNAP_ENDPOINT_TOLERANCE)

    # if line length is < 2 * SNAP_ENDPOINT_TOLERANCE, the barrier could be tagged
    # to both ends, which is incorrect.  Default to on_downstream.
    segments.loc[segments.on_upstream & segments.on_downstream,
                 "on_upstream"] = False

    print(
        f"{segments.on_upstream.sum():,} barriers on upstream point of their segments"
    )
    print(
        f"{segments.on_downstream.sum():,} barriers on downstream point of their segments"
    )

    # Barriers on upstream endpoint:
    # their upstream_id is the upstream_id(s) of their segment from joins,
    # and their downstream_id is the segment they are on.
    # NOTE: a barrier may have multiple upstreams if it occurs at a fork in the network.
    # All terminal upstreams should already be coded as 0 in joins, but just in
    # case, missing values are filled with 0.

    upstream_barrier_joins = ((segments.loc[segments.on_upstream][[
        "barrierID", "lineID"
    ]].rename(columns={
        "lineID": "downstream_id"
    }).join(joins.set_index("downstream_id").upstream_id,
            on="downstream_id")).fillna(0).astype("uint64"))

    # Barriers on downstream endpoint:
    # their upstream_id is the segment they are on and their downstream_id is the
    # downstream_id of their segment from the joins.
    # Some downstream_ids may be missing if the barrier is on the downstream-most point of the
    # network (downstream terminal) and further downstream segments were removed due to removing
    # coastline segments.
    downstream_barrier_joins = ((segments.loc[segments.on_downstream][[
        "barrierID", "lineID"
    ]].rename(columns={
        "lineID": "upstream_id"
    }).join(joins.set_index("upstream_id").downstream_id,
            on="upstream_id")).fillna(0).astype("uint64"))

    barrier_joins = upstream_barrier_joins.append(downstream_barrier_joins,
                                                  ignore_index=True,
                                                  sort=False).set_index(
                                                      "barrierID", drop=False)

    ### Split segments have barriers that are not at endpoints

    split_segments = segments.loc[~(segments.on_upstream
                                    | segments.on_downstream)]
    # join in count of barriers that SPLIT this segment
    split_segments = split_segments.join(
        split_segments.groupby(level=0).size().rename("barriers"))

    print(
        f"{(split_segments.barriers == 1).sum():,} segments to cut have one barrier"
    )
    print(
        f"{(split_segments.barriers > 1).sum():,} segments to cut have more than one barrier"
    )

    # order the barriers by their projected distance on the line
    # Order this so we are always moving from upstream end to downstream end
    split_segments = split_segments.rename_axis("idx").sort_values(
        by=["idx", "linepos"], ascending=True)

    # Convert to DataFrame so that geometry cols are arrays of pygeos geometries
    tmp = pd.DataFrame(split_segments.copy())
    tmp.flowline = tmp.flowline.values.data
    tmp.barrier = tmp.barrier.values.data
    tmp["pos"] = pg.line_locate_point(tmp.flowline.values, tmp.barrier.values)

    # Group barriers by line so that we can split geometries in one pass
    grouped = (
        tmp[[
            "lineID",
            "NHDPlusID",
            "barrierID",
            "barriers",
            "flowline",
            "barrier",
            "pos",
        ]].sort_values(by=["lineID", "pos"]).groupby("lineID").agg({
            "lineID": "first",
            "NHDPlusID": "first",
            "flowline": "first",
            "barrierID": list,
            "barriers": "first",
            # "barrier": list,  # TODO: remove
            "pos": list,
        }))

    # cut line for all barriers
    outer_ix, inner_ix, lines = cut_lines_at_points(
        grouped.flowline.apply(lambda x: pg.get_coordinates(x)).values,
        grouped.pos.apply(np.array).values,
    )

    lines = np.asarray(lines)
    new_flowlines = gp.GeoDataFrame({
        "lineID": (next_segment_id + np.arange(len(outer_ix))).astype("uint32"),
        "origLineID": grouped.index.take(outer_ix),
        "position": inner_ix,
        "geometry": lines,
        "length": pg.length(lines).astype("float32"),
        "sinuosity": calculate_sinuosity(lines).astype("float32"),
    }).join(
        flowlines.drop(
            columns=[
                "geometry",
                "lineID",
                "xmin",
                "ymin",
                "xmax",
                "ymax",
                "length",
                "sinuosity",
            ],
            errors="ignore",
        ),
        on="origLineID",
    )

    # transform new segments to create new joins
    l = new_flowlines.groupby("origLineID").lineID
    # the first new line per original line is the furthest upstream, so use its
    # ID as the new downstream ID for anything that had this origLineID as its downstream
    first = l.first().rename("new_downstream_id")
    # the last new line per original line is the furthest downstream...
    last = l.last().rename("new_upstream_id")

    # Update existing joins with the new lineIDs we created at the upstream or downstream
    # ends of segments we just created

    updated_joins = update_joins(joins,
                                 first,
                                 last,
                                 downstream_col="downstream_id",
                                 upstream_col="upstream_id")

    # also need to update any barrier joins already created for those on endpoints
    barrier_joins = update_joins(
        barrier_joins,
        first,
        last,
        downstream_col="downstream_id",
        upstream_col="upstream_id",
    )

    # For all new interior joins, create upstream & downstream ids per original line
    upstream_side = (new_flowlines.loc[~new_flowlines.lineID.isin(last)][[
        "origLineID", "position", "lineID"
    ]].set_index(["origLineID",
                  "position"]).rename(columns={"lineID": "upstream_id"}))

    downstream_side = new_flowlines.loc[~new_flowlines.lineID.isin(first)][[
        "origLineID", "position", "lineID"
    ]].rename(columns={"lineID": "downstream_id"})
    downstream_side.position = downstream_side.position - 1
    downstream_side = downstream_side.set_index(["origLineID", "position"])

    new_joins = (grouped.barrierID.apply(
        pd.Series).stack().astype("uint32").reset_index().rename(columns={
            "lineID": "origLineID",
            "level_1": "position",
            0: "barrierID"
        }).set_index([
            "origLineID", "position"
        ]).join(upstream_side).join(downstream_side).reset_index().join(
            grouped.NHDPlusID.rename("upstream"), on="origLineID"))
    new_joins["downstream"] = new_joins.upstream
    new_joins["type"] = "internal"
    new_joins["marine"] = False

    updated_joins = updated_joins.append(
        new_joins[[
            "upstream", "downstream", "upstream_id", "downstream_id", "type",
            "marine"
        ]],
        ignore_index=True,
        sort=False,
    ).sort_values(["downstream_id", "upstream_id"])

    barrier_joins = (barrier_joins.append(
        new_joins[["barrierID", "upstream_id", "downstream_id"]],
        ignore_index=True,
        sort=False,
    ).set_index("barrierID", drop=False).astype("uint32"))

    # any join that is upstream of a barrier cannot be marine
    updated_joins.loc[
        updated_joins.marine
        & updated_joins.upstream_id.isin(barrier_joins.upstream_id.unique()),
        "marine"] = False

    # extract flowlines that are not split by barriers and merge in new flowlines
    unsplit_segments = flowlines.loc[~flowlines.index.isin(split_segments.index)]
    updated_flowlines = unsplit_segments.append(
        new_flowlines.drop(columns=["origLineID", "position"]),
        ignore_index=True,
        sort=False,
    ).set_index("lineID", drop=False)

    print(f"Done cutting flowlines in {time() - start:.2f}s")

    return updated_flowlines, updated_joins, barrier_joins
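A minimal sketch of the endpoint classification used above, scalar rather than vectorized (SNAP_ENDPOINT_TOLERANCE value assumed):

import pygeos as pg

SNAP_ENDPOINT_TOLERANCE = 1.0  # hypothetical value

flowline = pg.linestrings([[0, 0], [10, 0]])
barrier = pg.points(0.5, 0)

pos = pg.line_locate_point(flowline, barrier)
on_upstream = pos <= SNAP_ENDPOINT_TOLERANCE
on_downstream = pos >= pg.length(flowline) - SNAP_ENDPOINT_TOLERANCE
print(on_upstream, on_downstream)  # True False -> barrier joins only, no cut needed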