def build_connections(supernetwork_parameters, dt):
    """Read the supernetwork geo file and derive connections and waterbodies.

    Args:
        supernetwork_parameters: Mapping that supplies "columns",
            "geo_file_path" and "waterbody_null_code". When it also contains
            "mask_file_path", the entries "mask_layer_string" and "mask_key"
            are read to restrict the rows.
        dt: Value written to the "dt" column of every row.

    Returns:
        A ``(connections, wbodies, param_df)`` tuple holding the result of
        ``nhd_network.extract_connections``, the result of
        ``nhd_network.extract_waterbodies``, and the parameter frame after
        the "dt" column is added, the columns are renamed through
        ``reverse_dict`` of the column mapping, and everything is cast to
        float32.
    """
    # TODO: Remove the dependence on dt in this function
    column_map = supernetwork_parameters["columns"]

    # Keep only the configured columns and index the rows by the key column.
    segments = nhd_io.read(supernetwork_parameters["geo_file_path"])
    segments = segments[list(column_map.values())].set_index(column_map["key"])

    if "mask_file_path" in supernetwork_parameters:
        mask = nhd_io.read_mask(
            supernetwork_parameters["mask_file_path"],
            layer_string=supernetwork_parameters["mask_layer_string"],
        )
        # Keep only the index labels listed in the selected mask column.
        keep_labels = mask.iloc[:, supernetwork_parameters["mask_key"]]
        segments = segments.filter(keep_labels, axis=0)

    segments = nhd_io.replace_downstreams(
        segments.sort_index(), column_map["downstream"], 0
    )

    connections = nhd_network.extract_connections(
        segments, column_map["downstream"]
    )
    # TODO: reorganize this so the wbodies object doesn't depend on the
    # not-yet-final frame. Either build the final frame differently, or
    # call extract_waterbodies on the final frame as it stands.
    wbodies = nhd_network.extract_waterbodies(
        segments,
        column_map["waterbody"],
        supernetwork_parameters["waterbody_null_code"],
    )

    segments["dt"] = dt
    segments = segments.rename(columns=reverse_dict(column_map)).astype("float32")

    # datasub = data[['dt', 'bw', 'tw', 'twcc', 'dx', 'n', 'ncc', 'cs', 's0']]
    return connections, wbodies, segments
def build_connections(supernetwork_parameters, dt):
    """Read the supernetwork geo file and derive its connections.

    Args:
        supernetwork_parameters: Mapping that supplies "columns" and
            "geo_file_path". When it also contains "mask_file_path", the
            entries "mask_layer_string" and "mask_key" are read to restrict
            the rows.
        dt: Value written to the "dt" column of every row.

    Returns:
        A ``(connections, param_df)`` tuple holding the result of
        ``nhd_network.extract_connections`` and the parameter frame after
        the "dt" column is added, the columns are renamed through
        ``reverse_dict`` of the column mapping, and everything is cast to
        float32.
    """
    # TODO: Remove the dependence on dt in this function
    column_map = supernetwork_parameters["columns"]

    # Keep only the configured columns and index the rows by the key column.
    geo_path = pathlib.Path(supernetwork_parameters["geo_file_path"])
    segments = nhd_io.read(geo_path)
    segments = segments[list(column_map.values())].set_index(column_map["key"])

    if "mask_file_path" in supernetwork_parameters:
        mask = nhd_io.read_mask(
            pathlib.Path(supernetwork_parameters["mask_file_path"]),
            layer_string=supernetwork_parameters["mask_layer_string"],
        )
        # Keep only the index labels listed in the selected mask column.
        keep_labels = mask.iloc[:, supernetwork_parameters["mask_key"]]
        segments = segments.filter(keep_labels, axis=0)

    segments = nhd_io.replace_downstreams(
        segments.sort_index(), column_map["downstream"], 0
    )

    connections = nhd_network.extract_connections(
        segments, column_map["downstream"]
    )

    segments["dt"] = dt
    segments = segments.rename(columns=reverse_dict(column_map)).astype("float32")

    # datasub = data[['dt', 'bw', 'tw', 'twcc', 'dx', 'n', 'ncc', 'cs', 's0']]
    return connections, segments
# Example #3
# 0
def build_connections(supernetwork_parameters):
    """Read the supernetwork geo file and split it into routing inputs.

    Args:
        supernetwork_parameters: Mapping that supplies "columns" and
            "geo_file_path". When it also contains "mask_file_path", the
            entries "mask_layer_string" and "mask_key" are read to restrict
            the rows. It is also forwarded to ``build_waterbodies``.

    Returns:
        A ``(connections, param_df, wbodies, gages)`` tuple.
        ``connections`` is the result of
        ``nhd_network.extract_connections``; ``param_df`` is the remaining
        parameter frame cast to float32 with the "waterbody", "gages" and
        "downstream" columns removed; ``wbodies`` and ``gages`` are empty
        dicts unless the matching key is present in the column mapping.
    """
    # NOTE(review): supernetwork_parameters may carry a "terminal_code"
    # entry. An earlier revision read it here but never used it, so it is
    # intentionally not consulted in this version.
    cols = supernetwork_parameters["columns"]

    param_df = nhd_io.read(pathlib.Path(supernetwork_parameters["geo_file_path"]))

    # Keep only the configured columns and index the rows by the key column.
    param_df = param_df[list(cols.values())]
    param_df = param_df.set_index(cols["key"])

    if "mask_file_path" in supernetwork_parameters:
        data_mask = nhd_io.read_mask(
            pathlib.Path(supernetwork_parameters["mask_file_path"]),
            layer_string=supernetwork_parameters["mask_layer_string"],
        )
        # Keep only the index labels listed in the selected mask column.
        param_df = param_df.filter(
            data_mask.iloc[:, supernetwork_parameters["mask_key"]], axis=0
        )

    # Rename parameter columns to standard names. Example mapping for
    # route-link files (standard name: source name):
    #        key: "link"
    #        downstream: "to"
    #        dx: "Length"
    #        n: "n"  # TODO: rename to `manningn`
    #        ncc: "nCC"  # TODO: rename to `manningncc`
    #        s0: "So"  # TODO: rename to `bedslope`
    #        bw: "BtmWdth"  # TODO: rename to `bottomwidth`
    #        waterbody: "NHDWaterbodyComID"
    #        gages: "gages"
    #        tw: "TopWdth"  # TODO: rename to `topwidth`
    #        twcc: "TopWdthCC"  # TODO: rename to `topwidthcc`
    #        alt: "alt"
    #        musk: "MusK"
    #        musx: "MusX"
    #        cs: "ChSlp"  # TODO: rename to `sideslope`
    # The rename is applied exactly once: a second pass is redundant and
    # could re-map a column whose standard name equals another column's
    # source name.
    param_df = param_df.rename(columns=reverse_dict(cols))
    param_df = param_df.sort_index()

    wbodies = {}
    if "waterbody" in cols:
        wbodies = build_waterbodies(
            param_df[["waterbody"]], supernetwork_parameters, "waterbody"
        )
        param_df = param_df.drop("waterbody", axis=1)

    gages = {}
    if "gages" in cols:
        gages = build_gages(param_df[["gages"]])
        param_df = param_df.drop("gages", axis=1)

    connections = nhd_network.extract_connections(param_df, "downstream")
    param_df = param_df.drop("downstream", axis=1)

    param_df = param_df.astype("float32")

    return connections, param_df, wbodies, gages
# Example #4
# 0
def test_build_connections():
    """Check ``extract_connections`` against the expected fixture mapping.

    Uses the module-level fixtures ``test_terminal_code``,
    ``test_param_df`` and ``expected_connections``.
    """
    # Start from the externally supplied terminal code ...
    terminal_codes = {test_terminal_code}

    # ... then add every downstream id that is not itself a row of the
    # frame. Such ids point off-domain (for example because a mask or a
    # domain split removed the target) and are therefore terminal too.
    downstream_ids = test_param_df["downstream"]
    off_domain = downstream_ids[~downstream_ids.isin(test_param_df.index)]
    terminal_codes = terminal_codes | set(off_domain.values)

    connections = nhd_network.extract_connections(
        test_param_df, "downstream", terminal_codes
    )
    assert connections == expected_connections
def build_connections(supernetwork_parameters):
    """Read the supernetwork geo file and split it into routing inputs.

    Args:
        supernetwork_parameters: Mapping that supplies "columns" and
            "geo_file_path", and optionally "terminal_code" (defaults to 0).
            When it also contains "mask_file_path", the entries
            "mask_layer_string" and "mask_key" are read to restrict the
            rows. It is also forwarded to ``build_waterbodies``.

    Returns:
        A ``(connections, param_df, wbodies, gages)`` tuple.
        ``connections`` is the result of
        ``nhd_network.extract_connections``; ``param_df`` is the remaining
        parameter frame cast to float32 with the "waterbody", "gages" and
        "downstream" columns removed; ``wbodies`` and ``gages`` are empty
        dicts unless the matching key is present in the column mapping.
    """
    cols = supernetwork_parameters["columns"]
    terminal_code = supernetwork_parameters.get("terminal_code", 0)

    param_df = nhd_io.read(pathlib.Path(supernetwork_parameters["geo_file_path"]))

    # Keep only the configured columns and index the rows by the key column.
    param_df = param_df[list(cols.values())]
    param_df = param_df.set_index(cols["key"])

    if "mask_file_path" in supernetwork_parameters:
        data_mask = nhd_io.read_mask(
            pathlib.Path(supernetwork_parameters["mask_file_path"]),
            layer_string=supernetwork_parameters["mask_layer_string"],
        )
        # Keep only the index labels listed in the selected mask column.
        param_df = param_df.filter(
            data_mask.iloc[:, supernetwork_parameters["mask_key"]], axis=0
        )

    # Rename parameter columns to standard names. Example mapping for
    # route-link files (standard name: source name):
    #        key: "link"
    #        downstream: "to"
    #        dx: "Length"
    #        n: "n"  # TODO: rename to `manningn`
    #        ncc: "nCC"  # TODO: rename to `manningncc`
    #        s0: "So"  # TODO: rename to `bedslope`
    #        bw: "BtmWdth"  # TODO: rename to `bottomwidth`
    #        waterbody: "NHDWaterbodyComID"
    #        gages: "gages"
    #        tw: "TopWdth"  # TODO: rename to `topwidth`
    #        twcc: "TopWdthCC"  # TODO: rename to `topwidthcc`
    #        alt: "alt"
    #        musk: "MusK"
    #        musx: "MusX"
    #        cs: "ChSlp"  # TODO: rename to `sideslope`
    # The rename is applied exactly once: a second, identical pass is
    # redundant and could re-map a column whose standard name equals
    # another column's source name.
    param_df = param_df.rename(columns=nhd_network.reverse_dict(cols))
    param_df = param_df.sort_index()

    wbodies = {}
    if "waterbody" in cols:
        wbodies = build_waterbodies(
            param_df[["waterbody"]], supernetwork_parameters, "waterbody"
        )
        param_df = param_df.drop("waterbody", axis=1)

    gages = {}
    if "gages" in cols:
        gages = build_gages(param_df[["gages"]])
        param_df = param_df.drop("gages", axis=1)

    # Terminal codes are the externally configured code plus every
    # downstream id that is not itself a row of the frame. Such ids point
    # off-domain (for example because a mask removed the target or the
    # domain was intentionally split) and must be treated as terminal too.
    off_domain = param_df.loc[
        ~param_df["downstream"].isin(param_df.index), "downstream"
    ]
    terminal_codes = {terminal_code} | set(off_domain.values)

    connections = nhd_network.extract_connections(
        param_df, "downstream", terminal_codes=terminal_codes
    )
    param_df = param_df.drop("downstream", axis=1)

    param_df = param_df.astype("float32")

    return connections, param_df, wbodies, gages
def main():
    """Run Muskingum-Cunge routing over a NextGen network and save results.

    Reads the network named by ``args.supernetwork`` (optionally restricted
    to ``args.subset``), joins channel parameters from the NHD route-link
    file through the catchment crosswalk, runs
    ``mc_reach.compute_network`` once per independent sub-network, and
    writes the combined flow/velocity/depth table to
    ``<supernetwork path without extension>_mc_results.csv``.
    """
    args = _handle_args()

    next_gen_input_folder = test_folder.joinpath("input", "next_gen")
    if args.input:
        next_gen_input_folder = pathlib.Path(args.input)

    # The following 2 values are currently hard coded for this test domain.
    # NOTE(review): assuming dt_mc is in seconds, 720 steps x 300 s is
    # 216,000 s (2.5 days); the earlier "= 86400 = day" note did not match
    # these values -- confirm the intended simulation length.
    nts = 720  # number of timesteps
    dt_mc = 300.0  # time interval for MC

    # Parameter columns handed to the routing kernel, in this order.
    routing_columns = ["dt", "bw", "tw", "twcc", "dx", "n", "ncc", "cs", "s0"]

    # Currently tested on the Sugar Creek domain
    ngen_network_df = nhd_io.read_geopandas(args.supernetwork)
    if args.subset:
        ngen_network_df = ngen_network_df[
            ngen_network_df["realized_catchment"].isin(args.subset)
        ]

    # Create dictionary mapping each connection ID to its downstream ID
    ngen_network_dict = dict(zip(ngen_network_df.id, ngen_network_df.toid))

    def node_key_func(x):
        # Drop the first three characters and parse the rest as an integer.
        # NOTE(review): presumably strips a 3-character id prefix -- confirm.
        return int(x[3:])

    # Extract the ID integer values
    waterbody_connections = {
        node_key_func(k): node_key_func(v)
        for k, v in ngen_network_dict.items()
    }

    # Convert dictionary connections to data frame and make ID column the index
    waterbody_df = pd.DataFrame.from_dict(
        waterbody_connections, orient="index", columns=["to"]
    )
    waterbody_df = waterbody_df.sort_index()

    waterbody_df = nhd_io.replace_downstreams(waterbody_df, "to", 0)

    connections = nhd_network.extract_connections(waterbody_df, "to")

    # Read and convert catchment lateral flows to format that can be
    # processed by compute_network
    qlats = next_gen_io.read_catchment_lateral_flows(next_gen_input_folder)
    print(qlats)
    rconn = nhd_network.reverse_network(connections)

    subnets = nhd_network.reachable_network(rconn, check_disjoint=False)

    # read the routelink file
    nhd_routelink = nhd_io.read_netcdf("data/RouteLink_NHDPLUS.nc")
    nhd_routelink["dt"] = dt_mc

    nhd_routelink.set_index("link", inplace=True)

    # Standard parameter name -> route-link column name.
    routelink_cols = {
        "downstream": "to",
        "dx": "Length",
        "n": "n",
        "ncc": "nCC",
        "s0": "So",
        "bw": "BtmWdth",
        "tw": "TopWdth",
        "twcc": "TopWdthCC",
        "waterbody": "NHDWaterbodyComID",
        "musk": "MusK",
        "musx": "MusX",
        "cs": "ChSlp",
    }
    # Invert the mapping so route-link columns are renamed to standard names.
    routelink_renames = {
        source: standard for standard, source in routelink_cols.items()
    }
    nhd_routelink.rename(columns=routelink_renames, inplace=True)

    with open(next_gen_input_folder / "coarse/crosswalk.json") as f:
        crosswalk_data = json.load(f)
    # Look up each catchment's outlet COMID so it can be joined to route-link.
    waterbody_df["comid"] = waterbody_df.apply(
        lambda x: crosswalk_data["cat-" + str(x.name)]["outlet_COMID"], axis=1
    )

    waterbody_df = waterbody_df.join(nhd_routelink, on="comid", how="left")

    del nhd_routelink

    # initial conditions, assume to be zero
    # TODO: Allow optional reading of initial conditions from WRF
    q0 = pd.DataFrame(
        0, index=waterbody_df.index, columns=["qu0", "qd0", "h0"], dtype="float32"
    )

    # Set types as float32
    waterbody_df = waterbody_df.astype(
        {column: "float32" for column in routing_columns}
    )

    subreaches = {}
    for tw, net in subnets.items():
        path_func = partial(nhd_network.split_at_junction, net)
        subreaches[tw] = nhd_network.dfs_decomposition(net, path_func)

    results = []
    for tw, reach in subreaches.items():
        r = list(chain.from_iterable(reach))
        data_sub = waterbody_df.loc[r, routing_columns].sort_index()
        qlat_sub = qlats.loc[r].sort_index()
        q0_sub = q0.loc[r].sort_index()

        results.append(
            mc_reach.compute_network(
                nts,
                reach,
                subnets[tw],
                data_sub.index.values,
                data_sub.columns.values,
                data_sub.values,
                qlat_sub.values,
                q0_sub.values,
            )
        )

    fdv_columns = pd.MultiIndex.from_product(
        [range(nts), ["q", "v", "d"]]
    ).to_flat_index()
    flowveldepth = pd.concat(
        [pd.DataFrame(d, index=i, columns=fdv_columns) for i, d in results],
        copy=False,
    )
    flowveldepth = flowveldepth.sort_index()

    # Strip only the file extension. Splitting the whole string on "."
    # truncated paths such as "./data/x.gpkg" to an empty base name.
    outfile_base_name = pathlib.Path(args.supernetwork).with_suffix("")
    flowveldepth.to_csv(f"{outfile_base_name}_mc_results.csv")
    print(flowveldepth)
# Example #7
# 0
def main():
    """Route flows through a supernetwork and optionally print/save results.

    Steps:
      1. Read the geo file for ``args.supernetwork``, apply the optional
         mask, and extract connections.
      2. Organize the connections into independent networks and reaches.
      3. Call ``mc_reach.compute_network`` once per independent network,
         either through a joblib thread pool (when
         ``args.parallel_compute_method == "by-network"``) or serially.
      4. Print the combined flow/velocity/depth table when the debug level
         requests it, and write it to CSV when ``args.csv_output_folder``
         is set.
    """
    args = _handle_args()

    nts = args.nts
    debuglevel = -1 * args.debuglevel
    verbose = args.verbose
    showtiming = args.showtiming
    supernetwork = args.supernetwork
    csv_output_folder = args.csv_output_folder

    test_folder = pathlib.Path(root, "test")
    geo_input_folder = test_folder.joinpath("input", "geo")

    # Parameter columns handed to the routing kernel, in this order.
    routing_columns = ["dt", "bw", "tw", "twcc", "dx", "n", "ncc", "cs", "s0"]

    # TODO: Make these commandline args
    # NHD Subset (Brazos/Lower Colorado)
    #   supernetwork = 'Brazos_LowerColorado_Named_Streams'
    #   supernetwork = 'Brazos_LowerColorado_ge5'
    #   supernetwork = 'Pocono_TEST1'
    # NHD CONUS order 5 and greater
    #   supernetwork = 'CONUS_ge5'
    # These are large -- be careful
    #   supernetwork = 'Mainstems_CONUS'
    #   supernetwork = 'CONUS_FULL_RES_v20'
    #   supernetwork = 'CONUS_Named_Streams'  # subset of the full resolution, built by reading the GNIS field
    #   supernetwork = 'CONUS_Named_combined'  # Named streams processed through the Full-Res paths to join the many hanging reaches

    if verbose:
        print("creating supernetwork connections set")
    if showtiming:
        start_time = time.time()

    # STEP 1
    network_data = nnu.set_supernetwork_data(
        supernetwork=supernetwork,
        geo_input_folder=geo_input_folder,
        verbose=False,
        debuglevel=debuglevel,
    )

    cols = network_data["columns"]
    param_df = nhd_io.read(network_data["geo_file_path"])
    # Keep only the configured columns and index the rows by the key column.
    param_df = param_df[list(cols.values())]
    param_df = param_df.set_index(cols["key"])

    if "mask_file_path" in network_data:
        data_mask = nhd_io.read_mask(
            network_data["mask_file_path"],
            layer_string=network_data["mask_layer_string"],
        )
        # Keep only the index labels listed in the selected mask column.
        param_df = param_df.filter(data_mask.iloc[:, network_data["mask_key"]], axis=0)

    param_df = param_df.sort_index()
    param_df = nhd_io.replace_downstreams(param_df, cols["downstream"], 0)

    if args.ql:
        qlats = nhd_io.read_qlat(args.ql)
    else:
        qlats = constant_qlats(param_df, nts, 10.0)

    # initial conditions, assume to be zero
    # TODO: Allow optional reading of initial conditions from WRF
    q0 = pd.DataFrame(
        0, index=param_df.index, columns=["qu0", "qd0", "h0"], dtype="float32"
    )

    connections = nhd_network.extract_connections(param_df, cols["downstream"])
    # NOTE(review): the waterbody extraction result is not used further in
    # this function; the call is kept unchanged.
    wbodies = nhd_network.extract_waterbodies(
        param_df, cols["waterbody"], network_data["waterbody_null_code"]
    )

    if verbose:
        print("supernetwork connections set complete")
    if showtiming:
        print(f"... in {time.time() - start_time} seconds.")

    # STEP 2
    if showtiming:
        start_time = time.time()
    if verbose:
        print("organizing connections into reaches ...")

    rconn = nhd_network.reverse_network(connections)
    independent_networks = nhd_network.reachable_network(rconn)
    reaches_bytw = {}
    for tw, net in independent_networks.items():
        path_func = partial(nhd_network.split_at_junction, net)
        reaches_bytw[tw] = nhd_network.dfs_decomposition(net, path_func)

    if verbose:
        print("reach organization complete")
    if showtiming:
        print(f"... in {time.time() - start_time} seconds.")

    if showtiming:
        start_time = time.time()

    # The time step is hard coded here.
    param_df["dt"] = 300.0
    param_df = param_df.rename(columns=nnu.reverse_dict(cols))
    param_df = param_df.astype("float32")

    parallel_compute_method = args.parallel_compute_method
    cpu_pool = args.cpu_pool

    # Every value of args.compute_method currently selects the same kernel,
    # so no dispatch on it is needed yet.
    compute_func = mc_reach.compute_network

    def iter_network_inputs():
        # Yield the positional arguments of compute_func for each
        # independent network, with all tables sorted by segment index.
        for tw, reach_list in reaches_bytw.items():
            r = list(chain.from_iterable(reach_list))
            param_df_sub = param_df.loc[r, routing_columns].sort_index()
            qlat_sub = qlats.loc[r].sort_index()
            q0_sub = q0.loc[r].sort_index()
            yield (
                nts,
                reach_list,
                independent_networks[tw],
                param_df_sub.index.values,
                param_df_sub.columns.values,
                param_df_sub.values,
                qlat_sub.values,
                q0_sub.values,
            )

    if parallel_compute_method == "by-network":
        with Parallel(n_jobs=cpu_pool, backend="threading") as parallel:
            # Build the full job list before dispatching.
            jobs = [
                delayed(compute_func)(*inputs) for inputs in iter_network_inputs()
            ]
            results = parallel(jobs)
    else:  # Execute in serial, preparing one network's inputs at a time
        results = [compute_func(*inputs) for inputs in iter_network_inputs()]

    if (debuglevel <= -1) or csv_output_folder:
        qvd_columns = pd.MultiIndex.from_product(
            [range(nts), ["q", "v", "d"]]
        ).to_flat_index()
        flowveldepth = pd.concat(
            [pd.DataFrame(d, index=i, columns=qvd_columns) for i, d in results],
            copy=False,
        )

        if csv_output_folder:
            flowveldepth = flowveldepth.sort_index()
            output_path = pathlib.Path(csv_output_folder).resolve()
            flowveldepth.to_csv(output_path.joinpath(f"{supernetwork}.csv"))

        if debuglevel <= -1:
            print(flowveldepth)

    if verbose:
        print("ordered reach computation complete")
    if showtiming:
        print(f"... in {time.time() - start_time} seconds.")