# Example #1
def get_network_data(network_name):
    """Load, mask, and index NHD RouteLink data for a named supernetwork.

    Parameters
    ----------
    network_name : str
        Key identifying the supernetwork whose metadata is returned by
        ``nnu.set_supernetwork_data`` (e.g. a Cape Fear basin network).

    Returns
    -------
    tuple
        ``(data, RouteLink, network_data)`` where ``data`` is the masked,
        key-indexed DataFrame of network columns (plus Strahler ``order``),
        ``RouteLink`` is the untouched full read of the geo file (kept for
        viz- and full-network analysis), and ``network_data`` is the
        metadata dict from ``nnu.set_supernetwork_data``.
    """
    # Directory path for test/input/geo, where NHD data and masks are stored.
    test_folder = pathlib.Path(root, "test").resolve()
    geo_input_folder = pathlib.Path(test_folder, "input", "geo").resolve()

    # Load network metadata for the requested supernetwork.
    network_data = nnu.set_supernetwork_data(
        supernetwork=network_name, geo_input_folder=geo_input_folder
    )

    # If the NHDPlus RouteLink file does not exist, download it.
    # BUG FIX: the original tested `.is_file` without calling it; the bound
    # method object is always truthy, so the download branch never executed.
    if not network_data["geo_file_path"].is_file():
        network_dl.download(
            network_data["geo_file_path"], network_data["data_link"]
        )

    # Read NHD data once; retain this copy for viz / full-network analysis.
    RouteLink = nhd_io.read(network_data["geo_file_path"])

    # Select only the necessary columns of geospatial data, plus the
    # Strahler order data, and set the DataFrame index to the key column.
    cols = list(network_data["columns"].values())
    cols.append("order")

    # Reuse the in-memory read instead of re-reading the file from disk
    # (the original called nhd_io.read twice on the same path).
    data = RouteLink[cols]
    data = data.set_index(network_data["columns"]["key"])

    # Mask NHDNetwork to isolate the test network
    # (e.g. full-resolution Cape Fear basin, NC).
    if "mask_file_path" in network_data:
        data_mask = nhd_io.read_mask(
            network_data["mask_file_path"],
            layer_string=network_data["mask_layer_string"],
        )
        data = data.filter(data_mask.iloc[:, network_data["mask_key"]], axis=0)

    # Sort index for deterministic downstream processing.
    data = data.sort_index()

    # Replace terminal downstream references with 0.
    data = nhd_io.replace_downstreams(
        data, network_data["columns"]["downstream"], 0
    )

    return data, RouteLink, network_data
def main():
    """Script entry point: load an NHD supernetwork, decompose it into
    reaches, route flows through every tailwater sub-network with
    ``mc_reach.compute_network``, and write the combined flow/velocity/depth
    results to ``<supernetwork>.csv``.
    """

    args = _handle_args()

    nts = 144  # number of routing timesteps (fixed here, not a CLI arg)
    debuglevel = -1 * args.debuglevel
    verbose = args.verbose
    showtiming = args.showtiming
    supernetwork = args.supernetwork
    break_network_at_waterbodies = args.break_network_at_waterbodies
    write_output = args.write_output
    assume_short_ts = args.assume_short_ts
    # NOTE(review): break_network_at_waterbodies, write_output, and
    # assume_short_ts are unpacked from args but never used below.

    # Resolve the test/input/geo folder where network files live.
    test_folder = pathlib.Path(root, "test")
    geo_input_folder = test_folder.joinpath("input", "geo")

    # TODO: Make these commandline args
    """##NHD Subset (Brazos/Lower Colorado)"""
    # supernetwork = 'Brazos_LowerColorado_Named_Streams'
    # supernetwork = 'Brazos_LowerColorado_ge5'
    # supernetwork = 'Pocono_TEST1'
    """##NHD CONUS order 5 and greater"""
    # supernetwork = 'CONUS_ge5'
    """These are large -- be careful"""
    # supernetwork = 'Mainstems_CONUS'
    # supernetwork = 'CONUS_FULL_RES_v20'
    # supernetwork = 'CONUS_Named_Streams' #create a subset of the full resolution by reading the GNIS field
    # supernetwork = 'CONUS_Named_combined' #process the Named streams through the Full-Res paths to join the many hanging reaches

    if verbose:
        print("creating supernetwork connections set")
    if showtiming:
        start_time = time.time()

    # STEP 1: load geometry, apply optional mask, and build connections.
    network_data = nnu.set_supernetwork_data(
        supernetwork=args.supernetwork,
        geo_input_folder=geo_input_folder,
        verbose=False,
        debuglevel=debuglevel,
    )

    # Keep only the declared columns and index by the network key column.
    cols = network_data["columns"]
    data = nhd_io.read(network_data["geo_file_path"])
    data = data[list(cols.values())]
    data = data.set_index(cols["key"])

    # Optionally mask the network down to the test subset.
    if "mask_file_path" in network_data:
        data_mask = nhd_io.read_mask(
            network_data["mask_file_path"],
            layer_string=network_data["mask_layer_string"],
        )
        data = data.filter(data_mask.iloc[:, network_data["mask_key"]], axis=0)

    data = data.sort_index()
    # Replace terminal downstream references with 0.
    data = nhd_io.replace_downstreams(data, cols["downstream"], 0)

    # Lateral inflows: read from file if given, else a constant 10.0
    # for every segment and timestep.
    if args.ql:
        qlats = nhd_io.read_qlat(args.ql)
    else:
        qlats = constant_qlats(data, nts, 10.0)

    connections = nhd_network.extract_connections(data, cols["downstream"])
    # NOTE(review): wbodies is extracted but not used further in this function.
    wbodies = nhd_network.extract_waterbodies(
        data, cols["waterbody"], network_data["waterbody_null_code"]
    )

    if verbose:
        print("supernetwork connections set complete")
    if showtiming:
        print("... in %s seconds." % (time.time() - start_time))

    # STEP 2: reverse the network, find each tailwater's reachable
    # sub-network, and decompose it into reaches split at junctions.
    if showtiming:
        start_time = time.time()
    if verbose:
        print("organizing connections into reaches ...")

    rconn = nhd_network.reverse_network(connections)
    subnets = nhd_network.reachable_network(rconn)
    subreaches = {}
    for tw, net in subnets.items():
        path_func = partial(nhd_network.split_at_junction, net)
        subreaches[tw] = nhd_network.dfs_decomposition(net, path_func)

    if verbose:
        print("reach organization complete")
    if showtiming:
        print("... in %s seconds." % (time.time() - start_time))

    if showtiming:
        start_time = time.time()

    # Fixed timestep column (presumably seconds — TODO confirm) plus
    # renaming to the short column names mc_reach expects; float32 to
    # match the compute kernel's expected dtype.
    data["dt"] = 300.0
    data = data.rename(columns=nnu.reverse_dict(cols))
    data = data.astype("float32")

    # datasub = data[['dt', 'bw', 'tw', 'twcc', 'dx', 'n', 'ncc', 'cs', 's0']]

    # STEP 3: route each tailwater sub-network. The parallel path uses
    # joblib threading; both paths build identical per-tailwater inputs.
    parallelcompute = False
    if parallelcompute:
        with Parallel(n_jobs=-1, backend="threading") as parallel:
            jobs = []
            for twi, (tw, reach) in enumerate(subreaches.items(), 1):
                # Flatten the reach decomposition to the segment id list.
                r = list(chain.from_iterable(reach))
                data_sub = data.loc[
                    r, ["dt", "bw", "tw", "twcc", "dx", "n", "ncc", "cs", "s0"]
                ].sort_index()
                qlat_sub = qlats.loc[r].sort_index()
                jobs.append(
                    delayed(mc_reach.compute_network)(
                        nts,
                        reach,
                        subnets[tw],
                        data_sub.index.values,
                        data_sub.columns.values,
                        data_sub.values,
                        qlat_sub.values,
                    )
                )
            results = parallel(jobs)
    else:
        results = []
        for twi, (tw, reach) in enumerate(subreaches.items(), 1):
            # Flatten the reach decomposition to the segment id list.
            r = list(chain.from_iterable(reach))
            data_sub = data.loc[
                r, ["dt", "bw", "tw", "twcc", "dx", "n", "ncc", "cs", "s0"]
            ].sort_index()
            qlat_sub = qlats.loc[r].sort_index()
            results.append(
                mc_reach.compute_network(
                    nts,
                    reach,
                    subnets[tw],
                    data_sub.index.values,
                    data_sub.columns.values,
                    data_sub.values,
                    qlat_sub.values,
                )
            )

    # Assemble per-tailwater (index, values) results into one DataFrame
    # with a (timestep, q/v/d) flattened column index, then persist.
    fdv_columns = pd.MultiIndex.from_product(
        [range(nts), ["q", "v", "d"]]
    ).to_flat_index()
    flowveldepth = pd.concat(
        [pd.DataFrame(d, index=i, columns=fdv_columns) for i, d in results], copy=False
    )
    flowveldepth = flowveldepth.sort_index()
    flowveldepth.to_csv(f"{args.supernetwork}.csv")
    print(flowveldepth)

    if verbose:
        print("ordered reach computation complete")
    if showtiming:
        print("... in %s seconds." % (time.time() - start_time))
# Example #3
def main():
    """Script entry point (variant with hard-coded settings): load the
    'Mainstems_CONUS' supernetwork, decompose it into reaches per
    tailwater, and route flows with ``mc_reach.compute_network``,
    printing the combined flow/velocity/depth DataFrame.
    """

    # NOTE(review): data_values is declared global but never assigned or
    # read in this body (only referenced in commented-out code below).
    global data_values

    verbose = True
    debuglevel = 0
    showtiming = True
    nts = 1440  # number of routing timesteps (hard-coded here)

    # Resolve the test/input/geo folder where network files live.
    test_folder = os.path.join(root, r"test")
    geo_input_folder = os.path.join(test_folder, r"input", r"geo")

    # TODO: Make these commandline args
    """##NHD Subset (Brazos/Lower Colorado)"""
    # supernetwork = 'Brazos_LowerColorado_ge5'
    #supernetwork = "Pocono_TEST2"
    """##NHD CONUS order 5 and greater"""
    # supernetwork = 'CONUS_ge5'
    """These are large -- be careful"""
    supernetwork = 'Mainstems_CONUS'
    # supernetwork = 'CONUS_FULL_RES_v20'
    # supernetwork = 'CONUS_Named_Streams' #create a subset of the full resolution by reading the GNIS field
    # supernetwork = 'CONUS_Named_combined' #process the Named streams through the Full-Res paths to join the many hanging reaches

    if verbose:
        print("creating supernetwork connections set")
    if showtiming:
        start_time = time.time()
    # STEP 1: load geometry, apply optional mask, and build connections.
    network_data = nnu.set_supernetwork_data(
        supernetwork=supernetwork, geo_input_folder=geo_input_folder
    )

    # This variant stores column names as flat "*_col" metadata entries
    # (unlike the dict-valued "columns" key used elsewhere in this file).
    cols = [v for c, v in network_data.items() if c.endswith("_col")]
    data = nhd_io.read(network_data["geo_file_path"])
    data = data[cols]
    data = data.set_index(network_data["key_col"])

    # Optionally mask the network down to the test subset.
    if "mask_file_path" in network_data:
        data_mask = nhd_io.read_mask(
            network_data["mask_file_path"],
            layer_string=network_data["mask_layer_string"],
        )
        data = data.filter(data_mask.iloc[:, network_data["mask_key"]], axis=0)

    data = data.sort_index()
    # Replace terminal downstream references with 0.
    data = replace_downstreams(data, network_data['downstream_col'], 0)

    # Lateral inflows: sample file for the Pocono test network (reshaped
    # to segments x timesteps), else a constant 10.0 everywhere.
    if supernetwork == "Pocono_TEST2":
        qlats = pd.read_csv('../../test/input/geo/PoconoSampleData2/Pocono_ql_testsamp1_nwm_mc.txt', index_col='ntt')
        qlats = qlats.drop(columns=['nt'])
        qlats.columns = qlats.columns.astype(int)
        qlats = qlats.sort_index(axis='columns').sort_index(axis='index')
        # Keep only columns present in the network index, then transpose
        # so rows are segment ids.
        qlats = qlats.drop(columns=qlats.columns.difference(data.index)).T
        qlats = qlats.astype('float32')
    else:
        qlats = constant_qlats(data, nts, 10.0)


    connections = nhd_network.extract_connections(data, network_data["downstream_col"])
    rconn = nhd_network.reverse_network(connections)
    # rconn_annoated = translate_network_to_index(rconn, data.index)
    if verbose:
        print("supernetwork connections set complete")
    if showtiming:
        print("... in %s seconds." % (time.time() - start_time))

    # STEP 2: find each tailwater's reachable sub-network and decompose
    # it into reaches split at junctions.
    if showtiming:
        start_time = time.time()
    if verbose:
        print("organizing connections into reaches ...")

    subnets = nhd_network.reachable_network(rconn)
    subreaches = {}
    for tw, net in subnets.items():
        path_func = partial(nhd_network.split_at_junction, net)
        # NOTE(review): unlike the other main() in this file, this variant
        # decomposes the re-reversed sub-network — confirm intended.
        reach = nhd_network.dfs_decomposition(
            nhd_network.reverse_network(net), path_func
        )
        subreaches[tw] = reach

    if verbose:
        print("reach organization complete")
    if showtiming:
        print("... in %s seconds." % (time.time() - start_time))

    if showtiming:
        start_time = time.time()

    # flowdepthvel = {node: {'flow': {'prev': 0, 'curr': 0}
    #    , 'depth': {'prev': 0, 'curr': 0}
    #    , 'vel': {'prev': 0, 'curr': 0}
    #    , 'qlat': {'prev': 0, 'curr': 0}} for node in nhd_network.nodes(connections)}

    parallelcompute = False

    # Data column ordering is very important as we directly lookup values.
    # The column order *must* be:
    # 0: bw, 1: tw, 2: twcc, 3: dx, 4: n_manning 5: n_manning_cc, 6: cs, 7: s0, 8: qlat
    data['dt'] = 300.0  # fixed timestep (presumably seconds — TODO confirm)

    # Rename NHD attribute columns to the short names mc_reach expects.
    data = data.rename(columns={'Length': 'dx', 'TopWdth': 'tw', 'TopWdthCC': 'twcc',
        'BtmWdth': 'bw', 'nCC': 'ncc', 'So': 's0', 'ChSlp': 'cs'})
    data = data.astype('float32')    
    #datasub = data[['dt', 'bw', 'tw', 'twcc', 'dx', 'n', 'ncc', 'cs', 's0']]

    #qlats = qlats.loc[:, :nts]
    # STEP 3: route each tailwater sub-network (joblib threading or serial).
    compute_start = time.time()
    if parallelcompute:
        if verbose:
            print("executing computation on ordered reaches ...")
        with Parallel(
            n_jobs=-1, pre_dispatch="all", backend="threading", verbose=5
        ) as parallel:
            jobs = []
            for twi, (tw, reach) in enumerate(subreaches.items(), 1):
                # Flatten the reach decomposition to the segment id list.
                r = list(chain.from_iterable(reach))
                #assert r[-1] == tw  # always be True
                #assert len(data.index.intersection(r)) == len(r)
                data_sub = data.loc[r, ['dt', 'bw', 'tw', 'twcc', 'dx', 'n', 'ncc', 'cs', 's0']].sort_index()
                qlat_sub = qlats.loc[r].sort_index()
                jobs.append(
                    delayed(mc_reach.compute_network)(
                        nts, reach, subnets[tw], data_sub.index.values, data_sub.columns.values, data_sub.values, qlat_sub.values
                    )
                )
            # Shuffle so large sub-networks are not all scheduled together.
            random.shuffle(jobs)
            rets = parallel(jobs)
            #for findex, fdv in rets:
            #    flowdepthvel[findex] = fdv

    else:
        rets = []
        for twi, (tw, reach) in enumerate(subreaches.items(), 1):
            # Flatten the reach decomposition to the segment id list.
            r = list(chain.from_iterable(reach))
            #assert r[-1] == tw  # always be True
            #assert len(data.index.intersection(r)) == len(r)
            data_sub = data.loc[r, ['dt', 'bw', 'tw', 'twcc', 'dx', 'n', 'ncc', 'cs', 's0']].sort_index()
            #TODO: Do the data_sub = data.loc as a preprocessing step, then dump the pointer to data
            qlat_sub = qlats.loc[r].sort_index()
            rets.append(
                mc_reach.compute_network(
                    nts, reach, subnets[tw], data_sub.index.values, data_sub.columns.values, data_sub.values, qlat_sub.values))
            # TODO: rets could be dumped to files
            #findex, fdv = mc_reach.compute_network(ts, reach, subnets[tw], data_idx, data_values, qlat_values)
            #flowdepthvel[findex] = fdv
            if verbose:
                print(
                    f"tailwater: {tw} completed",
                    end = "", 
                )
            # NOTE: Mississippi River tailwater is {22811611,}:

            if showtiming:
                print(
                    f"... in {time.time()-compute_start} seconds ({twi}/{len(subreaches)})"
                )

    # Assemble per-tailwater (index, values) results into one DataFrame
    # with a (timestep, q/v/d) MultiIndex on the columns.
    print("Computation time: ", time.time() - compute_start)
    fdv_columns = pd.MultiIndex.from_product([range(nts), ['q', 'v', 'd']], names=['timestep', 'qvd'])
    flowveldepth = pd.concat([pd.DataFrame(d, index=i, columns=fdv_columns) for i, d in rets])
    print(flowveldepth)