Example #1
import os

import geopandas as gpd

# Helper functions (pull_file, getDriver, subset_wbd_to_nwm_domain) and constants
# (WBD_NATIONAL_URL, PREP_PROJECTION, FIM_ID) are defined elsewhere in this module.


def pull_and_prepare_wbd(path_to_saved_data_parent_dir, nwm_dir_name,
                         nwm_file_to_use, overwrite_wbd, num_workers):
    """
    This helper function pulls and unzips Watershed Boundary Dataset (WBD) data. It uses the WBD URL defined by WBD_NATIONAL_URL.
    It also subsets the WBD layers (HU4, HU6, HU8) to CONUS and converts them to geopackage layers.

    Args:
        path_to_saved_data_parent_dir (str): The system path to where the WBD will be downloaded, unzipped, and preprocessed.
        nwm_dir_name (str): Name of the NWM directory under path_to_saved_data_parent_dir.
        nwm_file_to_use (str): Name of the NWM file used to subset the WBD to the NWM domain.
        overwrite_wbd (bool): If True, re-download and re-process the WBD even if outputs already exist.
        num_workers (int): Number of workers (used only by the commented-out multiprocessing path).

    """

    # Construct path to wbd_directory and create if not existent.
    wbd_directory = os.path.join(path_to_saved_data_parent_dir, 'wbd')
    if not os.path.exists(wbd_directory):
        os.mkdir(wbd_directory)

    wbd_gdb_path = os.path.join(wbd_directory, 'WBD_National_GDB.gdb')
    pulled_wbd_zipped_path = os.path.join(wbd_directory,
                                          'WBD_National_GDB.zip')

    multilayer_wbd_geopackage = os.path.join(wbd_directory,
                                             'WBD_National.gpkg')

    nwm_huc_list_file_template = os.path.join(wbd_directory, 'nwm_wbd{}.csv')

    nwm_file_to_use = os.path.join(path_to_saved_data_parent_dir, nwm_dir_name,
                                   nwm_file_to_use)
    if not os.path.isfile(nwm_file_to_use):
        raise IOError(
            "NWM file to subset to is not available: {}".format(nwm_file_to_use))

    if not os.path.exists(multilayer_wbd_geopackage) or overwrite_wbd:
        # Download WBD and unzip if it's not already done.
        if not os.path.exists(wbd_gdb_path):
            if not os.path.exists(pulled_wbd_zipped_path):
                pull_file(WBD_NATIONAL_URL, pulled_wbd_zipped_path)
            os.system(
                "7za x {pulled_wbd_zipped_path} -o{wbd_directory}".format(
                    pulled_wbd_zipped_path=pulled_wbd_zipped_path,
                    wbd_directory=wbd_directory))

        # procs_list and wbd_gpkg_list are only used by the commented-out
        # multiprocessing path below.
        procs_list, wbd_gpkg_list = [], []
        # Add fimid to HU8, project, and convert to geopackage.
        if os.path.isfile(multilayer_wbd_geopackage):
            os.remove(multilayer_wbd_geopackage)
        print("Making National WBD GPKG...")
        print("\tWBDHU8")
        wbd_hu8 = gpd.read_file(wbd_gdb_path, layer='WBDHU8')
        wbd_hu8 = wbd_hu8.rename(columns={'huc8': 'HUC8'})  # Rename column to caps.
        wbd_hu8 = wbd_hu8.sort_values('HUC8')
        fimids = [
            str(item).zfill(4)
            for item in list(range(1000, 1000 + len(wbd_hu8)))
        ]
        wbd_hu8[FIM_ID] = fimids
        wbd_hu8 = wbd_hu8.to_crs(PREP_PROJECTION)  # Project.
        wbd_hu8 = subset_wbd_to_nwm_domain(wbd_hu8, nwm_file_to_use)
        wbd_hu8.geometry = wbd_hu8.buffer(0)  # Zero-width buffer to repair invalid geometries.
        wbd_hu8.to_file(multilayer_wbd_geopackage,
                        layer='WBDHU8',
                        driver=getDriver(multilayer_wbd_geopackage),
                        index=False)  # Save.
        wbd_hu8.HUC8.to_csv(nwm_huc_list_file_template.format('8'),
                            index=False,
                            header=False)
        #wbd_gpkg_list.append(os.path.join(wbd_directory, 'WBDHU8.gpkg'))  # Append to wbd_gpkg_list for subsetting later.
        del wbd_hu8

        # Prepare procs_list for multiprocessed geopackaging.
        for wbd_layer_num in ['4', '6']:
            wbd_layer = 'WBDHU' + wbd_layer_num
            print("\t{}".format(wbd_layer))
            wbd = gpd.read_file(wbd_gdb_path, layer=wbd_layer)
            wbd = wbd.to_crs(PREP_PROJECTION)
            wbd = wbd.rename(
                columns={'huc' + wbd_layer_num: 'HUC' + wbd_layer_num})
            wbd = subset_wbd_to_nwm_domain(wbd, nwm_file_to_use)
            wbd.geometry = wbd.buffer(0)  # Zero-width buffer to repair invalid geometries.
            wbd.to_file(multilayer_wbd_geopackage,
                        layer=wbd_layer,
                        driver=getDriver(multilayer_wbd_geopackage),
                        index=False)
            wbd['HUC{}'.format(wbd_layer_num)].to_csv(
                nwm_huc_list_file_template.format(wbd_layer_num),
                index=False,
                header=False)
            #output_gpkg = os.path.join(wbd_directory, wbd_layer + '.gpkg')
            #wbd_gpkg_list.append(output_gpkg)
            #procs_list.append(['ogr2ogr -overwrite -progress -f GPKG -t_srs "{projection}" {output_gpkg} {wbd_gdb_path} {wbd_layer}'.format(output_gpkg=output_gpkg, wbd_gdb_path=wbd_gdb_path, wbd_layer=wbd_layer, projection=PREP_PROJECTION)])

        # with Pool(processes=num_workers) as pool:
        # pool.map(run_system_command, procs_list)

        # Subset WBD layers to CONUS and add to single geopackage.
        #print("Subsetting WBD layers to CONUS...")
        #multilayer_wbd_geopackage = os.path.join(wbd_directory, 'WBD_National.gpkg')
        #for gpkg in wbd_gpkg_list:
        #    subset_wbd_gpkg(gpkg, multilayer_wbd_geopackage)

    # Clean up temporary files.
    #for temp_layer in ['WBDHU4', 'WBDHU6', 'WBDHU8']:
    #    delete_file(os.path.join(wbd_directory, temp_layer + '.gpkg'))
    #pulled_wbd_zipped_path = os.path.join(wbd_directory, 'WBD_National_GDB.zip')
    #delete_file(pulled_wbd_zipped_path)
    #delete_file(os.path.join(wbd_directory, 'WBD_National_GDB.jpg'))

    return wbd_directory
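
A minimal usage sketch follows (not part of the original example): it assumes the function lives in a module alongside the helpers and constants it references, and every path and file name below is an illustrative placeholder, not a value from the original code.

# Hypothetical driver for pull_and_prepare_wbd; directory names and the NWM file
# name are assumptions for illustration only.
if __name__ == '__main__':
    wbd_dir = pull_and_prepare_wbd(
        path_to_saved_data_parent_dir='/data/inputs',  # assumed parent directory
        nwm_dir_name='nwm',                            # assumed NWM subdirectory name
        nwm_file_to_use='nwm_flows.gpkg',              # assumed NWM subset file
        overwrite_wbd=False,
        num_workers=1)
    print("WBD prepared in: {}".format(wbd_dir))
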
Example #2
import os
import shutil

import geopandas as gpd

# Helper functions (pull_file, delete_file) and the PREP_PROJECTION constant are
# defined elsewhere in this module.


def pull_and_prepare_nhd_data(args):
    """
    This helper function is designed to be multiprocessed. It pulls and unzips NHD raster and vector data.
    Args:
        args (list): A list of arguments in this format: [nhd_raster_download_url, nhd_raster_extraction_path, nhd_vector_download_url, nhd_vector_extraction_path, overwrite_nhd]
    """
    # Parse URLs, extraction paths, and the overwrite flag from the args list.
    nhd_raster_download_url = args[0]
    nhd_raster_extraction_path = args[1]
    nhd_vector_download_url = args[2]
    nhd_vector_extraction_path = args[3]
    overwrite_nhd = args[4]

    nhd_gdb = nhd_vector_extraction_path.replace(
        '.zip', '.gdb')  # Update extraction path from .zip to .gdb.

    # Download raster and vector, if not already in user's directory (exist check performed by pull_file()).
    nhd_raster_extraction_parent = os.path.dirname(nhd_raster_extraction_path)
    huc = os.path.basename(nhd_raster_extraction_path).split('_')[2]  # Parse HUC from the raster file name.

    nhd_raster_parent_dir = os.path.join(nhd_raster_extraction_parent,
                                         'HRNHDPlusRasters' + huc)

    if not os.path.exists(nhd_raster_parent_dir):
        os.mkdir(nhd_raster_parent_dir)

    elev_cm_tif = os.path.join(nhd_raster_parent_dir, 'elev_cm.tif')
    if not os.path.exists(elev_cm_tif) or overwrite_nhd:
        pull_file(nhd_raster_download_url, nhd_raster_extraction_path)
        os.system(
            "7za e {nhd_raster_extraction_path} -o{nhd_raster_parent_dir} elev_cm.tif -r "
            .format(nhd_raster_extraction_path=nhd_raster_extraction_path,
                    nhd_raster_parent_dir=nhd_raster_parent_dir))

        # Remove everything extracted alongside elev_cm from the raster directory.
        file_list = os.listdir(nhd_raster_parent_dir)
        for f in file_list:
            full_path = os.path.join(nhd_raster_parent_dir, f)
            if 'elev_cm' not in f:
                if os.path.isdir(full_path):
                    shutil.rmtree(full_path)
                elif os.path.isfile(full_path):
                    os.remove(full_path)
        os.remove(nhd_raster_extraction_path)

    nhd_vector_extraction_parent = os.path.dirname(nhd_vector_extraction_path)

    if not os.path.exists(nhd_vector_extraction_parent):
        os.mkdir(nhd_vector_extraction_parent)

    # Only pull if not already pulled and processed.
    if not os.path.exists(nhd_gdb) or overwrite_nhd:
        # Download and fully unzip downloaded GDB.
        pull_file(nhd_vector_download_url, nhd_vector_extraction_path)
        huc = os.path.split(nhd_vector_extraction_parent)[1]  # Parse HUC.
        os.system(
            "7za x {nhd_vector_extraction_path} -o{nhd_vector_extraction_parent}"
            .format(nhd_vector_extraction_path=nhd_vector_extraction_path,
                    nhd_vector_extraction_parent=nhd_vector_extraction_parent))
        # extract input stream network
        nhd = gpd.read_file(nhd_gdb, layer='NHDPlusBurnLineEvent')
        nhd = nhd.to_crs(PREP_PROJECTION)
        nhd.to_file(os.path.join(nhd_vector_extraction_parent,
                                 'NHDPlusBurnLineEvent' + huc + '.gpkg'),
                    driver='GPKG')
        # extract flowlines for FType attributes
        nhd = gpd.read_file(nhd_gdb, layer='NHDFlowline')
        nhd = nhd.to_crs(PREP_PROJECTION)
        nhd.to_file(os.path.join(nhd_vector_extraction_parent,
                                 'NHDFlowline' + huc + '.gpkg'),
                    driver='GPKG')
        # extract attributes
        nhd = gpd.read_file(nhd_gdb, layer='NHDPlusFlowLineVAA')
        nhd.to_file(os.path.join(nhd_vector_extraction_parent,
                                 'NHDPlusFlowLineVAA' + huc + '.gpkg'),
                    driver='GPKG')
        # -- Project and convert NHDPlusBurnLineEvent and NHDPlusFlowLineVAA vectors to geopackage -- #
        #for nhd_layer in ['NHDPlusBurnLineEvent', 'NHDPlusFlowlineVAA']:
        #    run_system_command(['ogr2ogr -overwrite -progress -f GPKG -t_srs "{projection}" {output_gpkg} {nhd_gdb} {nhd_layer}'.format(projection=PREP_PROJECTION, output_gpkg=output_gpkg, nhd_gdb=nhd_gdb, nhd_layer=nhd_layer)])  # Use list because function is configured for multiprocessing.
    # Delete unnecessary files.
    delete_file(nhd_vector_extraction_path.replace('.zip', '.jpg'))
    delete_file(nhd_vector_extraction_path)  # Delete the zipped GDB.
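
Since the docstring notes this function is designed to be multiprocessed, here is a minimal sketch of how it might be driven with multiprocessing.Pool; every URL, path, HUC code, and worker count below is an illustrative assumption rather than a value from the original example.

# Hypothetical multiprocessing driver for pull_and_prepare_nhd_data; URLs, paths,
# and HUC codes are assumptions for illustration only.
from multiprocessing import Pool

if __name__ == '__main__':
    nhd_procs_list = []
    for huc4 in ['0601', '1203']:  # assumed HUC4 codes
        nhd_procs_list.append([
            'https://example.com/NHDPLUS_H_{}_HU4_RASTER.zip'.format(huc4),  # assumed raster URL
            '/data/nhd/NHDPLUS_H_{}_HU4_RASTER.zip'.format(huc4),            # raster zip destination
            'https://example.com/NHDPLUS_H_{}_HU4_GDB.zip'.format(huc4),     # assumed vector URL
            '/data/nhd/{0}/NHDPLUS_H_{0}_HU4_GDB.zip'.format(huc4),          # vector zip destination
            False,  # overwrite_nhd
        ])
    with Pool(processes=2) as pool:  # assumed worker count
        pool.map(pull_and_prepare_nhd_data, nhd_procs_list)
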