def tearDown(self):
    super(VectorOpsTest, self).tearDown()
    safe_remove_dir(self.outdir)
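# tearDown relies on safe_remove_dir from rscommons. The real helper adds
# logging; the sketch below is a hypothetical equivalent (note the leading
# underscore), shown only to illustrate the behaviour the teardown depends on:
# removal must never raise if the folder is already gone.
def _safe_remove_dir_sketch(dir_path):
    import shutil
    try:
        shutil.rmtree(dir_path)  # recursively delete the folder and its contents
    except OSError:
        pass  # a missing or unreadable folder should not fail the cleanup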
def main():

    parser = argparse.ArgumentParser(description='Riverscapes Context Tool')
    parser.add_argument('huc', help='HUC identifier', type=str)
    parser.add_argument('existing', help='National existing vegetation raster', type=str)
    parser.add_argument('historic', help='National historic vegetation raster', type=str)
    parser.add_argument('ownership', help='National land ownership shapefile', type=str)
    parser.add_argument('fairmarket', help='National fair market value raster', type=str)
    parser.add_argument('ecoregions', help='National EcoRegions shapefile', type=str)
    parser.add_argument('prism', help='Folder containing PRISM rasters in BIL format', type=str)
    parser.add_argument('output', help='Path to the output folder', type=str)
    parser.add_argument('download', help='Temporary folder for downloading data. Different HUCs may share this', type=str)
    parser.add_argument('--force', help='(optional) download existing files', action='store_true', default=False)
    parser.add_argument('--parallel', help='(optional) for running multiple instances of this at the same time', action='store_true', default=False)
    parser.add_argument('--temp_folder', help='(optional) cache folder for downloading files', type=str)
    parser.add_argument('--meta', help='riverscapes project metadata as comma separated key=value pairs', type=str)
    parser.add_argument('--verbose', help='(optional) a little extra logging', action='store_true', default=False)
    parser.add_argument('--debug', help='(optional) more output about things like memory usage. There is a performance cost', action='store_true', default=False)

    args = dotenv.parse_args_env(parser)

    # Initiate the log file
    log = Logger("RS Context")
    log.setup(logPath=os.path.join(args.output, "rs_context.log"), verbose=args.verbose)
    log.title('Riverscapes Context For HUC: {}'.format(args.huc))

    log.info('HUC: {}'.format(args.huc))
    log.info('EPSG: {}'.format(cfg.OUTPUT_EPSG))
    log.info('Existing veg: {}'.format(args.existing))
    log.info('Historical veg: {}'.format(args.historic))
    log.info('Ownership: {}'.format(args.ownership))
    log.info('Fair Market Value Raster: {}'.format(args.fairmarket))
    log.info('Output folder: {}'.format(args.output))
    log.info('Download folder: {}'.format(args.download))
    log.info('Force download: {}'.format(args.force))

    # This is a general place for unzipping downloaded files and other temporary work.
    # We use GUIDs to make it specific to a particular run of the tool to avoid unzip collisions
    parallel_code = "-" + str(uuid.uuid4()) if args.parallel is True else ""
    scratch_dir = args.temp_folder if args.temp_folder else os.path.join(args.download, 'scratch', 'rs_context{}'.format(parallel_code))
    safe_makedirs(scratch_dir)

    meta = parse_metadata(args.meta)

    try:
        if args.debug is True:
            from rscommons.debug import ThreadRun
            memfile = os.path.join(args.output, 'rs_context_memusage.log')
            retcode, max_obj = ThreadRun(rs_context, memfile, args.huc, args.existing, args.historic, args.ownership, args.fairmarket, args.ecoregions, args.prism, args.output, args.download, scratch_dir, args.parallel, args.force, meta)
            log.debug('Return code: {}, [Max process usage] {}'.format(retcode, max_obj))
        else:
            rs_context(args.huc, args.existing, args.historic, args.ownership, args.fairmarket, args.ecoregions, args.prism, args.output, args.download, scratch_dir, args.parallel, args.force, meta)

    except Exception as e:
        log.error(e)
        traceback.print_exc(file=sys.stdout)
        # Cleaning up the scratch folder is essential
        safe_remove_dir(scratch_dir)
        sys.exit(1)

    # Cleaning up the scratch folder is essential
    safe_remove_dir(scratch_dir)
    sys.exit(0)
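# The --meta flag is parsed by parse_metadata from rscommons. The sketch below
# is a hypothetical equivalent (hence the leading underscore), included only to
# illustrate the expected "key1=value1,key2=value2" input format; the real
# implementation may differ in validation and error handling.
def _parse_metadata_sketch(meta_arg):
    meta = {}
    for pair in (meta_arg or '').split(','):
        key, sep, value = pair.partition('=')
        if sep and key.strip():
            meta[key.strip()] = value.strip()  # e.g. "Watershed=Lemhi" -> {'Watershed': 'Lemhi'}
    return meta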
def unzip(file_path, destination_folder, force_overwrite=False, retries=3):
    """Unzip an archive into a destination folder, skipping files that are already extracted.

    Args:
        file_path: Full path to an existing zip archive
        destination_folder: Path where the zip archive will be unzipped
        force_overwrite (bool, optional): Force overwrite of a file if it's already there. Defaults to False.
        retries (int, optional): Number of retries on a single file. Defaults to 3.

    Raises:
        Exception: If the zip archive does not exist
        Exception: If a single file cannot be extracted after the specified number of retries
        Exception: If the archive is corrupt or cannot be unzipped
    """
    log = Logger('Unzipper')

    if not os.path.isfile(file_path):
        raise Exception('Unzip error: file not found: {}'.format(file_path))

    try:
        log.info('Attempting unzip: {} ==> {}'.format(file_path, destination_folder))
        zip_ref = zipfile.ZipFile(file_path, 'r')

        safe_makedirs(destination_folder)
        log.info('Extracting: {}'.format(file_path))

        # Only unzip things we haven't already unzipped
        for fitem in zip_ref.filelist:
            uz_success = False
            uz_retry = 0
            while not uz_success and uz_retry < retries:
                try:
                    outfile = os.path.join(destination_folder, fitem.filename)
                    if fitem.is_dir():
                        if not os.path.isdir(outfile):
                            zip_ref.extract(fitem, destination_folder)
                            log.debug('   (creating)  {}'.format(fitem.filename))
                        else:
                            log.debug('   (skipping)  {}'.format(fitem.filename))
                    else:
                        # Extract when forced, when the file is missing, or when the existing
                        # file is materially smaller than the archive says it should be.
                        # (Checking existence first also avoids calling os.path.getsize on
                        # a file that isn't there.)
                        needs_extract = force_overwrite or not os.path.isfile(outfile)
                        if not needs_extract and fitem.file_size > 0:
                            needs_extract = (os.path.getsize(outfile) / fitem.file_size) < 0.99999
                        if needs_extract:
                            log.debug('   (unzipping) {}'.format(fitem.filename))
                            zip_ref.extract(fitem, destination_folder)
                        else:
                            log.debug('   (skipping)  {}'.format(fitem.filename))
                    uz_success = True
                except Exception as e:
                    log.debug(e)
                    log.warning('unzipping file failed. waiting 3 seconds and retrying...')
                    time.sleep(3)
                    uz_retry += 1

            if not uz_success:
                raise Exception('Unzipping of file {} failed after {} attempts'.format(fitem.filename, retries))

        zip_ref.close()
        log.info('Done')

    except zipfile.BadZipFile as e:
        # If the zip file is bad then we have to remove it.
        log.error('BadZipFile. Cleaning up zip file and output folder')
        safe_remove_file(file_path)
        safe_remove_dir(destination_folder)
        raise Exception('Unzip error: BadZipFile')
    except Exception as e:
        log.error('Error unzipping. Cleaning up output folder')
        safe_remove_dir(destination_folder)
        raise Exception('Unzip error: file could not be unzipped')
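# Example usage of unzip() with hypothetical paths: extract a downloaded NHD
# archive into a scratch folder, retrying each member up to five times.
#
#     unzip('/data/downloads/nhd/NHDPLUS_H_1706_HU4_GDB.zip',
#           '/data/scratch/nhd/1706',
#           force_overwrite=False,
#           retries=5)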
def rs_context(huc, existing_veg, historic_veg, ownership, fair_market, ecoregions, prism_folder, output_folder, download_folder, scratch_dir, parallel, force_download, meta: Dict[str, str]):
    """
    Download riverscapes context layers for the specified HUC and organize them as a Riverscapes project

    :param huc: Four, eight, 10 or 12 digit HUC identification number
    :param existing_veg: Path to the existing vegetation conditions raster
    :param historic_veg: Path to the historical vegetation conditions raster
    :param ownership: Path to the national land ownership Shapefile
    :param fair_market: Path to the national fair market value raster
    :param ecoregions: Path to the national EcoRegions Shapefile
    :param prism_folder: Folder containing PRISM rasters in *.bil format
    :param output_folder: Output location for the riverscapes context project
    :param download_folder: Temporary folder where downloads are cached. This can be shared between rs_context processes
    :param scratch_dir: Temporary folder for unzipping and intermediate products
    :param parallel: If true, unzip folders are removed after use so concurrent runs don't collide
    :param force_download: If false then downloads can be skipped if the files already exist
    :param meta (Dict[str, str]): Dictionary of riverscapes metadata key: value pairs
    :return:
    """
    log = Logger("RS Context")
    log.info('Starting RSContext v.{}'.format(cfg.version))

    try:
        int(huc)
    except ValueError:
        raise Exception('Invalid HUC identifier "{}". Must be an integer'.format(huc))

    if len(huc) not in [4, 8, 10, 12]:
        raise Exception('Invalid HUC identifier. Must be 4, 8, 10 or 12 digit integer')

    safe_makedirs(output_folder)
    safe_makedirs(download_folder)

    # We need a temporary folder for slope rasters, stitching inputs, intermediary products, etc.
    scratch_dem_folder = os.path.join(scratch_dir, 'rs_context', huc)
    safe_makedirs(scratch_dem_folder)

    project, realization = create_project(huc, output_folder)
    hydrology_gpkg_path = os.path.join(output_folder, LayerTypes['HYDROLOGY'].rel_path)

    dem_node, dem_raster = project.add_project_raster(realization, LayerTypes['DEM'])
    _node, hill_raster = project.add_project_raster(realization, LayerTypes['HILLSHADE'])
    _node, flow_accum = project.add_project_raster(realization, LayerTypes['FA'])
    _node, drain_area = project.add_project_raster(realization, LayerTypes['DA'])
    hand_node, hand_raster = project.add_project_raster(realization, LayerTypes['HAND'])
    _node, slope_raster = project.add_project_raster(realization, LayerTypes['SLOPE'])
    _node, existing_clip = project.add_project_raster(realization, LayerTypes['EXVEG'])
    _node, historic_clip = project.add_project_raster(realization, LayerTypes['HISTVEG'])
    _node, fair_market_clip = project.add_project_raster(realization, LayerTypes['FAIR_MARKET'])

    # Incorporate project metadata into the riverscapes project
    if meta is not None:
        project.add_metadata(meta)

    # Download the four digit NHD archive containing the flow lines and watershed boundaries
    log.info('Processing NHD')
    nhd_download_folder = os.path.join(download_folder, 'nhd', huc[:4])
    nhd_unzip_folder = os.path.join(scratch_dir, 'nhd', huc[:4])

    nhd, db_path, huc_name, nhd_url = clean_nhd_data(huc, nhd_download_folder, nhd_unzip_folder, os.path.join(output_folder, 'hydrology'), cfg.OUTPUT_EPSG, False)

    # Clean up the unzipped files. We won't need them again
    if parallel:
        safe_remove_dir(nhd_unzip_folder)

    project.add_metadata({'Watershed': huc_name})
    boundary = 'WBDHU{}'.format(len(huc))

    # For rasters coarser than the DEM we need to buffer our clip polygon to include enough pixels.
    # This shouldn't be too much more data because these are usually integer rasters that are much lower res.
    buffered_clip_path100 = os.path.join(hydrology_gpkg_path, LayerTypes['HYDROLOGY'].sub_layers['BUFFEREDCLIP100'].rel_path)
    copy_feature_class(nhd[boundary], buffered_clip_path100, epsg=cfg.OUTPUT_EPSG, buffer=100)

    buffered_clip_path500 = os.path.join(hydrology_gpkg_path, LayerTypes['HYDROLOGY'].sub_layers['BUFFEREDCLIP500'].rel_path)
    copy_feature_class(nhd[boundary], buffered_clip_path500, epsg=cfg.OUTPUT_EPSG, buffer=500)

    # PRISM climate rasters
    mean_annual_precip = None
    bil_files = glob.glob(os.path.join(prism_folder, '**', '*.bil'))
    if len(bil_files) == 0:
        raise Exception('Could not find any .bil files in the prism folder')

    for ptype in PrismTypes:
        try:
            # next() can raise StopIteration, so it should always be guarded
            source_raster_path = next(x for x in bil_files if ptype.lower() in os.path.basename(x).lower())
        except StopIteration:
            raise Exception('Could not find .bil file corresponding to "{}"'.format(ptype))

        _node, project_raster_path = project.add_project_raster(realization, LayerTypes[ptype])
        raster_warp(source_raster_path, project_raster_path, cfg.OUTPUT_EPSG, buffered_clip_path500, {"cutlineBlend": 1})

        # Use the mean annual precipitation to calculate bankfull width
        if ptype.lower() == 'ppt':
            polygon = get_geometry_unary_union(nhd[boundary], epsg=cfg.OUTPUT_EPSG)
            mean_annual_precip = raster_buffer_stats2({1: polygon}, project_raster_path)[1]['Mean']
            log.info('Mean annual precipitation for HUC {} is {} mm'.format(huc, mean_annual_precip))
            project.add_metadata({'mean_annual_precipitation_mm': str(mean_annual_precip)})

            calculate_bankfull_width(nhd['NHDFlowline'], mean_annual_precip)

    # Add the DB record to the Project XML
    db_lyr = RSLayer('NHD Tables', 'NHDTABLES', 'SQLiteDB', os.path.relpath(db_path, output_folder))
    sqlite_el = project.add_dataset(realization, db_path, db_lyr, 'SQLiteDB')
    project.add_metadata({'origin_url': nhd_url}, sqlite_el)

    # Add any results to project XML
    for name, file_path in nhd.items():
        lyr_obj = RSLayer(name, name, 'Vector', os.path.relpath(file_path, output_folder))
        vector_nod, _fpath = project.add_project_vector(realization, lyr_obj)
        project.add_metadata({'origin_url': nhd_url}, vector_nod)

    states = get_nhd_states(nhd[boundary])

    # Download the NTD archive containing roads and rail
    log.info('Processing NTD')
    ntd_raw = {}
    ntd_unzip_folders = []
    ntd_urls = get_ntd_urls(states)
    for state, ntd_url in ntd_urls.items():
        ntd_download_folder = os.path.join(download_folder, 'ntd', state.lower())
        # A little awkward, but the unzipped NTD data needs its own dedicated folder
        ntd_unzip_folder = os.path.join(scratch_dir, 'ntd', state.lower(), 'unzipped')
        ntd_raw[state] = download_shapefile_collection(ntd_url, ntd_download_folder, ntd_unzip_folder, force_download)
        ntd_unzip_folders.append(ntd_unzip_folder)

    ntd_clean = clean_ntd_data(ntd_raw, nhd['NHDFlowline'], nhd[boundary], os.path.join(output_folder, 'transportation'), cfg.OUTPUT_EPSG)

    # Clean up the NTD unzip folders. We won't need them again
    if parallel:
        for unzip_path in ntd_unzip_folders:
            safe_remove_dir(unzip_path)

    # Write transportation layers to project file
    log.info('Writing transportation layers to project file')

    # Add any results to project XML
    for name, file_path in ntd_clean.items():
        lyr_obj = RSLayer(name, name, 'Vector', os.path.relpath(file_path, output_folder))
        ntd_node, _fpath = project.add_project_vector(realization, lyr_obj)
        project.add_metadata({**ntd_urls}, ntd_node)

    # Download the HAND raster
    huc6 = huc[0:6]
    hand_download_folder = os.path.join(download_folder, 'hand')
    _hpath, hand_url = download_hand(huc6, cfg.OUTPUT_EPSG, hand_download_folder, nhd[boundary], hand_raster, warp_options={"cutlineBlend": 1})
    project.add_metadata({'origin_url': hand_url}, hand_node)

    # Download contributing DEM rasters, mosaic and reproject into a compressed GeoTIFF
    ned_download_folder = os.path.join(download_folder, 'ned')
    ned_unzip_folder = os.path.join(scratch_dir, 'ned')
    dem_rasters, urls = download_dem(nhd[boundary], cfg.OUTPUT_EPSG, 0.01, ned_download_folder, ned_unzip_folder, force_download)

    need_dem_rebuild = force_download or not os.path.exists(dem_raster)
    if need_dem_rebuild:
        raster_vrt_stitch(dem_rasters, dem_raster, cfg.OUTPUT_EPSG, clip=nhd[boundary], warp_options={"cutlineBlend": 1})
        verify_areas(dem_raster, nhd[boundary])

    # Calculate slope rasters separately and then stitch them
    slope_parts = []
    hillshade_parts = []

    need_slope_build = need_dem_rebuild or not os.path.isfile(slope_raster)
    need_hs_build = need_dem_rebuild or not os.path.isfile(hill_raster)

    project.add_metadata({
        'num_rasters': str(len(urls)),
        'origin_urls': json.dumps(urls)
    }, dem_node)

    for dem_r in dem_rasters:
        slope_part_path = os.path.join(scratch_dem_folder, 'SLOPE__' + os.path.basename(dem_r).split('.')[0] + '.tif')
        hs_part_path = os.path.join(scratch_dem_folder, 'HS__' + os.path.basename(dem_r).split('.')[0] + '.tif')
        slope_parts.append(slope_part_path)
        hillshade_parts.append(hs_part_path)

        if force_download or need_dem_rebuild or not os.path.exists(slope_part_path):
            gdal_dem_geographic(dem_r, slope_part_path, 'slope')
            need_slope_build = True

        if force_download or need_dem_rebuild or not os.path.exists(hs_part_path):
            gdal_dem_geographic(dem_r, hs_part_path, 'hillshade')
            need_hs_build = True

    if need_slope_build:
        raster_vrt_stitch(slope_parts, slope_raster, cfg.OUTPUT_EPSG, clip=nhd[boundary], clean=parallel, warp_options={"cutlineBlend": 1})
        verify_areas(slope_raster, nhd[boundary])
    else:
        log.info('Skipping slope build because nothing has changed.')

    if need_hs_build:
        raster_vrt_stitch(hillshade_parts, hill_raster, cfg.OUTPUT_EPSG, clip=nhd[boundary], clean=parallel, warp_options={"cutlineBlend": 1})
        verify_areas(hill_raster, nhd[boundary])
    else:
        log.info('Skipping hillshade build because nothing has changed.')

    # Remove the unzipped NED rasters. We won't need them anymore
    if parallel:
        safe_remove_dir(ned_unzip_folder)

    # Calculate flow accumulation raster based on the DEM
    log.info('Running flow accumulation and converting to drainage area.')
    flow_accumulation(dem_raster, flow_accum, dinfinity=False, pitfill=True)
    flow_accum_to_drainage_area(flow_accum, drain_area)

    # Clip and re-project the existing and historic vegetation
    log.info('Processing existing and historic vegetation rasters.')
    clip_vegetation(buffered_clip_path100, existing_veg, existing_clip, historic_veg, historic_clip, cfg.OUTPUT_EPSG)

    log.info('Processing the Fair Market Value raster.')
    raster_warp(fair_market, fair_market_clip, cfg.OUTPUT_EPSG, clip=buffered_clip_path500, warp_options={"cutlineBlend": 1})

    # Clip the land ownership Shapefile to a 10km buffer around the watershed boundary
    own_path = os.path.join(output_folder, LayerTypes['OWNERSHIP'].rel_path)
    project.add_dataset(realization, own_path, LayerTypes['OWNERSHIP'], 'Vector')
    clip_ownership(nhd[boundary], ownership, own_path, cfg.OUTPUT_EPSG, 10000)

    #######################################################
    # Segmentation
    #######################################################

    # For now let's just make a copy of the NHD flowlines
    tmr = Timer()
    rs_segmentation(nhd['NHDFlowline'], ntd_clean['Roads'], ntd_clean['Rail'], own_path, hydrology_gpkg_path, SEGMENTATION['Max'], SEGMENTATION['Min'], huc)
    log.debug('Segmentation done in {:.1f} seconds'.format(tmr.ellapsed()))
    project.add_project_geopackage(realization, LayerTypes['HYDROLOGY'])

    # Add bankfull buffer polygons
    bankfull_path = os.path.join(hydrology_gpkg_path, LayerTypes['HYDROLOGY'].sub_layers['BANKFULL_CHANNEL'].rel_path)
    bankfull_buffer(os.path.join(hydrology_gpkg_path, LayerTypes['HYDROLOGY'].sub_layers['NETWORK'].rel_path), cfg.OUTPUT_EPSG, bankfull_path)

    # TODO: Add NHD/bankfull union when merging feature classes in vector.ops works with GeoPackage layers
    # bankfull_nhd_path = os.path.join(hydrology_gpkg_path, LayerTypes['HYDROLOGY'].sub_layers['COMPOSITE_CHANNEL_AREA'].rel_path)
    # clip_path = os.path.join(hydrology_gpkg_path, LayerTypes['HYDROLOGY'].sub_layers['BUFFEREDCLIP500'].rel_path)
    # bankfull_nhd_area(bankfull_path, nhd['NHDArea'], clip_path, cfg.OUTPUT_EPSG, hydrology_gpkg_path, LayerTypes['HYDROLOGY'].sub_layers['COMPOSITE_CHANNEL_AREA'].rel_path)

    # Filter the ecoregions Shapefile to only include attributes that intersect with our HUC
    eco_path = os.path.join(output_folder, 'ecoregions', 'ecoregions.shp')
    project.add_dataset(realization, eco_path, LayerTypes['ECOREGIONS'], 'Vector')
    filter_ecoregions(nhd[boundary], ecoregions, eco_path, cfg.OUTPUT_EPSG, 10000)

    report_path = os.path.join(project.project_dir, LayerTypes['REPORT'].rel_path)
    project.add_report(realization, LayerTypes['REPORT'], replace=True)
    report = RSContextReport(report_path, project, output_folder)
    report.write()

    log.info('Process completed successfully.')
    return {
        'DEM': dem_raster,
        'Slope': slope_raster,
        'ExistingVeg': existing_veg,
        'HistoricVeg': historic_veg,
        'NHD': nhd
    }
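# Example (hypothetical paths): rs_context can be invoked directly from Python
# rather than through main(). The national datasets are assumed to be staged
# locally, and the meta dict is free-form key/value content, not a fixed schema.
#
#     rs_context('17060304',
#                '/national/landfire/existing_veg.tif',
#                '/national/landfire/historic_veg.tif',
#                '/national/ownership/ownership.shp',
#                '/national/fair_market/fmv.tif',
#                '/national/ecoregions/ecoregions.shp',
#                '/national/prism',
#                '/projects/rs_context/17060304',
#                '/downloads',
#                '/downloads/scratch/rs_context',
#                parallel=False, force_download=False, meta={'Operator': 'example'})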
def calc_conflict_attributes(flowlines_path, valley_bottom, roads, rail, canals, ownership, buffer_distance_metres, cell_size_meters, epsg, canal_codes, intermediates_gpkg_path):
    log = Logger('Conflict')
    log.info('Calculating conflict attributes')

    # Create union of all reaches and another of the reaches without any canals
    reach_union = get_geometry_unary_union(flowlines_path)
    if canal_codes is None:
        reach_union_no_canals = reach_union
    else:
        reach_union_no_canals = get_geometry_unary_union(flowlines_path, attribute_filter='FCode NOT IN ({})'.format(','.join(canal_codes)))

    crossin = intersect_geometry_to_layer(intermediates_gpkg_path, 'road_crossings', ogr.wkbMultiPoint, reach_union, roads, epsg)
    diverts = intersect_geometry_to_layer(intermediates_gpkg_path, 'diversions', ogr.wkbMultiPoint, reach_union_no_canals, canals, epsg)

    road_vb = intersect_to_layer(intermediates_gpkg_path, valley_bottom, roads, 'road_valleybottom', ogr.wkbMultiLineString, epsg)
    rail_vb = intersect_to_layer(intermediates_gpkg_path, valley_bottom, rail, 'rail_valleybottom', ogr.wkbMultiLineString, epsg)

    private = os.path.join(intermediates_gpkg_path, 'private_land')
    copy_feature_class(ownership, private, epsg, "ADMIN_AGEN = 'PVT' OR ADMIN_AGEN = 'UND'")

    # Buffer all reaches (being careful to use the units of the Shapefile)
    reaches = load_geometries(flowlines_path, epsg=epsg)
    with get_shp_or_gpkg(flowlines_path) as lyr:
        buffer_distance = lyr.rough_convert_metres_to_vector_units(buffer_distance_metres)
        cell_size = lyr.rough_convert_metres_to_vector_units(cell_size_meters)
        geopackage_path = lyr.filepath

    polygons = {reach_id: polyline.buffer(buffer_distance) for reach_id, polyline in reaches.items()}

    results = {}
    tmp_folder = os.path.join(os.path.dirname(intermediates_gpkg_path), 'tmp_conflict')
    distance_from_features(polygons, tmp_folder, reach_union.bounds, cell_size_meters, cell_size, results, road_vb, 'Mean', 'iPC_RoadVB')
    distance_from_features(polygons, tmp_folder, reach_union.bounds, cell_size_meters, cell_size, results, crossin, 'Mean', 'iPC_RoadX')
    distance_from_features(polygons, tmp_folder, reach_union.bounds, cell_size_meters, cell_size, results, diverts, 'Mean', 'iPC_DivPts')
    distance_from_features(polygons, tmp_folder, reach_union.bounds, cell_size_meters, cell_size, results, private, 'Mean', 'iPC_Privat')
    distance_from_features(polygons, tmp_folder, reach_union.bounds, cell_size_meters, cell_size, results, rail_vb, 'Mean', 'iPC_RailVB')
    distance_from_features(polygons, tmp_folder, reach_union.bounds, cell_size_meters, cell_size, results, canals, 'Mean', 'iPC_Canal')
    distance_from_features(polygons, tmp_folder, reach_union.bounds, cell_size_meters, cell_size, results, roads, 'Mean', 'iPC_Road')
    distance_from_features(polygons, tmp_folder, reach_union.bounds, cell_size_meters, cell_size, results, rail, 'Mean', 'iPC_Rail')

    # Calculate minimum distance to conflict
    min_keys = ['iPC_Road', 'iPC_RoadX', 'iPC_RoadVB', 'iPC_Rail', 'iPC_RailVB']
    for values in results.values():
        values['oPC_Dist'] = min([values[x] for x in min_keys if x in values])

    # Retrieve the agency responsible for administering the land at the midpoint of each reach
    admin_agency(geopackage_path, reaches, ownership, results)

    log.info('Conflict attribute calculation complete')

    # Clean up temporary feature classes
    safe_remove_dir(tmp_folder)

    return results
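# Example (hypothetical values): canal_codes is a list of NHD FCode strings to
# exclude when locating diversion points, e.g. the canal/ditch family of codes:
#
#     results = calc_conflict_attributes(
#         flowlines, valley_bottom, roads, rail, canals, ownership,
#         buffer_distance_metres=30.0, cell_size_meters=5.0, epsg=4326,
#         canal_codes=['33600', '33601', '33603'],
#         intermediates_gpkg_path='/project/intermediates/intermediates.gpkg')
#
# Each entry in the returned dict maps a reach ID to its iPC_* distance
# attributes plus oPC_Dist, the minimum distance to any road or rail conflict.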