def convert_shapefiles(pred=None, node_threshold=None):
    """
    Convert the linked catchment shapefiles found under the shapefile storage directory to ESMF format NetCDF files.

    Run with MPI: ``mpirun -n 8 prep_shapefiles.py``

    The storage directory is walked recursively. Every file ending in ``.shp`` is converted with
    :func:`convert_to_esmf_format`. A failed conversion is logged with its traceback and the walk continues with
    the next file.

    :param pred: Optional callable receiving the shapefile's file name (not its full path). Files for which it
     returns a false value are skipped. If ``None``, every shapefile is converted.
    :param node_threshold: Passed unchanged to :func:`convert_to_esmf_format`.
    """
    nfie_version = 'node-threshold-10000'
    name_uid = 'GRIDCODE'
    storage_dir_shp = os.path.expanduser('/media/benkoziol/Extra Drive 1/data/nfie/linked_catchment_shapefiles')
    storage_dir_esmf = os.path.expanduser('/media/benkoziol/Extra Drive 1/data/nfie/node-thresholded-10000')
    esmf_name_template = 'esmf_format_{cid}_{nfie_version}.nc'

    log.info('Starting conversion for: {}'.format(nfie_version))

    # Walk the shapefile directory itself; joining the directory with itself was redundant.
    for dirpath, _dirnames, filenames in os.walk(storage_dir_shp):
        for fn in filenames:
            if not fn.endswith('.shp'):
                continue
            log.debug(fn)
            if pred is not None and not pred(fn):
                continue

            # The catchment identifier is the part of the file name between "linked_" and the extension.
            cid = re.search('linked_(.*).shp', fn).group(1)
            path_catchment_shp = os.path.join(dirpath, fn)
            esmf_name = esmf_name_template.format(cid=cid, nfie_version=nfie_version)
            path_esmf_format_nc = os.path.join(storage_dir_esmf, esmf_name)
            log.info('Converting {}'.format(path_catchment_shp))

            try:
                convert_to_esmf_format(path_esmf_format_nc, path_catchment_shp, name_uid,
                                       node_threshold=node_threshold)
            except Exception:
                # Best effort: record the failure and keep going. KeyboardInterrupt and SystemExit are deliberately
                # not caught so the batch can still be aborted.
                log.exception(path_catchment_shp)
def make_shapefiles_and_esmf_format():
    """
    Write a single-element shapefile and its ESMF format NetCDF file for every representative element.

    The elements come from :func:`get_representative_nodes`. For each one, the source shapefile is scanned for the
    first record whose ``GRIDCODE`` property equals the element's gridcode. That record is written to a new
    shapefile in ``WD``, which is then converted with :func:`convert_to_esmf_format`.

    :raises AssertionError: If no record with the requested gridcode exists in the source shapefile.
    """
    log.level = logbook.DEBUG

    template_shapefile = 'single_element_{gridcode}_{node_count}.shp'
    template_esmf_format = 'esmf_single_element_{gridcode}_{node_count}.nc'

    for tp in get_representative_nodes():
        log.debug(tp)
        gridcode, node_count = tp['gridcode'], tp['node_count']
        name_fields = {'gridcode': gridcode, 'node_count': node_count}
        sink_path = join(WD, template_shapefile.format(**name_fields))
        path_out_nc = join(WD, template_esmf_format.format(**name_fields))

        # Extract the element and write the shapefile.
        source_path = get_source_shapefile(SHAPEFILE_DIR, tp['name'])
        log.debug(source_path)
        with fiona.open(source_path) as source:
            with fiona.open(sink_path, 'w', **source.meta) as sink:
                # Stop at the first matching record; later records are never read.
                match = next((rec for rec in source if rec['properties']['GRIDCODE'] == gridcode), None)
                if match is not None:
                    sink.write(match)
        assert match is not None

        # Convert the shapefile to ESMF format.
        convert_to_esmf_format(path_out_nc, sink_path, 'GRIDCODE')
def run_create_linked_shapefile():
    """
    Create a linked shapefile for every weighted precipitation data file, spreading the files across MPI ranks.

    Rank 0 lists the weighted data directory, keeps the file names starting with ``'pr_weighted'``, cuts that list
    into the sections reported by ``create_sections`` and scatters one section to each rank. Every rank then, for
    each of its files, derives the catchment name from the file name, locates the matching ``*Catchment.shp`` with
    ``find`` and calls ``create_linked_shapefile``.
    """
    log.level = INFO
    name_uid = 'GRIDCODE'
    output_variable = 'pr'
    directory_shapefiles = expanduser('~/storage/catchment_shapefiles')
    directory_linked_shapefiles = expanduser('~/storage/linked_catchment_shapefiles')
    directory_weighted_data = expanduser('~/storage/catchment_weighted_data')

    if MPI_RANK == 0:
        # Build a concrete list: it is measured with len() and sliced below, which a lazy filter object (as
        # returned by filter() on Python 3) does not support.
        weighted_data_files = [name for name in listdir(directory_weighted_data) if name.startswith('pr_weighted')]
        # presumably create_sections yields one (start, stop) pair per rank - confirm against its definition
        weighted_data_files = [weighted_data_files[slc[0]:slc[1]]
                               for slc in create_sections(len(weighted_data_files))]
    else:
        weighted_data_files = None
    weighted_data_files = MPI_COMM.scatter(weighted_data_files, root=0)

    for ii in weighted_data_files:
        path_output_data = join(directory_weighted_data, ii)
        # The catchment name sits between the fixed prefix and the last underscore of the file name.
        res = re.search('pr_weighted-catchments_esmf_(.*)_', ii).groups()[0]
        shapefile_directory = join(directory_shapefiles, res)
        path_in_shp = check_output(['find', shapefile_directory, '-name', '*Catchment.shp']).strip()
        path_linked_shp = join(directory_linked_shapefiles, 'linked_{}.shp'.format(res))
        log.debug((path_in_shp, path_linked_shp))
        log.info('Creating linked shapefile for: {}'.format(path_output_data))
        create_linked_shapefile(name_uid, output_variable, path_in_shp, path_linked_shp, path_output_data)
def get_representative_nodes():
    """
    Get elements with a reasonable distribution of node counts.

    The node count interval ``[0, 60000)`` is cut into bins of width 1000. For each bin, up to 50 randomly ordered
    catchments whose ``node_count`` falls inside the bin are selected.

    :returns: One dictionary per selected catchment with the keys ``'name'``, ``'gridcode'`` and ``'node_count'``.
    :rtype: list
    """
    step = 1000
    start = 0
    stop = 60000
    limit = 50
    record_names = ['name', 'gridcode', 'node_count']

    to_process = []
    s = Session()
    try:
        for lower in range(start, stop, step):
            select_range = [lower, lower + step]
            log.debug(select_range)
            q = s.query(VectorProcessingUnit.name, Catchment.gridcode, Catchment.node_count).join(Catchment)
            # Half-open bin: lower bound inclusive, upper bound exclusive.
            q = q.filter(Catchment.node_count >= select_range[0]).filter(Catchment.node_count < select_range[1])
            q = q.order_by(func.random()).limit(limit)
            to_process.extend(dict(zip(record_names, record)) for record in q)
    finally:
        # Release the session even when a query fails.
        s.close()
    return to_process
def run_create_linked_shapefile():
    """
    Create a linked shapefile for every weighted precipitation data file, spreading the files across MPI ranks.

    Rank 0 lists the weighted data directory, keeps the file names starting with ``'pr_weighted'``, cuts that list
    into the sections reported by ``create_sections`` and scatters one section to each rank. Every rank then, for
    each of its files, derives the catchment name from the file name, locates the matching ``*Catchment.shp`` with
    ``find`` and calls ``create_linked_shapefile``.
    """
    log.level = INFO
    name_uid = 'GRIDCODE'
    output_variable = 'pr'
    directory_shapefiles = expanduser('~/storage/catchment_shapefiles')
    directory_linked_shapefiles = expanduser('~/storage/linked_catchment_shapefiles')
    directory_weighted_data = expanduser('~/storage/catchment_weighted_data')

    if MPI_RANK == 0:
        # Build a concrete list: it is measured with len() and sliced below, which a lazy filter object (as
        # returned by filter() on Python 3) does not support.
        weighted_data_files = [name for name in listdir(directory_weighted_data) if name.startswith('pr_weighted')]
        # presumably create_sections yields one (start, stop) pair per rank - confirm against its definition
        weighted_data_files = [weighted_data_files[slc[0]:slc[1]]
                               for slc in create_sections(len(weighted_data_files))]
    else:
        weighted_data_files = None
    weighted_data_files = MPI_COMM.scatter(weighted_data_files, root=0)

    for ii in weighted_data_files:
        path_output_data = join(directory_weighted_data, ii)
        # The catchment name sits between the fixed prefix and the last underscore of the file name.
        res = re.search('pr_weighted-catchments_esmf_(.*)_', ii).groups()[0]
        shapefile_directory = join(directory_shapefiles, res)
        path_in_shp = check_output(['find', shapefile_directory, '-name', '*Catchment.shp']).strip()
        path_linked_shp = join(directory_linked_shapefiles, 'linked_{}.shp'.format(res))
        log.debug((path_in_shp, path_linked_shp))
        log.info('Creating linked shapefile for: {}'.format(path_output_data))
        create_linked_shapefile(name_uid, output_variable, path_in_shp, path_linked_shp, path_output_data)
def from_shapefile(path, name_uid, mesh_name='mesh', path_rtree=None, use_ragged_arrays=False, with_connectivity=True,
                   allow_multipart=False, node_threshold=None, driver_kwargs=None, debug=False, dest_crs=None,
                   split_interiors=True):
    """
    Create a flexible mesh from a target shapefile.

    Example::

        path = '/input/target.shp'
        name_uid = 'UID'
        fm = from_shapefile(path, name_uid)

    :param path: Path to the target shapefile.
    :type path: str
    :param name_uid: Name of the integer unique identifier in the target shapefile. This value will be maintained on
     the output mesh object.
    :type name_uid: str
    :param mesh_name: Name of the mesh catalog variable.
    :type mesh_name: str
    :param path_rtree: Path to a serialized spatial index object created using ``rtree``. Use
     :func:`pyugrid.flexible_mesh.helpers.create_rtree_file` to create a persistent ``rtree`` spatial index file.
    :type path_rtree: str
    :param use_ragged_arrays: Passed to ``get_flexible_mesh``.
    :param with_connectivity: Passed to ``get_flexible_mesh``.
    :param allow_multipart: Passed to the geometry manager.
    :param node_threshold: Passed to the geometry manager.
    :param driver_kwargs: Passed to the geometry manager.
    :param debug: If true, the geometry manager is created with ``slc=[0, 1]``; otherwise with ``slc=None``.
    :param dest_crs: Passed to the geometry manager.
    :param split_interiors: Passed to the geometry manager.
    :returns: The object returned by ``get_flexible_mesh``.
    """
    from utools.io.geom_manager import GeometryManager

    log.debug('creating geometry manager')
    log.debug(('driver_kwargs', driver_kwargs))
    manager_options = {
        'path': path,
        'path_rtree': path_rtree,
        'allow_multipart': allow_multipart,
        'node_threshold': node_threshold,
        'slc': [0, 1] if debug else None,
        'driver_kwargs': driver_kwargs,
        'dest_crs': dest_crs,
        'split_interiors': split_interiors,
    }
    gm = GeometryManager(name_uid, **manager_options)
    log.debug('geometry manager created')

    mesh = get_flexible_mesh(gm, mesh_name, use_ragged_arrays, with_connectivity=with_connectivity)
    log.debug('mesh collection returned')
    return mesh
def convert(source_uid, source, esmf_format, feature_class, config_path, dest_crs_index, node_threshold, split, debug):
    """
    Convert a geometry source to an ESMF format file, optionally reprojecting to a CRS read from a config file.

    :param source_uid: Name of the unique identifier field; passed to ``convert_to_esmf_format``.
    :param source: Path to the input geometries.
    :param esmf_format: Path of the output ESMF format file.
    :param feature_class: Feature class name placed in the driver keyword arguments (may be ``None``).
    :param config_path: Path of the configuration file holding the CRS WKT. Only read when ``dest_crs_index`` is
     given.
    :param dest_crs_index: ``None``, ``'option'`` or ``'section,option'`` locating the WKT in the configuration file.
    :param node_threshold: Passed to ``convert_to_esmf_format``.
    :param split: Passed to ``convert_to_esmf_format`` as ``split_interiors``.
    :param debug: Passed to ``convert_to_esmf_format``.
    :raises NotImplementedError: If ``dest_crs_index`` has more than two comma-separated parts.
    """
    from utools.prep.prep_shapefiles import convert_to_esmf_format

    log_entry('info', 'Started converting to ESMF format: {}'.format(source), rank=0)

    # Set the feature class name even if it is None. Feature class name is required for a file geodatabase.
    driver_kwargs = {'feature_class': feature_class}

    # Without a destination CRS index there is nothing to read; the geometry manager gets no target CRS.
    dest_crs = None
    if dest_crs_index is not None:
        log.debug(('dest_crs_index', dest_crs_index))
        parts = dest_crs_index.split(',')
        n_parts = len(parts)
        if n_parts == 1:
            # NOTE(review): the section is None in this case - confirm the parser accepts that.
            crs_section, crs_option = None, parts[0]
        elif n_parts == 2:
            crs_section, crs_option = parts
        else:
            raise NotImplementedError(n_parts)

        # Read the WKT and convert it to a spatial reference object for the geometry manager.
        parser = SafeConfigParser()
        parser.read(config_path)
        crs_wkt = parser.get(crs_section, crs_option)
        dest_crs = osgeo.osr.SpatialReference()
        dest_crs.ImportFromWkt(crs_wkt)

    convert_to_esmf_format(esmf_format, source, source_uid, node_threshold=node_threshold,
                           driver_kwargs=driver_kwargs, debug=debug, dest_crs=dest_crs, split_interiors=split)
    log_entry('info', 'Finished converting to ESMF format: {}'.format(source), rank=0)
def test_coordinates(self):
    """
    Check the exterior ring of every polygon in the catchment shapefile.

    After ``get_oriented_and_valid_geometry``, each polygon's exterior must be counter-clockwise. An exterior with
    one coordinate or fewer is only logged as an error; it does not fail the test.
    """
    path = '/home/benkoziol/data/pmesh/catchment_shapefiles/NHDPlusTX/NHDPlus12/NHDPlusCatchment/Catchment.shp'
    with fiona.open(path, 'r') as source:
        for record in source:
            log.debug(record['properties']['GRIDCODE'])
            geom = shape(record['geometry'])
            # Treat single polygons and multipolygons alike by always iterating over parts.
            parts = geom if isinstance(geom, MultiPolygon) else [geom]
            for part in parts:
                oriented = get_oriented_and_valid_geometry(part)
                self.assertTrue(oriented.exterior.is_ccw)
                coords = np.array(oriented.exterior.coords)
                try:
                    self.assertTrue(coords.shape[0] > 1)
                except AssertionError:
                    log.error('AssertionError GRIDCODE={}'.format(record['properties']['GRIDCODE']))
def convert_to_esmf_format(path_out_nc, path_in_shp, name_uid, node_threshold=None, debug=False, driver_kwargs=None,
                           dest_crs=None, with_connectivity=False, dataset_kwargs=None, split_interiors=True):
    """
    Convert a shapefile to an ESMF format NetCDF file.

    The geometries are loaded with :func:`from_shapefile` (ragged arrays, multipart geometries allowed) and the
    resulting collection is written with :func:`convert_collection_to_esmf_format`, using
    ``UgridToolsConstants.POLYGON_BREAK_VALUE`` as the polygon break value and ``name_uid`` as the face unique
    identifier name.

    :param path_out_nc: Path of the NetCDF file to write.
    :type path_out_nc: str
    :param path_in_shp: Path to the input shapefile.
    :type path_in_shp: str
    :param name_uid: Name of the unique identifier field in the input shapefile.
    :type name_uid: str
    :param node_threshold: Passed to :func:`from_shapefile`.
    :param debug: Passed to :func:`from_shapefile`.
    :param driver_kwargs: Passed to :func:`from_shapefile`.
    :param dest_crs: Passed to :func:`from_shapefile`.
    :param with_connectivity: Passed to :func:`from_shapefile`.
    :param dataset_kwargs: Passed to :func:`convert_collection_to_esmf_format`.
    :param split_interiors: Passed to :func:`from_shapefile`. The default equals that function's own default, so
     callers that do not pass it see no change.
    """
    polygon_break_value = UgridToolsConstants.POLYGON_BREAK_VALUE

    log.debug('loading flexible mesh')
    coll = from_shapefile(path_in_shp, name_uid, use_ragged_arrays=True, with_connectivity=with_connectivity,
                          allow_multipart=True, node_threshold=node_threshold, debug=debug,
                          driver_kwargs=driver_kwargs, dest_crs=dest_crs, split_interiors=split_interiors)

    log.debug('writing flexible mesh')
    convert_collection_to_esmf_format(coll, path_out_nc, polygon_break_value=polygon_break_value,
                                      face_uid_name=name_uid, dataset_kwargs=dataset_kwargs)
    log.debug('success')
def make_weight_files():
    """
    Generate a weights file for every ESMF format NetCDF file in ``WD`` and log the collected results.

    Each ``.nc`` file name in ``WD`` is parsed for its gridcode and node count. The jobs are run through
    ``make_weight_file`` with 16 parallel workers, and each job's return value is stored under the ``'time'`` key
    of its keyword dictionary before the full list is logged.
    """
    n_jobs = 16
    to_process = []
    log.level = logbook.INFO

    for fname in listdir(WD):
        if not fname.endswith('.nc'):
            continue
        path_out_weights_nc = join(ESMF_WEIGHTS_OUTPUT_DIR, 'weights_' + fname)
        esmf_format = join(WD, fname)
        log.debug(path_out_weights_nc)
        log.debug(esmf_format)

        # File names follow esmf_single_element_<gridcode>_<node_count>.nc.
        gridcode, node_count = re.search('esmf_single_element_(.+)_(.+).nc', fname).groups()
        kwds = {
            'esmf_format': esmf_format,
            'path_out_weights_nc': path_out_weights_nc,
            'gridcode': int(gridcode),
            'node_count': int(node_count),
        }
        to_process.append(kwds)
        log.debug(kwds)

    rtimes = Parallel(n_jobs=n_jobs)(delayed(make_weight_file)(**k) for k in to_process)

    # Results come back in submission order, so they pair up with the job dictionaries.
    for job, rtime in zip(to_process, rtimes):
        job['time'] = rtime
    log.info(to_process)
def convert_collection_to_esmf_format(fmobj, filename, polygon_break_value=None, start_index=0, face_uid_name=None,
                                      dataset_kwargs=None):
    """
    Convert a mesh collection to an ESMF format NetCDF file. Only supports ragged arrays.

    Each rank flattens its own ragged face connectivity into a one-dimensional array. The per-rank node, element
    and connection counts are gathered; rank 0 creates the file with dimensions sized by the summed counts and
    defines the variables. The ranks then open the file in append mode one after another, in rank order, and write
    their own slices.

    :param fmobj: Collection indexed by name. The entries ``'face_areas'``, ``'face_coordinates'``, ``'face'``,
     ``'nodes'`` and ``'section'`` are read, plus ``face_uid_name`` when it is given.
    :param filename: Path of the NetCDF file to create.
    :type filename: str
    :param polygon_break_value: If not ``None``, stored as the ``polygon_break_value`` attribute of ``elementConn``.
    :param start_index: Stored as the ``start_index`` attribute of ``elementConn``.
    :param face_uid_name: Name of the entry in ``fmobj`` holding the element unique identifiers. It is also used
     as the name of the output variable. If ``None``, no identifier variable is written.
    :type face_uid_name: str
    :param dataset_kwargs: Extra keyword arguments for ``nc.Dataset`` when the file is created.
    :type dataset_kwargs: dict
    """
    dataset_kwargs = dataset_kwargs or {}

    face_areas = fmobj['face_areas']
    face_coordinates = fmobj['face_coordinates']
    if face_uid_name is not None:
        face_uid_value = fmobj[face_uid_name]
    else:
        face_uid_value = None
    faces = fmobj['face']
    nodes = fmobj['nodes']

    # Transform ragged array to one-dimensional array.
    num_element_conn_data = [e.shape[0] for e in faces.flat]
    length_connection_count = sum(num_element_conn_data)
    # NOTE(review): faces[0] assumes this rank holds at least one face - confirm against callers.
    element_conn_data = np.zeros(length_connection_count, dtype=faces[0].dtype)
    start = 0
    for ii in faces.flat:
        element_conn_data[start:start + ii.shape[0]] = ii
        start += ii.shape[0]

    # Collect every rank's local sizes. The sums are only used on rank 0 below.
    node_counts = MPI_COMM.gather(nodes.shape[0])
    element_counts = MPI_COMM.gather(faces.shape[0])
    length_connection_counts = MPI_COMM.gather(length_connection_count)

    if MPI_RANK == 0:
        # Rank 0 alone creates the file and defines its structure; values are filled in afterwards.
        ds = nc.Dataset(filename, 'w', **dataset_kwargs)
        try:
            # Dimensions -----------------------------------------------------------------------------------------------
            node_count_size = sum(node_counts)
            element_count_size = sum(element_counts)
            connection_count_size = sum(length_connection_counts)

            node_count = ds.createDimension('nodeCount', node_count_size)
            element_count = ds.createDimension('elementCount', element_count_size)
            coord_dim = ds.createDimension('coordDim', 2)
            connection_count = ds.createDimension('connectionCount', connection_count_size)

            # Variables ------------------------------------------------------------------------------------------------
            node_coords = ds.createVariable('nodeCoords', nodes.dtype, (node_count.name, coord_dim.name))
            node_coords.units = 'degrees'

            element_conn = ds.createVariable('elementConn', element_conn_data.dtype, (connection_count.name, ))
            element_conn.long_name = 'Node indices that define the element connectivity.'
            if polygon_break_value is not None:
                element_conn.polygon_break_value = polygon_break_value
            element_conn.start_index = start_index

            num_element_conn = ds.createVariable('numElementConn', np.int32, (element_count.name, ))
            num_element_conn.long_name = 'Number of nodes per element.'

            center_coords = ds.createVariable('centerCoords', face_coordinates.dtype,
                                              (element_count.name, coord_dim.name))
            center_coords.units = 'degrees'

            if face_uid_value is not None:
                uid = ds.createVariable(face_uid_name, face_uid_value.dtype, dimensions=(element_count.name, ))
                uid.long_name = 'Element unique identifier.'

            # NOTE(review): the area variable takes its dtype from ``nodes``, not ``face_areas`` - confirm intended.
            element_area = ds.createVariable('elementArea', nodes.dtype, (element_count.name, ))
            element_area.units = 'degrees'
            element_area.long_name = 'Element area in native units.'

            # Global Attributes ----------------------------------------------------------------------------------------
            ds.gridType = 'unstructured'
            ds.version = '0.9'
            # The attribute name is the coordinate dimension's name ('coordDim').
            setattr(ds, coord_dim.name, "longitude latitude")
        finally:
            ds.close()

    # Fill variable values ---------------------------------------------------------------------------------------------
    node_coords_start = 0
    node_coords_stop = None
    element_conn_start = 0
    element_conn_stop = None
    for rank_to_write in range(MPI_SIZE):
        log.debug(('node_coords_start', node_coords_start))
        if MPI_RANK == rank_to_write:
            # Only the rank whose turn it is opens the file.
            ds = nc.Dataset(filename, mode='a')
            try:
                node_coords = ds.variables['nodeCoords']
                element_conn = ds.variables['elementConn']
                num_element_conn = ds.variables['numElementConn']
                center_coords = ds.variables['centerCoords']
                element_area = ds.variables['elementArea']
                if face_uid_value is not None:
                    uid = ds.variables[face_uid_name]

                # Node and connectivity data are appended directly after what the previous rank wrote.
                node_coords_stop = node_coords_start + nodes.shape[0]
                element_conn_stop = element_conn_start + element_conn_data.shape[0]

                node_coords[node_coords_start:node_coords_stop] = nodes
                log.debug(('element_conn indices', element_conn_start, element_conn_stop))
                element_conn[element_conn_start:element_conn_stop] = element_conn_data

                # Per-element variables are placed using the bounds stored in the collection.
                # presumably these are this rank's (start, stop) element indices - confirm against the producer
                start, stop = fmobj['section']
                num_element_conn[start:stop] = num_element_conn_data
                center_coords[start:stop] = face_coordinates
                element_area[start:stop] = face_areas
                if face_uid_value is not None:
                    uid[start:stop] = face_uid_value
            finally:
                ds.close()
        # The writing rank tells every rank where the next write has to begin.
        node_coords_start = MPI_COMM.bcast(node_coords_stop, root=rank_to_write)
        element_conn_start = MPI_COMM.bcast(element_conn_stop, root=rank_to_write)
        MPI_COMM.Barrier()
def get_variables(gm, use_ragged_arrays=False, with_connectivity=True):
    """
    Build the mesh variables for the geometries held by a geometry manager.

    :param gm: The geometry manager containing geometries to convert to mesh variables.
    :type gm: :class:`pyugrid.flexible_mesh.helpers.GeometryManager`
    :param use_ragged_arrays: If false, ``face_links``, ``face_nodes`` and ``face_edges`` are converted to
     rectangular arrays with ``get_rectangular_array_from_object_array``. If true, they are returned as produced.
    :type use_ragged_arrays: bool
    :param with_connectivity: Passed to ``get_face_variables``.
    :type with_connectivity: bool
    :returns: A tuple with index locations corresponding to:

    ===== ================ ======================================================
    Index Name             Notes
    ===== ================ ======================================================
    0     face_nodes
    1     face_edges       Same values as ``face_nodes``.
    2     edge_nodes
    3     coordinates
    4     face_links
    5     face_ids         :class:`numpy.ndarray` of ``int32``.
    6     face_coordinates
    7     face_areas
    8     section
    ===== ================ ======================================================

    Information on individual variables may be found here: https://github.com/ugrid-conventions/ugrid-conventions/blob/9b6540405b940f0a9299af9dfb5e7c04b5074bf7/ugrid-conventions.md#2d-flexible-mesh-mixed-triangles-quadrilaterals-etc-topology

    :rtype: tuple (see table)
    :raises: ValueError if there are fewer geometries than MPI processes.
    """
    if len(gm) < MPI_SIZE:
        raise ValueError('The number of geometries must be greater than or equal to the number of processes.')

    pbv = UgridToolsConstants.POLYGON_BREAK_VALUE

    result = get_face_variables(gm, with_connectivity=with_connectivity)
    # The face_ids unpacked here are replaced below by the keys of the coordinate dictionary.
    face_links, nmax_face_nodes, face_ids, face_coordinates, cdict, n_coords, face_areas, section = result

    # Find the start index for each rank: the running total of the coordinate counts of all lower ranks.
    all_n_coords = MPI_COMM.gather(n_coords)
    if MPI_RANK == 0:
        all_idx_start = [0] * MPI_SIZE
        for idx in range(1, len(all_n_coords)):
            all_idx_start[idx] = all_n_coords[idx - 1] + all_idx_start[idx - 1]
    else:
        all_idx_start = None
    idx_start = MPI_COMM.scatter(all_idx_start)
    log.debug(('idx_start', idx_start))

    face_nodes, coordinates, edge_nodes = get_coordinate_dict_variables(cdict, n_coords, polygon_break_value=pbv,
                                                                        idx_start=idx_start)
    face_edges = face_nodes
    # Materialize the keys first: a Python 3 dictionary view cannot be converted to an integer array directly.
    face_ids = np.array(list(cdict.keys()), dtype=np.int32)

    if not use_ragged_arrays:
        face_links, face_nodes, face_edges = [
            get_rectangular_array_from_object_array(a, (a.shape[0], nmax_face_nodes))
            for a in (face_links, face_nodes, face_edges)
        ]

    return face_nodes, face_edges, edge_nodes, coordinates, face_links, face_ids, face_coordinates, face_areas, section