示例#1
0
def convert_shapefiles(pred=None, node_threshold=None):
    """
    Convert every linked catchment shapefile found under the shapefile storage directory to an ESMF format NetCDF file.

    The storage directory is walked recursively. Each ``linked_<cid>.shp`` file is converted to
    ``esmf_format_<cid>_<nfie_version>.nc`` inside the ESMF storage directory. A failure while converting one
    shapefile is logged and does not stop the remaining conversions.

    :param pred: Optional callable receiving a shapefile's file name. Files for which it returns a false value are
     skipped. If ``None``, no filtering is applied.
    :param node_threshold: Passed through unchanged to ``convert_to_esmf_format``.
    """
    # Run with MPI: mpirun -n 8 prep_shapefiles.py

    nfie_version = 'node-threshold-10000'

    name_uid = 'GRIDCODE'
    storage_dir_shp = os.path.expanduser(
        '/media/benkoziol/Extra Drive 1/data/nfie/linked_catchment_shapefiles')
    storage_dir_esmf = os.path.expanduser(
        '/media/benkoziol/Extra Drive 1/data/nfie/node-thresholded-10000')
    esmf_name_template = 'esmf_format_{cid}_{nfie_version}.nc'

    log.info('Starting conversion for: {}'.format(nfie_version))

    # The directory was previously joined with itself; since it is an absolute path that was a no-op.
    for dirpath, dirnames, filenames in os.walk(storage_dir_shp):
        for fn in filenames:
            if not fn.endswith('.shp'):
                continue
            log.debug(fn)
            if pred is not None and not pred(fn):
                continue
            # Escape the dot and anchor the suffix so only the real extension is stripped from the catchment id.
            match = re.search(r'linked_(.*)\.shp$', fn)
            if match is None:
                # Not a linked shapefile; there is no catchment id to build the output name from.
                log.warning('Skipping shapefile without "linked_" prefix: {}'.format(fn))
                continue
            cid = match.group(1)
            path_catchment_shp = os.path.join(dirpath, fn)
            esmf_name = esmf_name_template.format(
                cid=cid, nfie_version=nfie_version)
            path_esmf_format_nc = os.path.join(storage_dir_esmf, esmf_name)
            log.info('Converting {}'.format(path_catchment_shp))
            try:
                convert_to_esmf_format(path_esmf_format_nc,
                                       path_catchment_shp,
                                       name_uid,
                                       node_threshold=node_threshold)
            except Exception:
                # Best effort: record the failing shapefile and carry on. ``Exception`` (rather than a bare
                # ``except``) lets KeyboardInterrupt and SystemExit stop the run.
                log.exception(path_catchment_shp)
示例#2
0
def make_shapefiles_and_esmf_format():
    """
    Write a single-element shapefile and its ESMF format file for each representative catchment.

    For every entry returned by ``get_representative_nodes``, the first record whose ``GRIDCODE`` property equals the
    entry's gridcode is copied from the source shapefile into a new shapefile in ``WD``. That new shapefile is then
    converted to ESMF format, also in ``WD``.

    :raises AssertionError: If no record with the requested gridcode exists in the source shapefile.
    """
    log.level = logbook.DEBUG

    template_shapefile = 'single_element_{gridcode}_{node_count}.shp'
    template_esmf_format = 'esmf_single_element_{gridcode}_{node_count}.nc'

    for target in get_representative_nodes():
        log.debug(target)
        gridcode = target['gridcode']
        name_parts = {'gridcode': gridcode, 'node_count': target['node_count']}
        filename_shapefile = template_shapefile.format(**name_parts)
        filename_esmf_format = template_esmf_format.format(**name_parts)

        # Copy the matching element into its own shapefile.
        sink_path = join(WD, filename_shapefile)
        source_path = get_source_shapefile(SHAPEFILE_DIR, target['name'])
        log.debug(source_path)
        matched = True
        with fiona.open(source_path) as src:
            with fiona.open(sink_path, 'w', **src.meta) as dst:
                for feature in src:
                    if feature['properties']['GRIDCODE'] == gridcode:
                        dst.write(feature)
                        break
                else:
                    # The loop ran to completion without finding the gridcode.
                    matched = False
        assert matched

        # Convert the freshly written single-element shapefile to ESMF format.
        convert_to_esmf_format(join(WD, filename_esmf_format), sink_path, 'GRIDCODE')
示例#3
0
def run_create_linked_shapefile():
    """
    Create a linked shapefile for each precipitation weighted data file, distributing the files across MPI ranks.

    Rank 0 lists the weighted data directory, keeps file names starting with ``pr_weighted``, and slices that list
    using the ``(start, stop)`` index pairs returned by ``create_sections``. The slices are scattered from rank 0 and
    each rank processes the file names it receives. For each file, the catchment identifier is parsed from the file
    name, the source ``*Catchment.shp`` is located with the ``find`` command, and ``create_linked_shapefile`` is
    called.
    """
    log.level = INFO
    name_uid = 'GRIDCODE'
    output_variable = 'pr'
    directory_shapefiles = expanduser('~/storage/catchment_shapefiles')
    directory_linked_shapefiles = expanduser('~/storage/linked_catchment_shapefiles')
    directory_weighted_data = expanduser('~/storage/catchment_weighted_data')

    if MPI_RANK == 0:
        # Build a real list. Under Python 3 ``filter`` returns a lazy iterator that supports neither ``len`` nor
        # slicing; the comprehension behaves the same on Python 2.
        weighted_data_files = [f for f in listdir(directory_weighted_data) if f.startswith('pr_weighted')]
        weighted_data_files = [weighted_data_files[slc[0]: slc[1]] for slc in create_sections(len(weighted_data_files))]
    else:
        weighted_data_files = None

    # NOTE(review): scatter presumably needs one section per rank - confirm ``create_sections`` guarantees this.
    weighted_data_files = MPI_COMM.scatter(weighted_data_files, root=0)

    for ii in weighted_data_files:
        path_output_data = join(directory_weighted_data, ii)
        # The captured group names the subdirectory holding the source catchment shapefile.
        res = re.search('pr_weighted-catchments_esmf_(.*)_', ii).groups()[0]
        shapefile_directory = join(directory_shapefiles, res)
        path_in_shp = check_output(['find', shapefile_directory, '-name', '*Catchment.shp']).strip()
        path_linked_shp = join(directory_linked_shapefiles, 'linked_{}.shp'.format(res))
        log.debug((path_in_shp, path_linked_shp))
        log.info('Creating linked shapefile for: {}'.format(path_output_data))
        create_linked_shapefile(name_uid, output_variable, path_in_shp, path_linked_shp, path_output_data)
示例#4
0
def get_representative_nodes():
    """
    Get elements with a reasonable distribution of node counts.

    The node count interval ``[0, 60000)`` is divided into bins 1000 wide. For each bin, up to 50 catchments whose
    ``node_count`` falls in the bin are selected in random order.

    :returns: A list of dictionaries with keys ``name``, ``gridcode`` and ``node_count``.
    :rtype: list
    """

    step = 1000
    start = 0
    stop = 60000
    limit = 50
    record_names = ['name', 'gridcode', 'node_count']

    to_process = []

    s = Session()
    # Close the session even if a query fails so it is not leaked.
    try:
        for lower in range(start, stop, step):
            select_range = [lower, lower + step]
            log.debug(select_range)
            q = s.query(VectorProcessingUnit.name, Catchment.gridcode, Catchment.node_count).join(Catchment)
            q = q.filter(Catchment.node_count >= select_range[0]).filter(Catchment.node_count < select_range[1])
            q = q.order_by(func.random()).limit(limit)

            to_process.extend(dict(zip(record_names, record)) for record in q)
    finally:
        s.close()

    return to_process
示例#5
0
def run_create_linked_shapefile():
    """
    Create a linked shapefile for each precipitation weighted data file, distributing the files across MPI ranks.

    Rank 0 lists the weighted data directory, keeps file names starting with ``pr_weighted``, and slices that list
    using the ``(start, stop)`` index pairs returned by ``create_sections``. The slices are scattered from rank 0 and
    each rank processes the file names it receives. For each file, the catchment identifier is parsed from the file
    name, the source ``*Catchment.shp`` is located with the ``find`` command, and ``create_linked_shapefile`` is
    called.
    """
    log.level = INFO
    name_uid = 'GRIDCODE'
    output_variable = 'pr'
    directory_shapefiles = expanduser('~/storage/catchment_shapefiles')
    directory_linked_shapefiles = expanduser(
        '~/storage/linked_catchment_shapefiles')
    directory_weighted_data = expanduser('~/storage/catchment_weighted_data')

    if MPI_RANK == 0:
        # Build a real list. Under Python 3 ``filter`` returns a lazy iterator
        # that supports neither ``len`` nor slicing; the comprehension behaves
        # the same on Python 2.
        weighted_data_files = [
            f for f in listdir(directory_weighted_data)
            if f.startswith('pr_weighted')
        ]
        weighted_data_files = [
            weighted_data_files[slc[0]:slc[1]]
            for slc in create_sections(len(weighted_data_files))
        ]
    else:
        weighted_data_files = None

    # NOTE(review): scatter presumably needs one section per rank - confirm
    # ``create_sections`` guarantees this.
    weighted_data_files = MPI_COMM.scatter(weighted_data_files, root=0)

    for ii in weighted_data_files:
        path_output_data = join(directory_weighted_data, ii)
        # The captured group names the subdirectory holding the source
        # catchment shapefile.
        res = re.search('pr_weighted-catchments_esmf_(.*)_', ii).groups()[0]
        shapefile_directory = join(directory_shapefiles, res)
        path_in_shp = check_output(
            ['find', shapefile_directory, '-name', '*Catchment.shp']).strip()
        path_linked_shp = join(directory_linked_shapefiles,
                               'linked_{}.shp'.format(res))
        log.debug((path_in_shp, path_linked_shp))
        log.info('Creating linked shapefile for: {}'.format(path_output_data))
        create_linked_shapefile(name_uid, output_variable, path_in_shp,
                                path_linked_shp, path_output_data)
示例#6
0
def from_shapefile(path,
                   name_uid,
                   mesh_name='mesh',
                   path_rtree=None,
                   use_ragged_arrays=False,
                   with_connectivity=True,
                   allow_multipart=False,
                   node_threshold=None,
                   driver_kwargs=None,
                   debug=False,
                   dest_crs=None,
                   split_interiors=True):
    """
    Build a flexible mesh from the geometries stored in a shapefile.

    A ``GeometryManager`` is constructed from the arguments and handed to ``get_flexible_mesh``, whose result is
    returned as is.

    >>> path = '/input/target.shp'
    >>> name_uid = 'UID'
    >>> fm = from_shapefile(path, name_uid)

    :param path: Path to the target shapefile.
    :type path: str
    :param name_uid: Name of the integer unique identifier in the target shapefile.
    :type name_uid: str
    :param mesh_name: Name of the mesh catalog variable.
    :type mesh_name: str
    :param path_rtree: Path to a serialized ``rtree`` spatial index, forwarded to the geometry manager.
    :type path_rtree: str
    :param bool use_ragged_arrays: Forwarded positionally to ``get_flexible_mesh``.
    :param bool with_connectivity: Forwarded to ``get_flexible_mesh``.
    :param bool allow_multipart: Forwarded to the geometry manager.
    :param node_threshold: Forwarded to the geometry manager.
    :param driver_kwargs: Forwarded to the geometry manager.
    :param bool debug: If ``True``, the geometry manager is given the slice ``[0, 1]`` instead of ``None``.
    :param dest_crs: Forwarded to the geometry manager.
    :param bool split_interiors: Forwarded to the geometry manager.
    :returns: The object returned by ``get_flexible_mesh``.
    """
    from utools.io.geom_manager import GeometryManager

    # Debug runs restrict the geometry manager to a fixed slice.
    slc = [0, 1] if debug else None

    log.debug('creating geometry manager')
    log.debug(('driver_kwargs', driver_kwargs))
    manager_options = dict(path=path,
                           path_rtree=path_rtree,
                           allow_multipart=allow_multipart,
                           node_threshold=node_threshold,
                           slc=slc,
                           driver_kwargs=driver_kwargs,
                           dest_crs=dest_crs,
                           split_interiors=split_interiors)
    gm = GeometryManager(name_uid, **manager_options)
    log.debug('geometry manager created')

    mesh = get_flexible_mesh(gm,
                             mesh_name,
                             use_ragged_arrays,
                             with_connectivity=with_connectivity)
    log.debug('mesh collection returned')

    return mesh
示例#7
0
def convert(source_uid, source, esmf_format, feature_class, config_path,
            dest_crs_index, node_threshold, split, debug):
    """
    Convert a geometry source to an ESMF format file, optionally reprojecting to a CRS read from a config file.

    :param source_uid: Name of the unique identifier field in ``source``.
    :param source: Path to the geometry source to convert.
    :param esmf_format: Path to the output ESMF format file.
    :param feature_class: Feature class name placed in the driver keyword arguments (may be ``None``).
    :param config_path: Path to the configuration file holding the destination CRS WKT.
    :param dest_crs_index: ``None`` for no destination CRS, otherwise ``'option'`` or ``'section,option'`` naming
     the configuration entry that holds the WKT string.
    :param node_threshold: Forwarded to ``convert_to_esmf_format``.
    :param split: Forwarded to ``convert_to_esmf_format`` as ``split_interiors``.
    :param debug: Forwarded to ``convert_to_esmf_format``.
    :raises NotImplementedError: If ``dest_crs_index`` has more than two comma-separated parts.
    """
    from utools.prep.prep_shapefiles import convert_to_esmf_format

    started_msg = 'Started converting to ESMF format: {}'.format(source)
    log_entry('info', started_msg, rank=0)

    # The feature class is always set, even when None, because a file geodatabase requires it.
    driver_kwargs = {'feature_class': feature_class}

    # Without a destination CRS index there is nothing to read from the configuration file.
    dest_crs = None
    if dest_crs_index is not None:
        log.debug(('dest_crs_index', dest_crs_index))
        crs_keys = dest_crs_index.split(',')
        n_keys = len(crs_keys)
        if n_keys == 1:
            crs_section, crs_option = None, crs_keys[0]
        elif n_keys == 2:
            crs_section, crs_option = crs_keys
        else:
            raise NotImplementedError(n_keys)
        parser = SafeConfigParser()
        parser.read(config_path)
        crs_wkt = parser.get(crs_section, crs_option)
        # Turn the WKT into a spatial reference object for the geometry manager.
        dest_crs = osgeo.osr.SpatialReference()
        dest_crs.ImportFromWkt(crs_wkt)

    convert_to_esmf_format(esmf_format,
                           source,
                           source_uid,
                           node_threshold=node_threshold,
                           driver_kwargs=driver_kwargs,
                           debug=debug,
                           dest_crs=dest_crs,
                           split_interiors=split)

    finished_msg = 'Finished converting to ESMF format: {}'.format(source)
    log_entry('info', finished_msg, rank=0)
 def test_coordinates(self):
     """
     Check that every polygon in the catchment shapefile is counter-clockwise after orientation.

     Each record's geometry is split into its polygons, each polygon is passed through
     ``get_oriented_and_valid_geometry``, and its exterior ring must be counter-clockwise. An exterior with fewer
     than two coordinates is logged as an error with the record's GRIDCODE but does not fail the test.
     """
     path = '/home/benkoziol/data/pmesh/catchment_shapefiles/NHDPlusTX/NHDPlus12/NHDPlusCatchment/Catchment.shp'
     with fiona.open(path, 'r') as source:
         for record in source:
             log.debug(record['properties']['GRIDCODE'])
             geom = shape(record['geometry'])
             if isinstance(geom, MultiPolygon):
                 # Use ``geoms``: iterating a multi-part geometry directly is not supported by Shapely 2.x.
                 itr = geom.geoms
             else:
                 itr = [geom]
             for polygon in itr:
                 polygon = get_oriented_and_valid_geometry(polygon)
                 self.assertTrue(polygon.exterior.is_ccw)
                 coords = np.array(polygon.exterior.coords)
                 try:
                     self.assertTrue(coords.shape[0] > 1)
                 except AssertionError:
                     # Deliberately report and move on rather than failing the whole test.
                     log.error('AssertionError GRIDCODE={}'.format(
                         record['properties']['GRIDCODE']))
示例#9
0
def convert_to_esmf_format(path_out_nc,
                           path_in_shp,
                           name_uid,
                           node_threshold=None,
                           debug=False,
                           driver_kwargs=None,
                           dest_crs=None,
                           with_connectivity=False,
                           dataset_kwargs=None,
                           split_interiors=True):
    """
    Convert a shapefile to an ESMF format NetCDF file.

    The shapefile is loaded with ``from_shapefile`` using ragged arrays and with multipart geometries allowed. The
    result is then written with ``convert_collection_to_esmf_format``.

    :param str path_out_nc: Path to the output NetCDF file.
    :param str path_in_shp: Path to the input shapefile.
    :param str name_uid: Name of the unique identifier field. Also used as the face unique identifier variable name
     in the output file.
    :param node_threshold: Forwarded to ``from_shapefile``.
    :param bool debug: Forwarded to ``from_shapefile``.
    :param driver_kwargs: Forwarded to ``from_shapefile``.
    :param dest_crs: Forwarded to ``from_shapefile``.
    :param bool with_connectivity: Forwarded to ``from_shapefile``.
    :param dict dataset_kwargs: Forwarded to ``convert_collection_to_esmf_format``.
    :param bool split_interiors: Forwarded to ``from_shapefile``. The default matches the default of
     ``from_shapefile``, so existing callers are unaffected.
    """
    polygon_break_value = UgridToolsConstants.POLYGON_BREAK_VALUE

    log.debug('loading flexible mesh')
    coll = from_shapefile(path_in_shp,
                          name_uid,
                          use_ragged_arrays=True,
                          with_connectivity=with_connectivity,
                          allow_multipart=True,
                          node_threshold=node_threshold,
                          debug=debug,
                          driver_kwargs=driver_kwargs,
                          dest_crs=dest_crs,
                          split_interiors=split_interiors)
    log.debug('writing flexible mesh')
    convert_collection_to_esmf_format(coll,
                                      path_out_nc,
                                      polygon_break_value=polygon_break_value,
                                      face_uid_name=name_uid,
                                      dataset_kwargs=dataset_kwargs)
    # validate_esmf_format(ds, name_uid, path_in_shp)
    log.debug('success')
示例#10
0
def make_weight_files():
    """
    Generate a weights file for every ESMF format file in ``WD`` using 16 parallel jobs.

    Each ``.nc`` file name in ``WD`` is parsed for its gridcode and node count. ``make_weight_file`` is run once per
    file, and the value it returns is stored under the ``time`` key of that file's keyword dictionary. The full list
    of dictionaries is logged at the end.
    """
    n_jobs = 16
    log.level = logbook.INFO

    to_process = []
    for fname in listdir(WD):
        if not fname.endswith('.nc'):
            continue

        path_out_weights_nc = join(ESMF_WEIGHTS_OUTPUT_DIR, 'weights_' + fname)
        esmf_format = join(WD, fname)

        log.debug(path_out_weights_nc)
        log.debug(esmf_format)

        # Both capture groups are required, so ``groups()`` yields exactly (gridcode, node_count).
        gridcode, node_count = re.search('esmf_single_element_(.+)_(.+).nc', fname).groups()
        kwds = dict(esmf_format=esmf_format,
                    path_out_weights_nc=path_out_weights_nc,
                    gridcode=int(gridcode),
                    node_count=int(node_count))
        to_process.append(kwds)

        log.debug(kwds)

    rtimes = Parallel(n_jobs=n_jobs)(delayed(make_weight_file)(**k) for k in to_process)

    # Results come back in submission order, so pair each job with its result.
    for job, elapsed in zip(to_process, rtimes):
        job['time'] = elapsed

    log.info(to_process)
示例#11
0
def convert_collection_to_esmf_format(fmobj,
                                      filename,
                                      polygon_break_value=None,
                                      start_index=0,
                                      face_uid_name=None,
                                      dataset_kwargs=None):
    """
    Write a flexible mesh collection to an ESMF format NetCDF file. Only supports ragged arrays.

    Rank 0 creates the file and defines its dimensions, variables and global attributes. Then each rank, in rank
    order, reopens the file in append mode and writes its own portion of the data.

    :param fmobj: Mapping-like mesh collection. It is indexed with the keys ``'face_areas'``,
     ``'face_coordinates'``, ``'face'``, ``'nodes'`` and ``'section'``, plus ``face_uid_name`` when that is given.
    :param str filename: Path to the NetCDF file to create.
    :param polygon_break_value: If not ``None``, stored as the ``polygon_break_value`` attribute of the
     ``elementConn`` variable.
    :param start_index: Stored as the ``start_index`` attribute of the ``elementConn`` variable.
    :param str face_uid_name: Key in ``fmobj`` holding the element unique identifiers. It is also the name of the
     output variable. If ``None``, no identifier variable is written.
    :param dict dataset_kwargs: Extra keyword arguments for ``nc.Dataset`` when the file is created.
    """

    dataset_kwargs = dataset_kwargs or {}

    # tdk: doc
    # face_areas = fmobj.face_areas
    # face_coordinates = fmobj.face_coordinates
    # if face_uid_name is None:
    #     face_uid_value = None
    # else:
    #     face_uid_value = fmobj.data[face_uid_name].data
    # faces = fmobj.faces
    # nodes = fmobj.nodes

    face_areas = fmobj['face_areas']
    face_coordinates = fmobj['face_coordinates']
    if face_uid_name is not None:
        face_uid_value = fmobj[face_uid_name]
    else:
        face_uid_value = None
    faces = fmobj['face']
    nodes = fmobj['nodes']

    # float_dtype = np.float32
    # int_dtype = np.int32

    # Transform ragged array to one-dimensional array.
    # NOTE(review): presumably ``faces`` is an object array of 1-d index arrays - confirm. ``faces[0]`` is read for
    # the dtype, so an empty ``faces`` would fail here.
    num_element_conn_data = [e.shape[0] for e in faces.flat]
    length_connection_count = sum(num_element_conn_data)
    element_conn_data = np.zeros(length_connection_count, dtype=faces[0].dtype)
    start = 0
    for ii in faces.flat:
        element_conn_data[start:start + ii.shape[0]] = ii
        start += ii.shape[0]

    ####################################################################################################################

    # Disabled alternative implementation built on ocgis variables, kept for reference.
    # from ocgis.new_interface.variable import Variable, VariableCollection
    # coll = VariableCollection()
    #
    # coll.add_variable(Variable('nodeCoords', value=nodes, dtype=float_dtype,
    #                            dimensions=['nodeCount', 'coordDim'], units='degrees'))
    #
    # elementConn = Variable('elementConn', value=element_conn_data, dimensions='connectionCount',
    #                        attrs={'long_name': 'Node indices that define the element connectivity.',
    #                               'start_index': start_index})
    # if polygon_break_value is not None:
    #     elementConn.attrs['polygon_break_value'] = polygon_break_value
    # coll.add_variable(elementConn)
    #
    # coll.add_variable(Variable('numElementConn', value=num_element_conn_data, dimensions='elementCount',
    #                            dtype=int_dtype, attrs={'long_name': 'Number of nodes per element.'}))
    #
    # coll.add_variable(Variable('centerCoords', value=face_coordinates, dimensions=['elementCount', 'coordDim'],
    #                            units='degrees', dtype=float_dtype))
    #
    # if face_uid_name is not None:
    #     coll.add_variable(Variable(face_uid_name, value=face_uid_value, dimensions='elementCount',
    #                                attrs={'long_name': 'Element unique identifier.'}))
    #
    # coll.add_variable(Variable('elementArea', value=face_areas, dimensions='elementCount',
    #                            attrs={'units': 'degrees', 'long_name': 'Element area in native units.'},
    #                            dtype=float_dtype))
    #
    # coll.attrs['gridType'] = 'unstructured'
    # coll.attrs['version'] = '0.9'
    # coll.attrs['coordDim'] = 'longitude latitude'
    #
    # coll.write(ds)

    # Collect the per-rank sizes. The gathered lists are only read inside the rank 0 branch below.
    node_counts = MPI_COMM.gather(nodes.shape[0])
    element_counts = MPI_COMM.gather(faces.shape[0])
    length_connection_counts = MPI_COMM.gather(length_connection_count)

    # Rank 0 alone creates the file and defines its structure; no data values are written in this step.
    if MPI_RANK == 0:
        ds = nc.Dataset(filename, 'w', **dataset_kwargs)
        try:
            # Dimensions -----------------------------------------------------------------------------------------------

            # Dimension sizes are the totals across all ranks.
            node_count_size = sum(node_counts)
            element_count_size = sum(element_counts)
            connection_count_size = sum(length_connection_counts)

            node_count = ds.createDimension('nodeCount', node_count_size)
            element_count = ds.createDimension('elementCount',
                                               element_count_size)
            coord_dim = ds.createDimension('coordDim', 2)
            # element_conn_vltype = ds.createVLType(fm.faces[0].dtype, 'elementConnVLType')
            connection_count = ds.createDimension('connectionCount',
                                                  connection_count_size)

            # Variables ------------------------------------------------------------------------------------------------

            node_coords = ds.createVariable('nodeCoords', nodes.dtype,
                                            (node_count.name, coord_dim.name))
            node_coords.units = 'degrees'

            element_conn = ds.createVariable('elementConn',
                                             element_conn_data.dtype,
                                             (connection_count.name, ))
            element_conn.long_name = 'Node indices that define the element connectivity.'
            if polygon_break_value is not None:
                element_conn.polygon_break_value = polygon_break_value
            element_conn.start_index = start_index

            num_element_conn = ds.createVariable('numElementConn', np.int32,
                                                 (element_count.name, ))
            num_element_conn.long_name = 'Number of nodes per element.'

            center_coords = ds.createVariable(
                'centerCoords', face_coordinates.dtype,
                (element_count.name, coord_dim.name))
            center_coords.units = 'degrees'

            if face_uid_value is not None:
                uid = ds.createVariable(face_uid_name,
                                        face_uid_value.dtype,
                                        dimensions=(element_count.name, ))
                uid.long_name = 'Element unique identifier.'

            # NOTE(review): the area variable takes its dtype from ``nodes``, not ``face_areas`` - confirm intended.
            element_area = ds.createVariable('elementArea', nodes.dtype,
                                             (element_count.name, ))
            element_area.units = 'degrees'
            element_area.long_name = 'Element area in native units.'

            # Global Attributes ----------------------------------------------------------------------------------------

            ds.gridType = 'unstructured'
            ds.version = '0.9'
            # The global attribute is named after the coordinate dimension ('coordDim').
            setattr(ds, coord_dim.name, "longitude latitude")

            # element_mask = ds.createVariable('elementMask', np.int32, (element_count.name,))

        finally:
            ds.close()

    # Fill variable values -----------------------------------------------------------------------------------------

    # Running offsets into the node and connectivity variables, advanced after each rank writes.
    node_coords_start = 0
    node_coords_stop = None
    element_conn_start = 0
    element_conn_stop = None

    # Ranks take turns: only the rank matching ``rank_to_write`` opens the file during an iteration.
    for rank_to_write in range(MPI_SIZE):
        log.debug(('node_coords_start', node_coords_start))
        if MPI_RANK == rank_to_write:
            ds = nc.Dataset(filename, mode='a')
            try:
                node_coords = ds.variables['nodeCoords']
                element_conn = ds.variables['elementConn']
                num_element_conn = ds.variables['numElementConn']
                center_coords = ds.variables['centerCoords']
                element_area = ds.variables['elementArea']
                if face_uid_value is not None:
                    uid = ds.variables[face_uid_name]

                node_coords_stop = node_coords_start + nodes.shape[0]
                element_conn_stop = element_conn_start + element_conn_data.shape[
                    0]
                node_coords[node_coords_start:node_coords_stop] = nodes
                log.debug(('element_conn indices', element_conn_start,
                           element_conn_stop))
                element_conn[
                    element_conn_start:element_conn_stop] = element_conn_data

                # Element-dimensioned variables are placed using the (start, stop) pair stored on the collection.
                start, stop = fmobj['section']
                num_element_conn[start:stop] = num_element_conn_data
                center_coords[start:stop] = face_coordinates
                element_area[start:stop] = face_areas
                if face_uid_value is not None:
                    uid[start:stop] = face_uid_value
            finally:
                ds.close()
        # The rank that just wrote broadcasts its stop offsets, which become every rank's next start offsets.
        node_coords_start = MPI_COMM.bcast(node_coords_stop,
                                           root=rank_to_write)
        element_conn_start = MPI_COMM.bcast(element_conn_stop,
                                            root=rank_to_write)
        MPI_COMM.Barrier()
示例#12
0
def get_variables(gm, use_ragged_arrays=False, with_connectivity=True):
    """
    Build the mesh variable arrays for the geometries held by a geometry manager.

    :param gm: The geometry manager containing geometries to convert to mesh variables.
    :type gm: :class:`pyugrid.flexible_mesh.helpers.GeometryManager`
    :param bool use_ragged_arrays: If ``False``, ``face_links``, ``face_nodes`` and ``face_edges`` are converted to
     rectangular arrays with ``nmax_face_nodes`` columns. If ``True``, they are returned as produced.
    :param bool with_connectivity: Forwarded to ``get_face_variables``.
    :returns: A tuple with index locations corresponding to:

    ===== ================
    Index Name
    ===== ================
    0     face_nodes
    1     face_edges
    2     edge_nodes
    3     coordinates
    4     face_links
    5     face_ids
    6     face_coordinates
    7     face_areas
    8     section
    ===== ================

    ``face_edges`` is the same data as ``face_nodes``. ``face_ids`` is an ``int32`` array built from the keys of the
    coordinate dictionary returned by ``get_face_variables``.

    Information on individual variables may be found here: https://github.com/ugrid-conventions/ugrid-conventions/blob/9b6540405b940f0a9299af9dfb5e7c04b5074bf7/ugrid-conventions.md#2d-flexible-mesh-mixed-triangles-quadrilaterals-etc-topology

    :rtype: tuple
    :raises ValueError: If there are fewer geometries than MPI processes.
    """
    if len(gm) < MPI_SIZE:
        raise ValueError(
            'The number of geometries must be greater than or equal to the number of processes.'
        )

    pbv = UgridToolsConstants.POLYGON_BREAK_VALUE

    result = get_face_variables(gm, with_connectivity=with_connectivity)
    # The ``face_ids`` unpacked here is replaced below by the coordinate dictionary's keys.
    face_links, nmax_face_nodes, face_ids, face_coordinates, cdict, n_coords, face_areas, section = result

    # Find the start index for each rank: the running total of coordinate counts on all lower ranks.
    all_n_coords = MPI_COMM.gather(n_coords)
    if MPI_RANK == 0:
        all_idx_start = [0] * MPI_SIZE
        for idx in range(1, len(all_n_coords)):
            all_idx_start[idx] = all_idx_start[idx - 1] + all_n_coords[idx - 1]
    else:
        all_idx_start = None
    idx_start = MPI_COMM.scatter(all_idx_start)
    log.debug(('idx_start', idx_start))

    face_nodes, coordinates, edge_nodes = get_coordinate_dict_variables(
        cdict, n_coords, polygon_break_value=pbv, idx_start=idx_start)
    face_edges = face_nodes
    # Materialize the keys first: NumPy cannot convert a Python 3 dict view to an integer array.
    face_ids = np.array(list(cdict.keys()), dtype=np.int32)

    if not use_ragged_arrays:
        target_shapes = [(a, (a.shape[0], nmax_face_nodes))
                         for a in (face_links, face_nodes, face_edges)]
        face_links, face_nodes, face_edges = [
            get_rectangular_array_from_object_array(a, shape)
            for a, shape in target_shapes
        ]

    return face_nodes, face_edges, edge_nodes, coordinates, face_links, face_ids, face_coordinates, face_areas, section