def get_mask_from_intersects(self, geometry_or_bounds, use_spatial_index=env.USE_SPATIAL_INDEX, keep_touches=False,
                             original_mask=None):
    """
    Compute the mask produced by an intersects test against a subset geometry.

    :param geometry_or_bounds: A Shapely geometry or a bounds tuple used for the masking. Anything that is not a
     Shapely geometry is unpacked into :func:`shapely.geometry.box`.
    :type geometry_or_bounds: :class:`shapely.geometry.base.BaseGeometry` | :class:`tuple`
    :param bool use_spatial_index: If ``True``, use a spatial index for the operation.
    :param bool keep_touches: If ``True``, keep geometries that only touch.
    :param original_mask: A hint mask for the spatial operation. ``True`` values will be skipped.
    :type original_mask: :class:`numpy.ndarray`
    :returns: boolean array with non-intersecting values set to ``True``
    :rtype: :class:`numpy.ndarray`
    """
    raise_if_empty(self)

    # Normalize the subset target: bounds sequences become a rectangular geometry.
    if isinstance(geometry_or_bounds, BaseGeometry):
        subset_geometry = geometry_or_bounds
    else:
        subset_geometry = box(*geometry_or_bounds)

    return geometryvariable_get_mask_from_intersects(self, subset_geometry, use_spatial_index=use_spatial_index,
                                                     keep_touches=keep_touches, original_mask=original_mask)
def get_buffer(self, *args, **kwargs):
    """
    Return a shallow copy of the geometry variable whose unmasked geometries have been buffered.

    .. note:: Accepts all parameters to :meth:`shapely.geometry.base.BaseGeometry.buffer`.

    An additional keyword argument is:

    :keyword str geom_type: The geometry type for the new buffered geometry if known in advance. Defaults to
     ``'auto'``.
    :rtype: :class:`~ocgis.GeometryVariable`
    :raises: :class:`~ocgis.exc.EmptyObjectError`
    """
    raise_if_empty(self)

    # "geom_type" is consumed here so it is not forwarded to the Shapely buffer call.
    buffered_geom_type = kwargs.pop('geom_type', 'auto')

    buffered = self.copy()
    filled = np.empty_like(buffered.get_value(), dtype=object)
    source_geoms = self.get_value()
    source_mask = self.get_mask()

    # Masked positions receive None; everything else receives the buffered geometry.
    for idx, is_masked in iter_array(source_mask, return_value=True):
        filled[idx] = None if is_masked else source_geoms[idx].buffer(*args, **kwargs)

    buffered.set_value(filled)
    buffered._geom_type = buffered_geom_type
    return buffered
def write_variable_collection(cls, vc, opened_or_path, **kwargs):
    """
    Write a variable collection once for every applicable write mode.

    :param vc: The variable collection to write. Must not be empty.
    :param opened_or_path: The write target. When the current VM has more than one rank, an already-opened
     target is rejected.
    :param kwargs: Forwarded to ``cls._get_write_modes_`` and ``cls._write_variable_collection_main_``. The
     write mode keyword (``KeywordArgument.WRITE_MODE``) is consumed here; when absent or ``None``, the modes
     come from ``cls._get_write_modes_``.
    :raises TypeError: if the unsupported ``ranks_to_write`` keyword is provided
    :raises ValueError: if ``opened_or_path`` is in an opened state during a parallel write
    """
    raise_if_empty(vc)

    if 'ranks_to_write' in kwargs:
        raise TypeError("write_variable_collection() got an unexpected keyword argument 'ranks_to_write'")

    write_mode = kwargs.pop(KeywordArgument.WRITE_MODE, None)

    if vm.size > 1 and cls.inquire_opened_state(opened_or_path):
        raise ValueError('Only paths allowed for parallel writes.')

    if write_mode is None:
        write_modes = cls._get_write_modes_(vm, **kwargs)
    else:
        write_modes = [write_mode]

    # Global string lengths are needed by the write. Set those while we still have global access.
    for var in vc.values():
        if var._string_max_length_global is None:
            var.set_string_max_length_global()

    for write_mode in write_modes:
        cls._write_variable_collection_main_(vc, opened_or_path, write_mode, **kwargs)
def write_variable_collection(cls, vc, opened_or_path, **kwargs):
    """
    Write a variable collection once for every applicable write mode.

    :param vc: The variable collection to write. Must not be empty.
    :param opened_or_path: The write target. When the current VM has more than one rank, an already-opened
     target is rejected.
    :param kwargs: Forwarded to ``cls._get_write_modes_`` and ``cls._write_variable_collection_main_``. The
     write mode keyword (``KeywordArgument.WRITE_MODE``) is consumed here; when absent or ``None``, the modes
     come from ``cls._get_write_modes_``.
    :raises TypeError: if the unsupported ``ranks_to_write`` keyword is provided
    :raises ValueError: if ``opened_or_path`` is in an opened state during a parallel write
    """
    raise_if_empty(vc)

    if 'ranks_to_write' in kwargs:
        raise TypeError("write_variable_collection() got an unexpected keyword argument 'ranks_to_write'")

    write_mode = kwargs.pop(KeywordArgument.WRITE_MODE, None)

    if vm.size > 1 and cls.inquire_opened_state(opened_or_path):
        raise ValueError('Only paths allowed for parallel writes.')

    if write_mode is None:
        write_modes = cls._get_write_modes_(vm, **kwargs)
    else:
        write_modes = [write_mode]

    # Global string lengths are needed by the write. Set those while we still have global access.
    for var in vc.values():
        if var._string_max_length_global is None:
            var.set_string_max_length_global()

    for write_mode in write_modes:
        cls._write_variable_collection_main_(vc, opened_or_path, write_mode, **kwargs)
def reduce_global(self):
    """
    De-duplicate and reindex (reset start index) an element node connectivity variable. Operation is collective
    across the current VM. The new node dimension is distributed. Return a shallow copy of `self` for convenience.

    :rtype: :class:`~ocgis.spatial.geomc.AbstractGeometryCoordinates`
    :raises ValueError: if the object has no coordinate index
    """
    raise_if_empty(self)

    if self.cindex is None:
        raise ValueError('A coordinate index is required to reduce coordinates.')

    reindexed, unique_idx = reduce_reindex_coordinate_index(self.cindex, start_index=self.start_index)
    reindexed_var = Variable(name=self.cindex.name, value=reindexed, dimensions=self.cindex.dimensions)

    reduced = self.copy()

    # Slice the x-coordinate by the unique indices and work with the parent of the sliced result.
    sliced_parent = self.x[unique_idx].parent
    node_dim = sliced_parent[self.x.name].dimensions[0]

    # Replace the node dimension with a distributed dimension of the same size, name and source index.
    sliced_parent.dimensions[node_dim.name] = create_distributed_dimension(node_dim.size, name=node_dim.name,
                                                                           src_idx=node_dim._src_idx)

    # Remove the old coordinate index from the sliced parent before attaching the reindexed one.
    sliced_parent[self.cindex.name].extract(clean_break=True)

    reduced._cindex_name = None
    reduced.parent = sliced_parent
    reduced.cindex = reindexed_var
    return reduced
def wrapped_state(self):
    """
    Return the wrapped state reported by the object's coordinate system.

    :returns: the result of ``crs.get_wrapped_state(self)``, or ``None`` when there is no coordinate system
    """
    raise_if_empty(self)

    # Without a coordinate system there is nothing to evaluate the wrapped state against.
    if self.crs is None:
        return None
    return self.crs.get_wrapped_state(self)
def wrap(self):
    """
    Wrap the object by delegating to its coordinate system with ``WrapAction.WRAP``.

    :raises ValueError: if the coordinate system is ``None`` or is not geographic
    """
    raise_if_empty(self)

    if self.crs is not None and self.crs.is_geographic:
        self.crs.wrap_or_unwrap(WrapAction.WRAP, self)
    else:
        raise ValueError("Only spherical coordinate systems may be wrapped/unwrapped.")
def get_wrapped_state(self, target): """ :param target: Return the wrapped state of a field. This function only checks grid centroids and geometry exteriors. Bounds/corners on the grid are excluded. :type target: :class:`~ocgis.Field` """ # TODO: Wrapped state should operate on the x-coordinate variable vectors or geometries only. # TODO: This should be a method on grids and geometry variables. from ocgis.collection.field import Field from ocgis.spatial.base import AbstractXYZSpatialContainer from ocgis import vm raise_if_empty(self) # If this is not a wrappable coordinate system, wrapped state is undefined. if not self.is_wrappable: ret = None else: if isinstance(target, Field): grid = target.grid if grid is not None: target = grid else: target = target.geom if target is None: raise WrappedStateEvalTargetMissing elif target.is_empty: ret = None elif isinstance(target, AbstractXYZSpatialContainer): ret = self._get_wrapped_state_from_array_(target.x.get_value()) else: stops = (WrappedState.WRAPPED, WrappedState.UNWRAPPED) ret = WrappedState.UNKNOWN geoms = target.get_masked_value().flat _is_masked = np.ma.is_masked _get_ws = self._get_wrapped_state_from_geometry_ for geom in geoms: if not _is_masked(geom): flag = _get_ws(geom) if flag in stops: ret = flag break rets = vm.gather(ret) if vm.rank == 0: rets = set(rets) if WrappedState.WRAPPED in rets: ret = WrappedState.WRAPPED elif WrappedState.UNWRAPPED in rets: ret = WrappedState.UNWRAPPED else: ret = list(rets)[0] else: ret = None ret = vm.bcast(ret) return ret
def element_dim(self):
    """
    Return the element dimension as reported by the parent's driver. The size of the dimension is equivalent
    to the element count.

    :rtype: :class:`~ocgis.Dimension`
    """
    raise_if_empty(self)
    driver = self.parent.driver
    return driver.get_element_dimension(self)
def unwrap(self):
    """
    Unwrap the field's coordinates contained in its grid and/or geometry. Delegates to ``wrap_or_unwrap`` with
    the unwrap action.

    :raises: :class:`~ocgis.exc.EmptyObjectError`
    """
    raise_if_empty(self)
    action = WrapAction.UNWRAP
    wrap_or_unwrap(self, action)
def __init__(self, field, output_crs='input', wrap=None):
    """
    Store the target field and the output options.

    :param field: The field to operate on. Must be a non-empty ``Field``.
    :param output_crs: Stored on the instance unchanged. Defaults to ``'input'``.
    :param wrap: Stored on the instance unchanged.
    :raises ValueError: if ``field`` is not a ``Field`` instance
    """
    # Type validation happens before the empty check.
    if isinstance(field, Field):
        raise_if_empty(field)
    else:
        raise ValueError('"field" must be an "Field" object.')

    self.field = field
    self.output_crs = output_crs
    self.wrap = wrap

    self._original_rotated_pole_state = None
def update_crs(self, to_crs):
    """
    Update coordinate system in-place. The code in this implementation only validates the inputs.

    :param to_crs: The destination coordinate system. May not be ``None``.
    :raises ValueError: if the current or the destination coordinate system is ``None``
    """
    raise_if_empty(self)

    if self.crs is None:
        raise ValueError('The current CRS is None and cannot be updated. Has the coordinate system been assigned '
                         'appropriately?')

    if to_crs is None:
        raise ValueError('The destination CRS may not be None. Has the coordinate system been assigned '
                         'appropriately?')
def test_reduce_reindex_coordinate_variables(self):
    """Scatter a coordinate index, reduce/reindex it and check the gathered result on the root rank."""
    self.add_barrier = False

    ompi = OcgDist()
    ompi.create_dimension('dim', 12, dist=True)
    ompi.update_dimension_bounds()

    global_cindex_arr = np.array([4, 2, 1, 2, 1, 4, 1, 4, 2, 5, 6, 7])

    # Only the root rank holds the global variable before the scatter.
    root_cindex = Variable('cindex', value=global_cindex_arr, dimensions='dim') if vm.rank == 0 else None
    var_cindex = variable_scatter(root_cindex, ompi)

    vm.create_subcomm_by_emptyable('test', var_cindex, is_current=True)
    if vm.is_null:
        return

    raise_if_empty(var_cindex)

    coords = Variable(name='coords',
                      value=np.array([0, 11, 22, 33, 44, 55, 66, 77, 88, 99, 100, 110, 120, 130, 140, 150]),
                      dimensions='coord_dim')

    new_cindex, u_indices = reduce_reindex_coordinate_variables(var_cindex)

    desired = coords[global_cindex_arr].get_value()

    if len(u_indices) > 0:
        local_coords = coords[u_indices].get_value()
    else:
        local_coords = np.array([])
    all_coords = vm.gather(local_coords)
    all_cindex = vm.gather(new_cindex)

    if vm.rank == 0:
        all_coords = hgather(all_coords)
        all_cindex = hgather(all_cindex)

        actual = all_coords[all_cindex]

        self.assertAsSetEqual(all_cindex.tolist(), [2, 1, 0, 3, 4, 5])
        expected_coords = [11, 22, 44, 55, 66, 77]
        self.assertAsSetEqual(all_coords.tolist(), expected_coords)
        self.assertEqual(len(all_coords), len(expected_coords))

        self.assertNumpyAll(actual, desired)
def has_mask_global(self):
    """
    Returns ``True`` if the global spatial object has a mask. Collective across the current VM.

    :rtype: bool
    """
    raise_if_empty(self)

    gathered = vm.gather(self.has_mask)
    # Only the root rank reduces the gathered flags; the answer is then shared with every rank.
    answer = np.any(gathered) if vm.rank == 0 else None
    return vm.bcast(answer)
def wrapped_state(self):
    """
    :return: The wrapped state for the field, or ``None`` when the field has no coordinate system.
    :rtype: :attr:`ocgis.constants.WrappedState`
    :raises: :class:`~ocgis.exc.EmptyObjectError`
    """
    raise_if_empty(self)

    # The coordinate system performs the evaluation; without one there is no wrapped state.
    if self.crs is None:
        return None
    return self.crs.get_wrapped_state(self)
def has_masked_values_global(self):
    """
    Returns ``True`` if the global spatial object's mask contains any masked values. Will return ``False`` if the
    global object has no mask. Collective across the current VM.

    :rtype: bool
    """
    raise_if_empty(self)

    gathered = vm.gather(self.has_masked_values)
    # Reduce on the root rank only, then broadcast the answer.
    answer = np.any(gathered) if vm.rank == 0 else None
    return vm.bcast(answer)
def geom_type_global(self):
    """
    :returns: global geometry type collective across the current :class:`~ocgis.OcgVM`
    :rtype: str
    :raises: :class:`~ocgis.exc.EmptyObjectError`
    """
    raise_if_empty(self)

    gathered_types = vm.gather(self.geom_type)
    if vm.rank != 0:
        chosen = None
    else:
        # Stop at the first "Multi" type. If none is found, the loop variable is left on the last gathered type.
        for chosen in gathered_types:
            if chosen.startswith('Multi'):
                break
    return vm.bcast(chosen)
def update_crs(self, to_crs, from_crs=None):
    """
    See :meth:`ocgis.spatial.base.AbstractOperationsSpatialObject.update_crs`

    Updates the grid and the geometry (whichever are present) and then records the new coordinate system on the
    dimension map. When ``from_crs`` is ``None``, the object's current coordinate system is used as the source.
    """
    raise_if_empty(self)

    source_crs = self.crs if from_crs is None else from_crs

    grid = self.grid
    if grid is not None:
        grid.update_crs(to_crs, from_crs=source_crs)

    geom = self.geom
    if geom is not None:
        geom.update_crs(to_crs, from_crs=source_crs)

    self.dimension_map.set_crs(to_crs)
def update_crs(self, to_crs):
    """
    Update the field coordinates contained in its grid and/or geometry. The grid is updated when present;
    otherwise the geometry is updated. The dimension map then records the new coordinate system.

    :param to_crs: The destination coordinate reference system.
    :type to_crs: :class:`~ocgis.variable.crs.AbstractCRS`
    :raises: :class:`~ocgis.exc.EmptyObjectError`
    """
    raise_if_empty(self)

    grid = self.grid
    spatial_target = self.geom if grid is None else grid
    spatial_target.update_crs(to_crs)

    self.dimension_map.set_crs(to_crs)
def update_crs(self, to_crs):
    """
    Update the coordinate system in place. The code in this implementation only validates the inputs.

    :param to_crs: The destination coordinate system.
    :type to_crs: :class:`~ocgis.variable.crs.AbstractCRS`
    :raises ValueError: if the current or the destination coordinate system is ``None``
    """
    raise_if_empty(self)

    if self.crs is None:
        raise ValueError('The current CRS is None and cannot be updated. Has the coordinate system been assigned '
                         'appropriately?')

    if to_crs is None:
        raise ValueError('The destination CRS may not be None. Has the coordinate system been assigned '
                         'appropriately?')
def get_or_create_spatial_mask(*args, **kwargs):
    """
    Get or create the SCRIP spatial mask. Arguments and keyword arguments in signature are for driver
    compatibility only.

    Only the first positional argument (the spatial object) is used.

    :returns: the mask, or ``None`` when the spatial object has no mask
    """
    sobj = args[0]
    raise_if_empty(sobj)

    if not sobj.has_mask:
        return None

    maskvar = sobj.parent['grid_imask']
    if maskvar.has_allocated_value:
        return maskvar.m()

    # Derive the mask from the variable's values (zero means masked) and store it on the variable.
    created = maskvar.v() == 0
    maskvar.set_mask(created)
    return created
def test_reduce_reindex_coordinate_index(self):
    """Scatter a coordinate index, reduce/reindex it and check the gathered result on the root rank."""
    ompi = OcgDist()
    ompi.create_dimension('dim', 12, dist=True)
    ompi.update_dimension_bounds()

    global_cindex_arr = np.array([4, 2, 1, 2, 1, 4, 1, 4, 2, 5, 6, 7])

    # Only the root rank holds the global variable before the scatter.
    root_cindex = Variable('cindex', value=global_cindex_arr, dimensions='dim') if vm.rank == 0 else None
    var_cindex = variable_scatter(root_cindex, ompi)

    vm.create_subcomm_by_emptyable('test', var_cindex, is_current=True)
    if vm.is_null:
        return

    raise_if_empty(var_cindex)

    coords = Variable(name='coords',
                      value=np.array([0, 11, 22, 33, 44, 55, 66, 77, 88, 99, 100, 110, 120, 130, 140, 150]),
                      dimensions='coord_dim')

    new_cindex, u_indices = reduce_reindex_coordinate_index(var_cindex)

    desired = coords[global_cindex_arr].get_value()

    if len(u_indices) > 0:
        local_coords = coords[u_indices].get_value()
    else:
        local_coords = np.array([])
    all_coords = vm.gather(local_coords)
    all_cindex = vm.gather(new_cindex)

    if vm.rank == 0:
        all_coords = hgather(all_coords)
        all_cindex = hgather(all_cindex)

        actual = all_coords[all_cindex]

        self.assertAsSetEqual(all_cindex.tolist(), [2, 1, 0, 3, 4, 5])
        expected_coords = [11, 22, 44, 55, 66, 77]
        self.assertAsSetEqual(all_coords.tolist(), expected_coords)
        self.assertEqual(len(all_coords), len(expected_coords))

        self.assertNumpyAll(actual, desired)
def shape_global(self):
    """
    Get the global shape across the current :class:`~ocgis.OcgVM`.

    :rtype: :class:`tuple` of :class:`int`
    :raises: :class:`~ocgis.exc.EmptyObjectError`
    """
    raise_if_empty(self)

    # Each rank contributes the upper global bound of every dimension.
    upper_bounds = [max(dim.bounds_global) for dim in self.dimensions]
    gathered = vm.gather(upper_bounds)

    # The root rank takes the per-dimension maximum and shares the result.
    result = tuple(np.max(gathered, axis=0)) if vm.rank == 0 else None
    return vm.bcast(result)
def _write_variable_collection_main_(cls, vc, opened_or_path, write_mode, **kwargs):
    """
    Write the records of a variable collection as CSV, one rank at a time.

    :param vc: The variable collection to write. Must not be empty.
    :param opened_or_path: The write target handed to ``driver_scope``.
    :param write_mode: The MPI write mode. The header is skipped for ``MPIWriteMode.FILL`` and the records are
     skipped for ``MPIWriteMode.TEMPLATE``.
    :param kwargs: ``KeywordArgument.ITER_KWARGS`` is consumed and passed to ``vc.iter``.
    """
    raise_if_empty(vc)

    iter_kwargs = kwargs.pop(KeywordArgument.ITER_KWARGS, {})

    # The column names come from the keys of the first record yielded by the collection iterator.
    first_record = six.next(vc.iter(**iter_kwargs))[1]
    fieldnames = list(first_record.keys())

    # The root rank writes the header unless only filling.
    if vm.rank == 0 and write_mode != MPIWriteMode.FILL:
        with driver_scope(cls, opened_or_path, mode='w') as opened:
            csv.DictWriter(opened, fieldnames).writeheader()

    if write_mode == MPIWriteMode.TEMPLATE:
        return

    # Ranks take turns appending their records; the barrier keeps the turns ordered.
    for writing_rank in vm.ranks:
        if vm.rank == writing_rank:
            with driver_scope(cls, opened_or_path, mode='a') as opened:
                writer = csv.DictWriter(opened, fieldnames)
                writer.writerows(record for _, record in vc.iter(**iter_kwargs))
        vm.barrier()
def update_crs(self, to_crs, from_crs=None):
    """
    Update the coordinate system in place. The code in this implementation only validates the inputs.

    :param to_crs: The destination coordinate system.
    :type to_crs: :class:`~ocgis.variable.crs.AbstractCRS`
    :param from_crs: Optional original coordinate system to temporarily assign to the data. Useful when the
     object's coordinate system is different from the desired coordinate system.
    :type from_crs: :class:`~ocgis.variable.crs.AbstractCRS`
    :raises ValueError: if both the current coordinate system and ``from_crs`` are ``None``, or if ``to_crs`` is
     ``None``
    """
    raise_if_empty(self)

    # A source coordinate system must come from either the object or the caller.
    if self.crs is None and from_crs is None:
        raise ValueError('The current CRS is None and cannot be updated. Has the coordinate system been assigned '
                         'appropriately?')

    if to_crs is None:
        raise ValueError('The destination CRS may not be None. Has the coordinate system been assigned '
                         'appropriately?')
def get_extent_global(container):
    """
    Compute the extent covering every rank's local extent. Collective across the current VM.

    :param container: A non-empty object exposing an ``extent`` attribute.
    :returns: ``(min of column 0, min of column 1, max of column 2, max of column 3)`` of the gathered extents
    :rtype: tuple
    """
    raise_if_empty(container)

    gathered = vm.gather(container.extent)

    if vm.rank != 0:
        combined = None
    else:
        # Ranks reporting no extent are ignored.
        stacked = np.array([e for e in gathered if e is not None])
        combined = (np.min(stacked[:, 0]),
                    np.min(stacked[:, 1]),
                    np.max(stacked[:, 2]),
                    np.max(stacked[:, 3]))

    return vm.bcast(combined)
def _write_spatial_subset_(rd_src, rd_dst, spatial_subset_path, src_resmax=None):
    """
    Subset the source field by the (buffered) global extent of the destination grid and write the subset.

    :param rd_src: Object whose ``create_field()`` returns the source field.
    :param rd_dst: Object whose ``create_field()`` returns the destination field.
    :param spatial_subset_path: Target passed to the subset's ``write`` method.
    :param src_resmax: Optional resolution used to size the subset buffer. When ``None``, the source grid's
     ``resolution_max`` is used.
    """
    src_field = rd_src.create_field()
    dst_field = rd_dst.create_field()
    sso = SpatialSubsetOperation(src_field)

    # The destination extent is read while the grid abstraction is scoped to polygons.
    with grid_abstraction_scope(dst_field.grid, Topology.POLYGON):
        dst_field_extent = dst_field.grid.extent_global

    subset_geom = GeometryVariable.from_shapely(box(*dst_field_extent), crs=dst_field.crs, is_bbox=True)
    if src_resmax is None:
        src_resmax = src_field.grid.resolution_max
    # Buffer the subset geometry by a multiple of the source resolution.
    buffer_value = GridChunkerConstants.BUFFER_RESOLUTION_MODIFIER * src_resmax
    sub_src = sso.get_spatial_subset('intersects', subset_geom, buffer_value=buffer_value,
                                     optimized_bbox_subset=True)

    # No empty spatial subsets allowed through CLI. There will be nothing for ESMF to do.
    raise_if_empty(sub_src, check_current=True)

    # Try to reduce the coordinate indexing for unstructured grids.
    with ocgis.vm.scoped_by_emptyable('subset reduce/write', sub_src):
        if not ocgis.vm.is_null:
            # Attempt to reindex the subset.
            try:
                reduced = sub_src.grid.reduce_global()
            except AttributeError:
                # The grid has no "reduce_global"; write the subset as-is.
                pass
            except ValueError:
                # A failed reduction is only fatal for the UGRID driver.
                if sub_src.driver.__class__ == DriverNetcdfUGRID:
                    raise
            else:
                sub_src = reduced.parent

            # Write the subset to file.
            # NOTE(review): SOURCE was collapsed onto one line; the write is assumed to happen only on non-null
            # ranks inside the scoped VM - confirm against the original file.
            sub_src.write(spatial_subset_path)
def get_extent_global(container):
    """
    Compute the extent covering every rank's local extent. Collective across the current VM.

    :param container: A non-empty object exposing an ``extent`` attribute.
    :returns: ``(min of column 0, min of column 1, max of column 2, max of column 3)`` of the gathered extents
    :rtype: tuple
    """
    raise_if_empty(container)

    gathered = vm.gather(container.extent)

    if vm.rank != 0:
        combined = None
    else:
        # Ranks reporting no extent are ignored.
        stacked = np.array([e for e in gathered if e is not None])
        combined = (np.min(stacked[:, 0]),
                    np.min(stacked[:, 1]),
                    np.max(stacked[:, 2]),
                    np.max(stacked[:, 3]))

    return vm.bcast(combined)
def get_intersects(self, *args, **kwargs):
    """
    Perform an intersects spatial operations on the geometry variable. Remaining arguments are passed to
    ``get_mask_from_intersects``.

    :keyword bool return_slice: (``=False``) If ``True``, return the _global_ slice that will guarantee no masked
     elements outside the subset geometry as the second element in the return value.
    :keyword bool cascade: (``=True``) If ``True`` (the default), set the mask following the spatial operation on
     all variables in the parent collection.
    :returns: shallow copy of the geometry variable
    :rtype: :class:`~ocgis.GeometryVariable` | ``(<geometry variable>, <slice>)``
    :raises: :class:`~ocgis.exc.EmptySubsetError`
    """
    raise_if_empty(self)

    want_slice = kwargs.pop(KeywordArgument.RETURN_SLICE, False)
    cascade = kwargs.pop(KeywordArgument.CASCADE, True)

    subset = self.copy()
    mask_value = subset.get_mask_from_intersects(*args, **kwargs)
    subset, subset_mask, global_slice = get_masking_slice(mask_value, subset)

    if subset.is_empty:
        # An empty subset must leave every variable in the parent collection empty as well.
        for var in list(subset.parent.values()):
            assert var.is_empty
    else:
        subset.set_mask(subset_mask.get_value(), cascade=cascade, update=True)

    # TODO: need to implement fancy index-based slicing for the one-dimensional unstructured case. Difficult in
    #  parallel.

    return (subset, global_slice) if want_slice else subset
def create_ugid_global(self, name, start=1):
    """
    Same as :meth:`~ocgis.GeometryVariable.create_ugid` but collective across the current
    :class:`~ocgis.OcgVM`. The root rank hands every rank its own starting identifier, advancing by each rank's
    size.

    :raises: :class:`~ocgis.exc.EmptyObjectError`
    """
    raise_if_empty(self)

    sizes = vm.gather(self.size)

    if vm.rank != 0:
        rank_start = vm.comm.recv(source=0)
    else:
        running_start = start
        for idx, rank in enumerate(vm.ranks):
            if rank == vm.rank:
                rank_start = running_start
            else:
                vm.comm.send(running_start, dest=rank)
            # The next rank begins after the identifiers consumed by this one.
            running_start += sizes[idx]

    return self.create_ugid(name, start=rank_start)
def write_variable_collection(cls, vc, opened_or_path, **kwargs):
    """
    Write a variable collection once for every applicable write mode.

    :param vc: The variable collection to write. Must not be empty.
    :param opened_or_path: The write target. When the current VM has more than one rank, an already-opened
     target is rejected.
    :param kwargs: Forwarded to ``cls._write_variable_collection_main_``. The write mode keyword
     (``KeywordArgument.WRITE_MODE``) is consumed here. When it is absent or ``None``, a parallel write uses a
     template pass followed by a fill pass and a serial write uses the normal mode.
    :raises TypeError: if the unsupported ``ranks_to_write`` keyword is provided
    :raises ValueError: if ``opened_or_path`` is in an opened state during a parallel write
    """
    raise_if_empty(vc)

    if 'ranks_to_write' in kwargs:
        raise TypeError("write_variable_collection() got an unexpected keyword argument 'ranks_to_write'")

    write_mode = kwargs.pop(KeywordArgument.WRITE_MODE, None)

    is_parallel = vm.size > 1
    if is_parallel and cls.inquire_opened_state(opened_or_path):
        raise ValueError('Only paths allowed for parallel writes.')

    if write_mode is not None:
        write_modes = [write_mode]
    elif is_parallel:
        write_modes = [MPIWriteMode.TEMPLATE, MPIWriteMode.FILL]
    else:
        write_modes = [MPIWriteMode.NORMAL]

    for write_mode in write_modes:
        cls._write_variable_collection_main_(vc, opened_or_path, write_mode, **kwargs)
def get_masking_slice(intersects_mask_value, target, apply_slice=True):
    """
    Collective!

    Convert a local intersects mask into a global slice and optionally apply the slice to the target and mask.

    :param intersects_mask_value: The mask to use for creating the slice indices.
    :type intersects_mask_value: :class:`numpy.ndarray`, dtype=bool
    :param target: The target slicable object to slice.
    :param bool apply_slice: If ``True``, apply the slice.
    :returns: ``(<sliced or original target>, <mask variable>, <global slice tuple>)``
    :rtype: tuple
    :raises EmptySubsetError: if no rank contributes a local slice (every mask is ``None`` or fully ``True``)
    """
    raise_if_empty(target)

    if intersects_mask_value is None:
        local_slice = None
    elif intersects_mask_value.all():
        # Everything is masked on this rank so it contributes nothing to the global slice.
        local_slice = None
    elif not intersects_mask_value.any():
        # Nothing is masked so the local slice spans the full local extent. Built from the actual shape so
        # one-dimensional masks work; at most the first two dimensions are used.
        shp = intersects_mask_value.shape
        local_slice = [(0, extent) for extent in shp[:2]]
    else:
        _, local_slice = get_trimmed_array_by_mask(intersects_mask_value, return_adjustments=True)
        local_slice = [(l.start, l.stop) for l in local_slice]

    # Shift the local slice by each dimension's local lower bound.
    if local_slice is not None:
        offset_local_slice = [None] * len(local_slice)
        for idx, (lower, upper) in enumerate(local_slice):
            offset = target.dimensions[idx].bounds_local[0]
            offset_local_slice[idx] = (lower + offset, upper + offset)
    else:
        offset_local_slice = None

    gathered_offset_local_slices = vm.gather(offset_local_slice)
    if vm.rank == 0:
        gathered_offset_local_slices = [g for g in gathered_offset_local_slices if g is not None]
        if len(gathered_offset_local_slices) == 0:
            raise_empty_subset = True
        else:
            raise_empty_subset = False
            # The global slice per dimension is the min/max over all contributing ranks.
            offset_array = np.array(gathered_offset_local_slices)
            global_slice = [None] * offset_array.shape[1]
            for idx in range(len(global_slice)):
                global_slice[idx] = (np.min(offset_array[:, idx, :]), np.max(offset_array[:, idx, :]))
    else:
        global_slice = None
        raise_empty_subset = None

    # Every rank must learn about an empty subset before any rank raises.
    raise_empty_subset = vm.bcast(raise_empty_subset)
    if raise_empty_subset:
        raise EmptySubsetError

    global_slice = vm.bcast(global_slice)
    global_slice = tuple([slice(g[0], g[1]) for g in global_slice])

    intersects_mask = Variable(name='mask_gather', value=intersects_mask_value, dimensions=target.dimensions,
                               dtype=bool)

    if apply_slice:
        if vm.size_global > 1:
            ret = target.get_distributed_slice(global_slice)
            ret_mask = intersects_mask.get_distributed_slice(global_slice)
        else:
            ret = target.__getitem__(global_slice)
            ret_mask = intersects_mask.__getitem__(global_slice)
    else:
        ret = target
        ret_mask = intersects_mask

    return ret, ret_mask, global_slice
def _update_aggregation_wrapping_crs_(obj, alias, sfield, subset_sdim, subset_ugid):
    """
    Apply spatial aggregation, wrapping and coordinate system updates to a subsetted field.

    NOTE(review): SOURCE was collapsed onto a few lines; the block nesting below was reconstructed from the code's
    syntax and should be confirmed against the original file.

    :param obj: The calling object. Its ``ops``, ``_subset_log`` and ``_backtransform`` attributes are read.
    :param alias: Passed through to logging only.
    :param sfield: The non-empty subsetted field to update.
    :param subset_sdim: The selection geometry container or ``None``. Its coordinate system may be updated to the
     output coordinate system.
    :param subset_ugid: Passed through to logging only.
    :returns: The updated field. After a spatial average, ranks left with a null communicator return early with
     their (possibly empty) field.
    """
    raise_if_empty(sfield)

    ocgis_lh('entering _update_aggregation_wrapping_crs_', obj._subset_log, alias=alias, ugid=subset_ugid,
             level=logging.DEBUG)

    # Aggregate if requested.
    if obj.ops.aggregate:
        ocgis_lh('aggregate requested in _update_aggregation_wrapping_crs_', obj._subset_log, alias=alias,
                 ugid=subset_ugid, level=logging.DEBUG)

        # There may be no geometries if we are working with a gridded dataset. Load the geometries if this is the case.
        sfield.set_abstraction_geom()

        ocgis_lh('after sfield.set_abstraction_geom in _update_aggregation_wrapping_crs_', obj._subset_log,
                 alias=alias, ugid=subset_ugid, level=logging.DEBUG)

        # Union the geometries and spatially average the data variables.
        # with vm.scoped(vm.get_live_ranks_from_object(sfield)):
        sfield = sfield.geom.get_unioned(spatial_average=sfield.data_variables)
        ocgis_lh('after sfield.geom.get_unioned in _update_aggregation_wrapping_crs_', obj._subset_log,
                 alias=alias, ugid=subset_ugid, level=logging.DEBUG)

        # None is returned for the non-root process. Check we are in parallel and create an empty field.
        if sfield is None:
            if vm.size == 1:
                raise ValueError('None should not be returned from get_unioned if running on a single processor.')
            else:
                sfield = Field(is_empty=True)
        else:
            # The union returns a geometry variable; continue with its parent.
            sfield = sfield.parent

        # Restrict communication to the ranks that still hold data after the spatial average.
        vm.create_subcomm_by_emptyable(SubcommName.SPATIAL_AVERAGE, sfield, is_current=True, clobber=True)

        if not vm.is_null and subset_sdim is not None and subset_sdim.geom is not None:
            # Add the unique geometry identifier variable. This should match the selection geometry's identifier.
            new_gid_variable_kwargs = dict(name=HeaderName.ID_GEOMETRY, value=subset_sdim.geom.ugid.get_value(),
                                           dimensions=sfield.geom.dimensions)
            dm = get_data_model(obj.ops)
            new_gid_variable = create_typed_variable_from_data_model('int', data_model=dm, **new_gid_variable_kwargs)
            sfield.geom.set_ugid(new_gid_variable)

    # Ranks excluded from the current communicator have nothing further to do.
    if vm.is_null:
        ocgis_lh(msg='null communicator following spatial average. returning.', logger=obj._subset_log,
                 level=logging.DEBUG)
        return sfield

    raise_if_empty(sfield)
    ocgis_lh(msg='before wrapped_state in _update_aggregation_wrapping_crs_', logger=obj._subset_log,
             level=logging.DEBUG)
    try:
        wrapped_state = sfield.wrapped_state
    except WrappedStateEvalTargetMissing:
        # If there is no target for wrapping evaluation, then consider this unknown.
        wrapped_state = WrappedState.UNKNOWN
    ocgis_lh(msg='after wrapped_state in _update_aggregation_wrapping_crs_', logger=obj._subset_log,
             level=logging.DEBUG)

    # Wrap the returned data.
    if not env.OPTIMIZE_FOR_CALC and not sfield.is_empty:
        if wrapped_state == WrappedState.UNWRAPPED:
            ocgis_lh('wrap target is empty: {}'.format(sfield.is_empty), obj._subset_log, level=logging.DEBUG)

            # There may be no geometries if we are working with a gridded dataset. Load the geometries if this
            # is the case.
            sfield.set_abstraction_geom()

            if obj.ops.output_format in constants.VECTOR_OUTPUT_FORMATS and obj.ops.vector_wrap:
                ocgis_lh('wrapping output geometries', obj._subset_log, alias=alias, ugid=subset_ugid,
                         level=logging.DEBUG)

                # Deepcopy geometries before wrapping as wrapping will be performed inplace. The original field may
                # need to be reused for additional subsets.
                geom = sfield.geom
                copied_geom = geom.get_value().copy()
                geom.set_value(copied_geom)
                # Some grids do not play nicely with wrapping. Bounds may be less than zero for an unwrapped grid.
                # Force wrapping if it is requested. Normally, when force is false there is a pass-through that will
                # leave the data untouched.
                geom.wrap(force=True)
                ocgis_lh('finished wrapping output geometries', obj._subset_log, alias=alias, ugid=subset_ugid,
                         level=logging.DEBUG)

        # Transform back to rotated pole if necessary.
        original_rotated_pole_crs = obj._backtransform.get(constants.BackTransform.ROTATED_POLE)
        if original_rotated_pole_crs is not None:
            # The back-transform is skipped when a spherical or WGS84 output coordinate system was requested.
            if not isinstance(obj.ops.output_crs, (Spherical, WGS84)):
                sfield.update_crs(original_rotated_pole_crs)

        # Update the coordinate system of the data output.
        if obj.ops.output_crs is not None:

            # If the geometry is not none, it may need to be projected to match the output coordinate system.
            if subset_sdim is not None and subset_sdim.crs != obj.ops.output_crs:
                subset_sdim.update_crs(obj.ops.output_crs)

            # Update the subsetted field's coordinate system.
            sfield = sfield.copy()
            sfield.update_crs(obj.ops.output_crs)

    # Wrap or unwrap the data if the coordinate system permits.
    _update_wrapping_(obj, sfield)

    ocgis_lh('leaving _update_aggregation_wrapping_crs_', obj._subset_log, level=logging.DEBUG)

    return sfield
def _iter_collections_(self):
    """
    Yield one collection per processed subset, running any requested calculations first.

    NOTE(review): SOURCE was collapsed onto a few lines; the block nesting below was reconstructed from the code's
    syntax and should be confirmed against the original file.

    :rtype: :class:`ocgis.collection.base.AbstractCollection`
    :raises NotImplementedError: if an output grouping is requested
    """

    # Multivariate calculations require datasets come in as a list with all variable inputs part of the same
    # sequence.
    if self._has_multivariate_calculations:
        itr_rd = [[rd for rd in self.ops.dataset]]
    # Otherwise, process geometries expects a single element sequence.
    else:
        itr_rd = [[rd] for rd in self.ops.dataset]

    # Configure the progress object.
    self._progress.n_subsettables = len(itr_rd)
    self._progress.n_geometries = get_default_or_apply(self.ops.geom, len, default=1)
    self._progress.n_calculations = get_default_or_apply(self.ops.calc, len, default=0)

    # Some introductory logging.
    msg = '{0} dataset collection(s) to process.'.format(self._progress.n_subsettables)
    ocgis_lh(msg=msg, logger=self._subset_log)
    if self.ops.geom is None:
        msg = 'Entire spatial domain returned. No selection geometries requested.'
    else:
        msg = 'Each data collection will be subsetted by {0} selection geometries.'.format(
            self._progress.n_geometries)
    ocgis_lh(msg=msg, logger=self._subset_log)
    if self._progress.n_calculations == 0:
        msg = 'No calculations requested.'
    else:
        msg = 'The following calculations will be applied to each data collection: {0}.'. \
            format(', '.join([_['func'] for _ in self.ops.calc]))
    ocgis_lh(msg=msg, logger=self._subset_log)

    # Process the incoming datasets. Convert from request datasets to fields as needed.
    for rds in itr_rd:

        try:
            msg = 'Processing URI(s): {0}'.format([rd.uri for rd in rds])
        except AttributeError:
            # Field objects have no URIs. Multivariate calculations change how the request dataset iterator is
            # configured as well.
            msg = []
            for rd in rds:
                try:
                    msg.append(rd.uri)
                except AttributeError:
                    # Likely a field object which does have a name.
                    msg.append(rd.name)
            msg = 'Processing URI(s) / field names: {0}'.format(msg)
        ocgis_lh(msg=msg, logger=self._subset_log)

        for coll in self._process_subsettables_(rds):
            # If there are calculations, do those now and return a collection.
            if not vm.is_null and self.cengine is not None:
                ocgis_lh('Starting calculations.', self._subset_log)
                raise_if_empty(coll)

                # Look for any temporal grouping optimizations.
                if self.ops.optimizations is None:
                    tgds = None
                else:
                    tgds = self.ops.optimizations.get('tgds')

                # Execute the calculations.
                coll = self.cengine.execute(coll, file_only=self.ops.file_only, tgds=tgds)

                # If we need to spatially aggregate and calculations used raw values, update the collection
                # fields and subset geometries.
                if self.ops.aggregate and self.ops.calc_raw:
                    # Iterate over a copy because the fields are replaced on the collection inside the loop.
                    coll_to_itr = coll.copy()
                    for sfield, container in coll_to_itr.iter_fields(yield_container=True):
                        sfield = _update_aggregation_wrapping_crs_(self, None, sfield, container, None)
                        coll.add_field(sfield, container, force=True)
            else:
                # If there are no calculations, mark progress to indicate a geometry has been completed.
                self._progress.mark()

            # Conversion of groups.
            if self.ops.output_grouping is not None:
                raise NotImplementedError
            else:
                ocgis_lh('_iter_collections_ yielding', self._subset_log, level=logging.DEBUG)
                yield coll
def _process_geometries_(self, itr, field, alias):
    """
    Subset the target field by each selection geometry and yield one collection per geometry.

    NOTE(review): SOURCE was collapsed onto a few lines; the block nesting below was reconstructed from the code's
    syntax. In particular, the post-subset processing is assumed to sit directly under ``if not vm.is_null`` and
    not under the ``allow_empty`` check - confirm against the original file.

    :param itr: An iterator yielding :class:`~ocgis.Field` objects for subsetting.
    :type itr: [None] or [:class:`~ocgis.Field`, ...]
    :param :class:`ocgis.Field` field: The target field for operations.
    :param str alias: The request data alias currently being processed.
    :rtype: :class:`~ocgis.SpatialCollection`
    """

    assert isinstance(field, Field)

    ocgis_lh('processing geometries', self._subset_log, level=logging.DEBUG)
    # Process each geometry.
    for subset_field in itr:

        # Initialize the collection storage.
        coll = self._get_initialized_collection_()
        if vm.is_null:
            # Ranks outside the current communicator pass the field through untouched.
            sfield = field
        else:
            # Always work with a copy of the subset geometry. This gets twisted in interesting ways depending on the
            # subset target with wrapping, coordinate system conversion, etc.
            subset_field = deepcopy(subset_field)

            if self.ops.regrid_destination is not None:
                # If there is regridding, make another copy as this geometry may be manipulated during subsetting of
                # sources.
                subset_field_for_regridding = deepcopy(subset_field)

            # Operate on the rotated pole coordinate system by first transforming it to the default coordinate
            # system.
            key = constants.BackTransform.ROTATED_POLE
            self._backtransform[key] = self._get_update_rotated_pole_state_(field, subset_field)

            # Check if the geometric abstraction is available on the field object.
            self._assert_abstraction_available_(field)

            # Return a slice or snippet if either of these are requested.
            field = self._get_slice_or_snippet_(field)

            # Choose the subset UGID value.
            if subset_field is None:
                msg = 'No selection geometry. Returning all data. No unique geometry identifier.'
                subset_ugid = None
            else:
                subset_ugid = subset_field.geom.ugid.get_value()[0]
                msg = 'Subsetting with selection geometry having UGID={0}'.format(subset_ugid)
            ocgis_lh(msg=msg, logger=self._subset_log)

            if subset_field is not None:
                # If the coordinate systems differ, update the spatial subset's CRS to match the field.
                if subset_field.crs is not None and subset_field.crs != field.crs:
                    subset_field.update_crs(field.crs)
                # If the geometry is a point, it needs to be buffered if there is a search radius multiplier.
                subset_field = self._get_buffered_subset_geometry_if_point_(field, subset_field)

            # If there is a selection geometry present, use it for the spatial subset. if not, all the field's data
            # is being returned.
            if subset_field is None:
                sfield = field
            else:
                sfield = self._get_spatially_subsetted_field_(alias, field, subset_field, subset_ugid)

            ocgis_lh(msg='after self._get_spatially_subsetted_field_', logger=self._subset_log,
                     level=logging.DEBUG)

            # Create the subcommunicator following the data subset to ensure non-empty communication.
            vm.create_subcomm_by_emptyable(SubcommName.FIELD_SUBSET, sfield, is_current=True, clobber=True)

            if not vm.is_null:
                if not sfield.is_empty and not self.ops.allow_empty:
                    raise_if_empty(sfield)

                # If the base size is being requested, bypass the rest of the operations.
                if not self._request_base_size_only:
                    # Perform regridding operations if requested.
                    if self.ops.regrid_destination is not None and sfield.regrid_source:
                        sfield = self._get_regridded_field_with_subset_(
                            sfield, subset_field_for_regridding=subset_field_for_regridding)
                    else:
                        ocgis_lh(msg='no regridding operations', logger=self._subset_log, level=logging.DEBUG)
                    # If empty returns are allowed, there may be an empty field.
                    if sfield is not None:
                        # Only update spatial stuff if there are no calculations and, if there are calculations,
                        # those calculations are not expecting raw values.
                        if self.ops.calc is None or (self.ops.calc is not None and not self.ops.calc_raw):
                            # Update spatial aggregation, wrapping, and coordinate systems.
                            sfield = _update_aggregation_wrapping_crs_(self, alias, sfield, subset_field,
                                                                       subset_ugid)
                            ocgis_lh('after _update_aggregation_wrapping_crs_ in _process_geometries_',
                                     self._subset_log, level=logging.DEBUG)

        # Add the created field to the output collection with the selection geometry.
        # A missing field is only expected when aggregating.
        if sfield is None:
            assert self.ops.aggregate
        if sfield is not None:
            coll.add_field(sfield, subset_field)

        yield coll
def write_field(cls, field, opened_or_path, **kwargs):
    """
    Write a field by converting it to its write target and handing that target to
    :meth:`write_variable_collection`.

    :param field: The field to write. It is passed through ``raise_if_empty`` before any other work is done.
    :param opened_or_path: Forwarded unchanged to :meth:`write_variable_collection`.
    :param kwargs: Forwarded unchanged to :meth:`write_variable_collection`.
    """
    # Screen the field first so nothing is prepared or written for an empty object.
    raise_if_empty(field)

    # The driver decides which variable collection actually represents the field on disk.
    write_target = cls._get_field_write_target_(field)
    cls.write_variable_collection(write_target, opened_or_path, **kwargs)
def get_distributed_slice(self, slc):
    """
    Slice the dimension in parallel. The sliced dimension object is a shallow copy. The returned dimension may be
    empty.

    Three paths are taken depending on the slice and the dimension:

    * A plain ``slice(None)`` returns ``self.copy()``.
    * A dimension that is not distributed (``self.dist`` is false) is sliced directly with ``self[slc]``.
    * Otherwise the slice is applied against the local bounds, the global bounds of non-empty results are reset to
      ``(0, <sum of the sliced sizes gathered through vm>)`` and the local bounds are re-based across the ranks that
      remain non-empty.

    :param slc: A :class:`slice`-like object or a fancy slice. If this is a fancy slice, ``slc`` must be
     processor-local. If the fancy slice uses integer indices, the indices must be local. In other words, a fancy
     ``slc`` is not manipulated or redistributed prior to slicing.
    :rtype: :class:`~ocgis.Dimension`
    :raises: :class:`~ocgis.exc.EmptyObjectError`
    """
    raise_if_empty(self)

    # Normalize the input through ``get_formatted_slice`` and keep the first element of its result.
    slc = get_formatted_slice(slc, 1)[0]
    # Anything that is not a plain ``slice`` object is treated as a fancy slice.
    is_fancy = not isinstance(slc, slice)

    # A full slice selects everything, so a copy is sufficient. The comparison is only evaluated for plain slices.
    if not is_fancy and slc == slice(None):
        ret = self.copy()
    # Use standard slicing for non-distributed dimensions.
    elif not self.dist:
        ret = self[slc]
    else:
        if is_fancy:
            # Fancy slices are used as given; no global-to-local translation is applied.
            local_slc = slc
        else:
            # Translate the requested (start, stop) using this dimension's local bounds. A ``None`` result is
            # handled below as "no overlap".
            local_slc = get_global_to_local_slice((slc.start, slc.stop), self.bounds_local)
            if local_slc is not None:
                local_slc = slice(*local_slc)

        # Slice does not overlap local bounds. The dimension is now empty with size 0.
        if local_slc is None:
            ret = self.copy()
            ret.convert_to_empty()
            dimension_size = 0
        # Slice overlaps so do a slice on the dimension using the local slice.
        else:
            ret = self[local_slc]
            dimension_size = len(ret)
        assert dimension_size >= 0

        # Collect the sliced sizes; only rank 0 sums them into the new global bounds.
        dimension_sizes = vm.gather(dimension_size)
        if vm.rank == 0:
            sum_dimension_size = 0
            for ds in dimension_sizes:
                try:
                    sum_dimension_size += ds
                except TypeError:
                    # Entries that cannot be added to an integer are skipped — presumably ``None`` placeholders
                    # in the gathered sequence; confirm against ``vm.gather``.
                    pass
            bounds_global = (0, sum_dimension_size)
        else:
            bounds_global = None
        # Share the bounds computed on rank 0 (assumes the default broadcast root is rank 0 — TODO confirm).
        bounds_global = vm.bcast(bounds_global)
        # Empty results do not receive the new global bounds.
        if not ret.is_empty:
            ret.bounds_global = bounds_global

        # Normalize the local bounds on live ranks.
        inner_live_ranks = get_nonempty_ranks(ret, vm)
        with vm.scoped('bounds normalization', inner_live_ranks):
            # Ranks for which ``vm.is_null`` is true inside the scope skip the normalization — presumably the
            # ranks left out of ``inner_live_ranks``; confirm against ``vm.scoped``.
            if not vm.is_null:
                # Seed the running offset with the length held on rank 0 and share it.
                if vm.rank == 0:
                    adjust = len(ret)
                else:
                    adjust = None
                adjust = vm.bcast(adjust)
                # Walk the ranks in order. On its turn, every rank other than 0 shifts its local bounds by the
                # running offset and then adds its own length to the offset. All ranks then synchronize and
                # receive the offset from the rank whose turn it was.
                for current_rank in vm.ranks:
                    if vm.rank == current_rank:
                        if vm.rank != 0:
                            ret.bounds_local = [b + adjust for b in ret.bounds_local]
                            adjust += len(ret)
                    vm.barrier()
                    adjust = vm.bcast(adjust, root=current_rank)
    return ret
def write_variable(cls, var, dataset, write_mode=MPIWriteMode.NORMAL, **kwargs):
    """
    Write a variable to an open netCDF dataset object.

    If the variable is not orphaned, the write is delegated to ``var.parent.write`` and nothing else in this
    method runs.

    :param var: Variable object.
    :param dataset: Open netCDF dataset object.
    :param write_mode: One of the :class:`MPIWriteMode` values. ``TEMPLATE`` forces ``file_only``. ``FILL`` reuses
     the variable already present in ``dataset`` and skips dimension creation, variable creation and attribute
     writing. ``ASYNCHRONOUS`` calls ``set_collective(True)`` on the netCDF variable.
    :param kwargs: Arguments to netCDF variable creation with additional keyword arguments below.
    :keyword bool file_only: (``=False``) If ``True``, do not write the value to the output file. Create an empty
     netCDF file.
    :keyword bool unlimited_to_fixed_size: (``=False``) If ``True``, convert the unlimited dimension to a fixed size.
    :returns: The return value of ``var.parent.write`` when the variable is not orphaned, otherwise ``None``.
    :raises ValueError: If ``var.name`` is ``None``.
    """
    # There should never be any write operations associated with an empty variable.
    raise_if_empty(var)

    # Write the parent collection if available on the variable. The keyword arguments received here are forwarded
    # to the parent under the ``KeywordArgument.VARIABLE_KWARGS`` key.
    if not var.is_orphaned:
        parent_kwargs = {}
        parent_kwargs[KeywordArgument.VARIABLE_KWARGS] = kwargs
        return var.parent.write(dataset, **parent_kwargs)

    assert isinstance(dataset, nc.Dataset)
    # Pop the options consumed by this method so they are not forwarded to ``createVariable`` below.
    file_only = kwargs.pop(KeywordArgument.FILE_ONLY, False)
    unlimited_to_fixed_size = kwargs.pop(KeywordArgument.UNLIMITED_TO_FIXED_SIZE, False)

    # No data should be written during a global write. Data will be filled in during the append process.
    if write_mode == MPIWriteMode.TEMPLATE:
        file_only = True

    if var.name is None:
        msg = 'A variable "name" is required.'
        raise ValueError(msg)

    # Dimension creation should not occur during a fill operation. The dimensions and variables have already been
    # created.
    # ``dtype``, ``dimensions`` and ``fill_value`` are only defined on this path; they are only consumed by the
    # ``createVariable`` call, which is likewise skipped for a fill operation.
    if write_mode != MPIWriteMode.FILL:
        dimensions = var.dimensions

        dtype = cls.get_variable_write_dtype(var)
        # Object types get a variable-length type created on the dataset, named after the first dimension.
        if isinstance(dtype, ObjectType):
            dtype = dtype.create_vltype(dataset, dimensions[0].name + '_VLType')
        # Assume we are writing string data if the data type is object.
        elif dtype == str or var.is_string_object:
            dtype = 'S1'

        if len(dimensions) > 0:
            # Special handling for string variables.
            # Strings are written as single characters, so the variable's first dimension is paired with a new
            # string-length dimension sized by the global maximum string length.
            if dtype == 'S1':
                max_length = var.string_max_length_global
                assert max_length is not None
                dimensions = [var.dimensions[0], Dimension('{}_ocgis_slen'.format(var.name), max_length)]

            dimensions = list(dimensions)
            # Convert the unlimited dimension to fixed size if requested.
            # Only the first unlimited dimension found is replaced; the loop stops after it.
            for idx, d in enumerate(dimensions):
                if d.is_unlimited and unlimited_to_fixed_size:
                    dimensions[idx] = Dimension(d.name, size=var.shape[idx])
                    break
            # Create the dimensions.
            for dim in dimensions:
                create_dimension_or_pass(dim, dataset, write_mode=write_mode)
            # From here on only the dimension names are needed.
            dimensions = [d.name for d in dimensions]

        # Only use the fill value if something is masked.
        # The fill value is requested when any of the following holds:
        #  1. the variable has dimensions and masked values, and this is a template write or data will be written;
        #  2. the dataset's data model starts with 'NETCDF3', no value is allocated and there are dimensions;
        #  3. ``env.USE_NETCDF4_MPI`` is set, the variable has a mask and ``vm.size`` is greater than one.
        is_nc3 = dataset.data_model.startswith('NETCDF3')
        if ((len(dimensions) > 0 and var.has_masked_values) and (
                write_mode == MPIWriteMode.TEMPLATE or not file_only)) or (
                is_nc3 and not var.has_allocated_value and len(dimensions) > 0) or (
                env.USE_NETCDF4_MPI and var.has_mask and vm.size > 1):
            fill_value = cls.get_variable_write_fill_value(var)
        else:
            # Copy from original attributes.
            if '_FillValue' not in var.attrs:
                fill_value = None
            else:
                fill_value = cls.get_variable_write_fill_value(var)

    # A fill operation looks up the existing variable by name; every other mode creates it. Remaining keyword
    # arguments are passed straight to ``createVariable``.
    if write_mode == MPIWriteMode.FILL:
        ncvar = dataset.variables[var.name]
    else:
        ncvar = dataset.createVariable(var.name, dtype, dimensions=dimensions, fill_value=fill_value, **kwargs)

    if write_mode == MPIWriteMode.ASYNCHRONOUS:
        # Tell NC4 we are writing the variable in parallel
        ncvar.set_collective(True)

    # Do not fill values on file_only calls. Also, only fill values for variables with dimension greater than zero.
    # Coordinate reference system variables never have values written here.
    # NOTE(review): ``var`` already passed ``raise_if_empty`` above, so the ``var.is_empty`` test looks redundant —
    # confirm before removing.
    if not file_only and not var.is_empty and not isinstance(var, CoordinateReferenceSystem):
        if not var.is_string_object and isinstance(var.dtype, ObjectType) and not isinstance(var, TemporalVariable):
            # Object-typed values are written one element at a time. The netCDF index runs over the local bounds
            # of the first dimension while the in-memory value is indexed from zero.
            bounds_local = var.dimensions[0].bounds_local
            for idx in range(bounds_local[0], bounds_local[1]):
                ncvar[idx] = np.array(var.get_value()[idx - bounds_local[0]])
        else:
            fill_slice = get_slice_sequence_using_local_bounds(var)
            data_value = cls.get_variable_write_value(var)
            # Only write allocated values.
            if data_value is not None:
                if var.dtype == str or var.is_string_object:
                    # Strings are written character by character into the (element, character) positions.
                    for idx in range(fill_slice[0].start, fill_slice[0].stop):
                        try:
                            curr_value = data_value[idx - fill_slice[0].start]
                        except Exception as e:
                            # Re-raise the same exception type with the variable name prepended to the message.
                            # NOTE(review): this builds the exception from a single message argument and drops
                            # the original traceback; exception classes needing more constructor arguments
                            # would presumably fail here — confirm.
                            msg = "Variable name is '{}'. Original message: ".format(var.name) + str(e)
                            raise e.__class__(msg)
                        for sidx, sval in enumerate(curr_value):
                            ncvar[idx, sidx] = sval
                elif var.ndim == 0:
                    # Zero-dimensional variables are assigned in full.
                    ncvar[:] = data_value
                else:
                    try:
                        ncvar.__setitem__(fill_slice, data_value)
                    except Exception as e:
                        # Same re-raise pattern as above: same exception type, variable name added to the message.
                        msg = "Variable name is '{}'. Original message: ".format(var.name) + str(e)
                        raise e.__class__(msg)

    # Only set variable attributes if this is not a fill operation.
    if write_mode != MPIWriteMode.FILL:
        var.write_attributes_to_netcdf_object(ncvar)
        # Units are written as a string attribute when present.
        if var.units is not None:
            ncvar.setncattr('units', str(var.units))

    # Flush the dataset after the variable has been handled.
    dataset.sync()