def validate(cls, ops):
    """
    Validate an operations object against the requirements of this multivariate function.

    Problems are handed to ``ocgis_lh`` through its ``exc`` argument, which is expected to raise them.

    :param ops: The operations object to check. Its ``calc_sample_size``, ``calc`` and ``dataset`` attributes are
     read.
    :raises: DefinitionValidationError
    """
    if ops.calc_sample_size:
        from ocgis.api.parms.definition import CalcSampleSize

        exc = DefinitionValidationError(CalcSampleSize,
                                        'Multivariate functions do not calculate sample size at this time.')
        ocgis_lh(exc=exc, logger='calc.base')

    # ensure the required variables are present
    should_raise = False
    for c in ops.calc:
        # only calculation definitions for this function are relevant
        if c['func'] != cls.key:
            continue
        kwds = c['kwds']

        # check the required variables are keyword arguments
        if len(set(kwds.keys()).intersection(set(cls.required_variables))) < 2:
            should_raise = True
            break

        # ensure the mapped aliases exist. a required variable that is absent from the keyword arguments is
        # reported as a missing alias rather than leaking a bare KeyError from the lookup.
        for xx in cls.required_variables:
            if xx not in kwds or kwds[xx] not in ops.dataset:
                should_raise = True
                break

    if should_raise:
        from ocgis.api.parms.definition import Calc

        exc = DefinitionValidationError(Calc,
                                        'Variable aliases are missing for multivariate function "{0}". Required variable aliases are: {1}.'.format(
                                            cls.__name__, cls.required_variables))
        ocgis_lh(exc=exc, logger='calc.base')
def __init__(self, *args, **kwargs):
    """
    Initialize the function unless a sample size calculation was requested.

    All arguments are forwarded unchanged to ``AbstractFunction.__init__`` when the ``calc_sample_size`` keyword
    argument is anything other than exactly ``True``.

    :raises: SampleSizeNotImplemented (handed to ``ocgis_lh`` as ``exc``)
    """
    sample_size_requested = kwargs.get('calc_sample_size') is True
    if not sample_size_requested:
        AbstractFunction.__init__(self, *args, **kwargs)
        return

    # Sample sizes are not supported here; the parent initializer is deliberately not called.
    exc = SampleSizeNotImplemented(self.__class__,
                                   'Multivariate functions do not calculate sample size at this time.')
    ocgis_lh(exc=exc, logger='calc.base')
def _get_update_rotated_pole_state_(self, field, subset_sdim):
    """
    Transform a rotated pole field to ``CFWGS84`` in place when spatial work is pending.

    The transformation only happens when the field's CRS is a ``CFRotatedPole`` and at least one of these holds:
    a subset geometry is given, ``self.ops.aggregate`` is truthy, or ``self.ops.spatial_operation`` is ``'clip'``.

    :param field: The field whose ``spatial.crs`` is inspected and possibly updated.
    :type field: :class:`ocgis.interface.base.field.Field`
    :param subset_sdim: The subset geometry, if any.
    :type subset_sdim: :class:`ocgis.interface.base.dimension.spatial.SpatialDimension` or None
    :returns: A copy of the original rotated pole CRS if the field was transformed, otherwise ``None``. Callers can
     use the copy to decide whether a back-transformation is needed.
    :rtype: None or :class:`ocgis.interface.base.crs.CFRotatedPole`
    """
    # Only rotated pole coordinate systems receive special treatment.
    if not isinstance(field.spatial.crs, CFRotatedPole):
        return None

    # Without a subset geometry, aggregation or clipping there is nothing to transform for.
    transform_requested = subset_sdim is not None or self.ops.aggregate or self.ops.spatial_operation == 'clip'
    if not transform_requested:
        return None

    # Keep a copy of the rotated pole CRS before it is replaced on the field.
    rotated_pole_backup = copy(field.spatial.crs)
    ocgis_lh('initial rotated pole transformation...', self._subset_log, level=logging.DEBUG)
    field.spatial.update_crs(CFWGS84())
    ocgis_lh('...finished initial rotated pole transformation', self._subset_log, level=logging.DEBUG)
    return rotated_pole_backup
def parse(self, value):
    """
    Convert a geometry selection into the form used downstream.

    * A ``Polygon``, ``MultiPolygon`` or ``Point`` is wrapped in a one-element list of geometry dictionaries.
    * A list/tuple made up only of dictionaries is returned as-is after missing ``'properties'`` and ``'crs'``
      entries are filled in place (the CRS defaults to ``CFWGS84`` with a warning).
    * Any other list/tuple is read as coordinates: two values form a point, four values form a bounding box
      ``(minx, miny, maxx, maxy)``. ``self._bounds`` is set from the resulting geometry.
    * A ``ShpCabinetIterator`` is returned unchanged after its ``key`` is stored on ``self._shp_key``.
    * Anything else is returned unchanged.

    :param value: The geometry selection to parse.
    :raises: DefinitionValidationError
    """
    if type(value) in [Polygon, MultiPolygon, Point]:
        ret = [{'geom': value, 'properties': {'ugid': 1}, 'crs': CFWGS84()}]
    elif type(value) in [list, tuple]:
        if all([isinstance(element, dict) for element in value]):
            for ii, element in enumerate(value, start=1):
                if 'geom' not in element:
                    ocgis_lh(exc=DefinitionValidationError(self, 'Geometry dictionaries must have a "geom" key.'))
                if 'properties' not in element:
                    element['properties'] = {'UGID': ii}
                if 'crs' not in element:
                    element['crs'] = CFWGS84()
                    ocgis_lh(msg='No CRS in geometry dictionary - assuming WGS84.', level=logging.WARN,
                             check_duplicate=True)
            ret = value
        else:
            if len(value) == 2:
                geom = Point(value[0], value[1])
            elif len(value) == 4:
                minx, miny, maxx, maxy = value
                geom = Polygon(((minx, miny),
                                (minx, maxy),
                                (maxx, maxy),
                                (maxx, miny)))
            else:
                # Previously fell through with "geom" unbound and failed with an UnboundLocalError.
                raise DefinitionValidationError(
                    self, 'Coordinate sequences must have two (point) or four (bounding box) elements.')
            if not geom.is_valid:
                raise DefinitionValidationError(self, 'Parsed geometry is not valid.')
            ret = [{'geom': geom, 'properties': {'ugid': 1}, 'crs': CFWGS84()}]
            self._bounds = geom.bounds
    elif isinstance(value, ShpCabinetIterator):
        self._shp_key = value.key
        ret = value
    else:
        ret = value
    return ret
def _format_parms_(self, values): """ :param values: A dictionary containing the parameter values to check. :type values: dict[str, type] """ ret = {} for k, v in values.iteritems(): try: if isinstance(v, self.parms_definition[k]): formatted = v else: formatted = self.parms_definition[k](v) # likely a nonetype except TypeError as e: if self.parms_definition[k] is None: formatted = v else: ocgis_lh(exc=e, logger="calc.base") # likely a required variable for a multivariate calculation except KeyError as e: if k in self.required_variables: formatted = values[k] else: ocgis_lh(exc=e, logger="calc.base") ret.update({k: formatted}) return ret
def _get_regrid_destination_(self):
    """
    Prepare destination field for regridding.

    The destination is optionally subset by ``self.subset_sdim``, moved to a ``Spherical`` coordinate system and
    has its mask replaced by an all-``False`` mask.

    :returns: The spatial dimension of the (possibly subsetted) regrid destination.
    :rtype: :class:`~ocgis.SpatialDimension`
    """

    # Spatially subset the regrid destination. #####################################################################

    if self.subset_sdim is None:
        ocgis_lh(logger='regrid', msg='no spatial subsetting', level=logging.DEBUG)
        regrid_destination = self.field_dst
    else:
        if self.with_buffer:
            # Buffer the subset geometry by the resolution of the source field to improve chances of overlap between
            # source and destination extents.
            buffer_value = self.field_src.spatial.grid.resolution
            buffer_crs = self.field_src.spatial.crs
        else:
            buffer_value, buffer_crs = [None, None]
        ss = SpatialSubsetOperation(self.field_dst)
        regrid_destination = ss.get_spatial_subset('intersects', self.subset_sdim,
                                                   use_spatial_index=env.USE_SPATIAL_INDEX,
                                                   select_nearest=False, buffer_value=buffer_value,
                                                   buffer_crs=buffer_crs)

    # Transform the coordinate system of the regrid destination. ###################################################

    # Update the coordinate system of the regrid destination if required.
    try:
        destination_sdim = regrid_destination.spatial
    except AttributeError:
        # Likely a spatial dimension object already.
        destination_sdim = regrid_destination

    # If switched to true, the regrid destination coordinate system must be updated to match the source.
    update_regrid_destination_crs = False
    if not isinstance(regrid_destination.crs, Spherical):
        if isinstance(regrid_destination, Field):
            # A WGS84 CRS on a field only triggers an update when the field reports that the coordinate system was
            # assigned (``_has_assigned_coordinate_system``). Any non-WGS84 CRS always triggers an update.
            if isinstance(destination_sdim.crs, WGS84) and regrid_destination._has_assigned_coordinate_system:
                update_regrid_destination_crs = True
            elif isinstance(destination_sdim.crs,
                            WGS84) and not regrid_destination._has_assigned_coordinate_system:
                pass
            else:
                update_regrid_destination_crs = True
        else:
            if not isinstance(destination_sdim.crs, Spherical):
                update_regrid_destination_crs = True

    if update_regrid_destination_crs:
        ocgis_lh(logger='regrid',
                 msg='updating regrid destination to spherical. regrid destination crs is: {}'.format(
                     regrid_destination.crs), level=logging.DEBUG)
        destination_sdim.update_crs(Spherical())
    else:
        # No transformation requested: the CRS attribute is overwritten without calling ``update_crs``.
        destination_sdim.crs = Spherical()

    # Remove the mask from the destination field. ##################################################################

    new_mask = np.zeros(destination_sdim.shape, dtype=bool)
    destination_sdim.set_mask(new_mask)

    return destination_sdim
def add_field(self, ugid, geom, field, properties=None, name=None):
    """
    Store a field on the collection under a geometry identifier and a name.

    :param int ugid: Identifier of the geometry; used as key for ``self.geoms``, ``self.properties`` and the
     collection itself.
    :param geom: The geometry stored under ``ugid``.
    :param field: The field to add. May be ``None`` (likely an empty subset), in which case ``name`` must be given.
    :param dict properties: Properties stored under ``ugid``.
    :param str name: Key for the field. Defaults to ``field.name``.
    :raises: AssertionError if ``name`` is already present for ``ugid``.
    """
    name = name or field.name

    # Assign a unique identifier to the field if it does not have one yet.
    try:
        if field.uid is None:
            next_uid = self._storage_id_next
            field.uid = next_uid
            self._storage_id.append(field.uid)
    except AttributeError as e:
        # A ``None`` field has no "uid" and is tolerated. Any other attribute failure is reported.
        if field is not None:
            ocgis_lh(exc=e, logger='collection')

    self.geoms.update({ugid: geom})
    self.properties.update({ugid: properties})

    if ugid not in self:
        self.update({ugid: {}})
    fields_for_geometry = self[ugid]
    assert name not in fields_for_geometry
    fields_for_geometry.update({name: field})
def _execute_(self):
    """
    Run the calculation for every variable of ``self.field`` and add each result to the collection.

    When ``self.file_only`` is set, ``self._empty_fill`` is used in place of a calculated array and no casting,
    shape checking or aggregation is performed.
    """
    for variable in self.field.variables.itervalues():
        self.validate_units(variable)

        fill = self._empty_fill if self.file_only else self.calculate(variable.value, **self.parms)
        dtype = self.dtype or variable.dtype

        if not self.file_only:
            # Cast to the output data type and confirm the calculation kept the field's shape.
            if dtype != fill.dtype:
                fill = fill.astype(dtype)
            assert fill.shape == self.field.shape

            if self.tgd is None:
                if self.calc_sample_size:
                    msg = "Sample sizes not relevant for scalar transforms."
                    ocgis_lh(msg=msg, logger="calc.base", level=logging.WARN)
                fill = self._get_or_pass_spatial_agg_fill_(fill)
            else:
                # A temporal grouping is present; aggregate across it.
                fill = self._get_temporal_agg_fill_(fill, f=self.aggregate_temporal, parms={})

        units = self.get_output_units(variable)
        self._add_to_collection_(value=fill, parent_variables=[variable], dtype=self.dtype,
                                 fill_value=self.fill_value, units=units)
def _get_projection_coordinate_(target,meta): key = 'projection_{0}_coordinate'.format(target) for k,v in meta['variables'].iteritems(): if 'standard_name' in v['attrs']: if v['attrs']['standard_name'] == key: return(k) ocgis_lh(logger='crs',exc=ProjectionCoordinateNotFound(key))
def execute(self):
    """
    Regrid ``self.field_src`` onto the prepared destination.

    After regridding, the result is put back into the original source coordinate system and, if a subset
    geometry is set, subsetted again.

    :returns: The regridded field.
    :rtype: :class:`~ocgis.Field`
    """
    destination_sdim = self._get_regrid_destination_()
    self._update_regrid_source_coordinate_system_()

    # Regrid the input field. Only a single source is passed in, so the first result is the one wanted.
    ocgis_lh(logger='regrid', msg='Creating regridded fields...', level=logging.INFO)
    regridded_fields = list(iter_regridded_fields([self.field_src], destination_sdim, **self.regrid_options))
    result = regridded_fields[0]

    # Return the source field to its original coordinate system.
    if self._regrid_required_source_crs_update:
        ocgis_lh(logger='regrid', msg='Reverting source field to original coordinate system...',
                 level=logging.INFO)
        result.spatial.update_crs(self._original_sfield_crs)
    else:
        result.spatial.crs = self._original_sfield_crs

    if self.subset_sdim is None:
        return result

    # Subset the output from the regrid operation as masked values may be introduced on the edges.
    subset_operation = SpatialSubsetOperation(result)
    return subset_operation.get_spatial_subset('intersects', self.subset_sdim,
                                               use_spatial_index=env.USE_SPATIAL_INDEX, select_nearest=False)
def _get_calendar_day_window_(cday_index,target_cday_index,width): width = int(width) try: assert(width >= 3) assert(width%2 != 0) except AssertionError: ocgis_lh(exc=ValueError('Kernel widths must be >= 3 and be oddly numbered.'),logger='calc.library') stride_dim = (width-1)/2 axis_length = cday_index.shape[0] lower_idx = target_cday_index - stride_dim upper_idx = target_cday_index + stride_dim + 1 if lower_idx < 0: a = cday_index[lower_idx:] b = cday_index[0:target_cday_index] lower = np.append(a,b) else: lower = cday_index[lower_idx:target_cday_index] if upper_idx > axis_length: a = cday_index[0:upper_idx-axis_length] b = cday_index[target_cday_index+1:upper_idx] upper = np.append(a,b) else: upper = cday_index[target_cday_index+1:upper_idx] ret = np.append(cday_index[target_cday_index],np.append(lower,upper)) return(ret)
def set_abstraction_geom(self, force=True, create_ugid=False, ugid_name=HeaderName.ID_GEOMETRY, ugid_start=1,
                         set_ugid_as_data=False):
    """
    Ensure the field has an abstraction geometry, building it from the grid when no geometry is set.

    :param bool force: Passed to ``set_geom_from_grid`` when the geometry is created from the grid.
    :param bool create_ugid: If ``True``, create a unique identifier variable on the geometry. This only happens
     when the geometry does not already have a ``ugid``.
    :param str ugid_name: Name for the ``ugid`` variable.
    :param int ugid_start: Starting value to use for the unique identifier.
    :param bool set_ugid_as_data: If ``True``, add the geometry's ``ugid`` variable to the field as a data
     variable.
    :raises: ValueError if there is neither a geometry nor a grid.
    """
    if self.geom is None:
        # The grid is the only remaining source for a geometry.
        if self.grid is None:
            raise ValueError('No grid available to set abstraction geometry.')
        self.set_geom_from_grid(force=force)

    ugid_missing = self.geom.ugid is None
    if ugid_missing and create_ugid:
        ocgis_lh(msg='before self.geom.create_ugid_global in {}'.format(self.__class__), level=logging.DEBUG)
        self.geom.create_ugid_global(ugid_name, start=ugid_start)
        ocgis_lh(msg='after self.geom.create_ugid_global in {}'.format(self.__class__), level=logging.DEBUG)

    if set_ugid_as_data:
        ugid_variable = self.geom.ugid
        self.add_variable(ugid_variable, force=True, is_data=True)
def _get_regrid_destination_(self):
    """
    Prepare the destination field for regridding.

    The destination field is moved to a ``Spherical`` coordinate system first (when it is not spherical already)
    and then subsetted by the geometry of ``self.subset_field``, if one is set.

    :returns: The regrid destination and a deep copy of the destination's original coordinate system. The second
     element is ``None`` when no coordinate system update took place.
    :rtype: (:class:`~ocgis.Field`, :class:`~ocgis.CoordinateReferenceSystem` or ``None``)
    """
    # Transform the coordinate system of the regrid destination. ###################################################

    backtransform_crs = None
    if self.field_dst.crs != Spherical():
        ocgis_lh(logger='regrid',
                 msg='updating regrid destination to spherical. regrid destination crs is: {}'.format(
                     self.field_dst.crs), level=logging.DEBUG)
        # Remember the original coordinate system before it is replaced.
        backtransform_crs = deepcopy(self.field_dst.crs)
        self.field_dst.update_crs(Spherical())

    # Spatially subset the regrid destination. #####################################################################

    if self.subset_field is None:
        ocgis_lh(logger='regrid', msg='no spatial subsetting', level=logging.DEBUG)
        return self.field_dst, backtransform_crs

    subset_operation = SpatialSubsetOperation(self.field_dst)
    destination = subset_operation.get_spatial_subset('intersects', self.subset_field.geom,
                                                      use_spatial_index=env.USE_SPATIAL_INDEX,
                                                      select_nearest=False)
    return destination, backtransform_crs
def execute(self, coll, file_only=False):
    """
    Run all configured functions on every field of the collection and replace each field with a derived field.

    :param coll: The collection to process. Mapped as ``{ugid: {field alias: field}}`` and modified in place.
    :param bool file_only: Passed through to each function.
    :returns: The input collection with its fields replaced.
    """
    # The derived field type depends on whether any multivariate calculation is present.
    has_multivariate = self._check_calculation_members_(self.funcs, AbstractMultivariateFunction)
    klass = DerivedMultivariateField if has_multivariate else DerivedField

    # Create the temporal groupings once per field alias. Without a grouping, calculations are performed on
    # each element.
    if self.grouping is not None:
        ocgis_lh('setting temporal grouping(s)', 'calc.engine')
        for fields_by_alias in coll.itervalues():
            for alias, target in fields_by_alias.iteritems():
                if alias in self.tgds:
                    continue
                self.tgds[alias] = target.temporal.get_grouping(self.grouping)

    # Iterate over the fields, chaining every function's output into one variable collection per field.
    for ugid, dct in coll.iteritems():
        for alias_field, field in dct.iteritems():
            new_temporal = self.tgds.get(alias_field)
            out_vc = VariableCollection()
            for f in self.funcs:
                ocgis_lh('calculating: {0}'.format(f), logger='calc.engine')
                function = f['ref'](alias=f['name'], dtype=None, field=field, file_only=file_only, vc=out_vc,
                                    parms=f['kwds'], tgd=new_temporal, use_raw_values=self.use_raw_values,
                                    calc_sample_size=self.calc_sample_size)
                out_vc = function.execute()
            # Fall back to the field's own temporal dimension when there is no grouping for this alias.
            new_temporal = new_temporal or field.temporal
            coll[ugid][alias_field] = klass(variables=out_vc, temporal=new_temporal, spatial=field.spatial,
                                            level=field.level, realization=field.realization, meta=field.meta,
                                            uid=field.uid)
    return coll
def _get_field_write_target_(cls, field):
    """
    Collective! Prepare a field for writing.

    The field's coordinate system formats the field, a grid mask is created where another rank reports one,
    ``units`` attributes are dropped from the x/y bounds and ``env.COORDSYS_ACTUAL`` replaces the coordinate
    system when it is set. The bounds and coordinate system changes are made on a copy of the field.

    :param field: The field about to be written.
    :returns: The field to write; either the input field or a copy of it.
    """
    ocgis_lh(level=10, logger="driver.nc", msg="entering _get_field_write_target_")

    if field.crs is not None:
        field.crs.format_spatial_object(field)

    grid = field.grid
    if grid is not None:
        # If any grid pieces are masked, ensure the mask is created across all grids. The decision is taken on
        # rank 0 and broadcast to the other ranks.
        has_mask = vm.gather(grid.has_mask)
        create_mask = any(has_mask) if vm.rank == 0 else None
        create_mask = vm.bcast(create_mask)
        if create_mask and not grid.has_mask:
            grid.get_mask(create=True)

        # Putting units on bounds for netCDF-CF can confuse some parsers.
        if grid.has_bounds:
            field = field.copy()
            for coordinate in (field.x, field.y):
                coordinate.bounds.attrs.pop('units', None)

    # Remove the current coordinate system if this is a dummy coordinate system.
    if env.COORDSYS_ACTUAL is not None:
        field = field.copy()
        field.set_crs(env.COORDSYS_ACTUAL, should_add=True)

    return field
def _create_dimension_map_entries_dict_(axes, group_metadata, strict, attr_name='axis'):
    """
    Build dimension map entries for a set of axes, including their bounds variables where available.

    :param dict axes: Maps axis keys to the source passed to ``create_dimension_map_entry``. Modified in place:
     each value is replaced by its entry (or ``None``).
    :param dict group_metadata: Group metadata with a ``'variables'`` dictionary.
    :param bool strict: Passed to ``create_dimension_map_entry``.
    :param str attr_name: Passed to ``create_dimension_map_entry``.
    :returns: The entries that could be created, keyed by axis.
    :rtype: dict
    """
    variables = group_metadata['variables']

    # Bounds are never searched for realizations.
    check_bounds = [axis_key for axis_key in axes.keys() if axis_key != 'realization']

    # Get the main entry for each axis.
    for axis_key, source in list(axes.items()):
        axes[axis_key] = create_dimension_map_entry(source, variables, strict=strict, attr_name=attr_name)

    # Attempt to find bounds for each entry (ignoring realizations).
    for axis_key in check_bounds:
        entry = axes[axis_key]
        if entry is None:
            continue
        # The time axis may also name its bounds through the "climatology" attribute.
        keys = ['bounds', 'climatology'] if axis_key == 'time' else ['bounds']
        bounds_var = get_by_key_list(variables[entry['variable']]['attrs'], keys)
        if bounds_var is not None and bounds_var not in variables:
            msg = 'Bounds listed for variable "{0}" but the destination bounds variable "{1}" does not exist.'. \
                format(entry['variable'], bounds_var)
            ocgis_lh(msg, logger='nc.driver', level=logging.WARNING)
            bounds_var = None
        entry['bounds'] = bounds_var

    return {axis_key: entry for axis_key, entry in list(axes.items()) if entry is not None}
def get_between(self, lower, upper, return_indices=False, closed=False):
    """
    Subset the dimension to the elements lying between two values.

    Without bounds, ``self.value`` is compared directly. With bounds, an element is selected when either its
    minimum or maximum bound satisfies each limit.

    :param lower: Lower limit. Must not exceed ``upper``.
    :param upper: Upper limit.
    :param bool return_indices: If ``True``, also return the integer indices of the selected elements.
    :param bool closed: If ``True``, the comparisons are strict (``>`` / ``<``). Otherwise they include the limits
     (``>=`` / ``<=``).
    :returns: ``self[select]``, or a tuple of it and the selected indices.
    :raises: AssertionError, EmptySubsetError (handed to ``ocgis_lh`` as ``exc``) when nothing is selected.
    """
    assert lower <= upper

    if self.bounds is None:
        if closed:
            select = np.logical_and(self.value > lower, self.value < upper)
        else:
            select = np.logical_and(self.value >= lower, self.value <= upper)
    else:
        bounds_min = np.min(self.bounds, axis=1)
        bounds_max = np.max(self.bounds, axis=1)
        if closed:
            meets_lower = np.logical_or(bounds_min > lower, bounds_max > lower)
            meets_upper = np.logical_or(bounds_min < upper, bounds_max < upper)
        else:
            meets_lower = np.logical_or(bounds_min >= lower, bounds_max >= lower)
            meets_upper = np.logical_or(bounds_min <= upper, bounds_max <= upper)
        select = np.logical_and(meets_lower, meets_upper)

    if select.any() == False:
        ocgis_lh(exc=EmptySubsetError(origin=self.name))

    subset = self[select]
    if not return_indices:
        return subset

    positions = np.arange(select.shape[0])
    return (subset, positions[select])
def _get_bounds_from_source_(self):
    """
    Load the bounds for this dimension from the request dataset.

    The bounds variable name is looked up under ``'bounds'`` in the source metadata's ``'dim_map'`` entry for
    ``self.axis``. The dataset is opened through the request dataset's driver and is always closed again.

    :returns: The bounds sliced by ``self._src_idx``, or ``None`` when there is no request dataset, no bounds
     variable is mapped, or the bounds variable is improperly shaped (a warning is logged in that case).
    :raises: AssertionError, ValueError
    """
    # Allow NoneType bounds when there is no request dataset.
    ret = None
    if self._request_dataset is not None:
        assert self.axis is not None
        # Open the connection to the real dataset connection object.
        ds = self._request_dataset.driver.open()
        try:
            # Check for bounds.
            bounds_name = self._request_dataset.source_metadata['dim_map'][self.axis].get('bounds')
            if bounds_name is not None:
                try:
                    ret = ds.variables[bounds_name][self._src_idx, :]
                except ValueError:
                    # Inspect the variable's shape, not the variable object itself, to decide whether the
                    # failure comes from improperly shaped bounds.
                    shape = ds.variables[bounds_name].shape
                    if len(shape) != 2 or shape[1] != 2:
                        msg = (
                            'The bounds variable "{0}" has an improper shape "{1}". Bounds variables should have '
                            'dimensions (m,2).'.format(bounds_name, shape))
                        ocgis_lh(msg=msg, logger='interface.nc', level=logging.WARN)
                    else:
                        # The shape is fine, so the error has another cause.
                        raise
        finally:
            self._request_dataset.driver.close(ds)
    return ret
def __init__(self, *args, **kwds):
    """
    Initialize the spatial dimension from a grid, a geometry, or a row/column pair.

    The keyword arguments ``grid``, ``crs``, ``abstraction`` (default ``'polygon'``), ``geom``, ``point``,
    ``polygon``, ``row`` and ``col`` are consumed here. Everything else is passed to the parent initializer.

    :raises: ValueError (handed to ``ocgis_lh`` as ``exc``) when neither a grid nor a geometry is available and
     ``row`` or ``col`` is missing.
    """
    self.grid = kwds.pop('grid', None)
    self.crs = kwds.pop('crs', None)
    self.abstraction = kwds.pop('abstraction', 'polygon')
    self._geom = kwds.pop('geom', None)

    # Record whether a grid value was passed.
    self._geom_to_grid = self._grid is not None

    # Attempt to build the geometry dimension from point and/or polygon arguments.
    geom_kwds = {'point': kwds.pop('point', None), 'polygon': kwds.pop('polygon', None)}
    if any([candidate != None for candidate in geom_kwds.values()]):
        self._geom = SpatialGeometryDimension(**geom_kwds)

    # With neither a grid nor a geometry, a grid is built from the row and column arguments.
    if self.grid is None and self._geom is None:
        try:
            self.grid = SpatialGridDimension(row=kwds.pop('row'), col=kwds.pop('col'))
        except KeyError:
            ocgis_lh(exc=ValueError('A SpatialDimension without "grid" or "geom" arguments requires a "row" and "column".'))

    super(SpatialDimension, self).__init__(*args, **kwds)
def get_intersects(self,polygon,return_indices=False):
    """
    Return a copy of this spatial dimension subsetted and masked by a polygon.

    :param polygon: The subset geometry. Must be a ``Polygon`` or ``MultiPolygon``.
    :param bool return_indices: If ``True``, also return the slice produced by the grid's bounding box subset.
    :returns: The subsetted copy, or a tuple of the copy and the slice.
    :raises: ValueError (handed to ``ocgis_lh`` as ``exc``) for point geometries; NotImplementedError for any
     other geometry type or when there is no grid.
    """
    # the subset is built on a copy; "self" is only read.
    ret = copy(self)
    if type(polygon) in (Point,MultiPoint):
        exc = ValueError('Only Polygons and MultiPolygons are acceptable geometry types for intersects operations.')
        # NOTE(review): "slc" is not bound in this branch. this relies on ocgis_lh raising the exception - confirm.
        ocgis_lh(exc=exc,logger='dimension.spatial')
    elif type(polygon) in (Polygon,MultiPolygon):
        ## for a polygon subset, first the grid is subsetted by the bounds
        ## of the polygon object. the intersects operations is then performed
        ## on the polygon/point representation as appropriate.
        minx,miny,maxx,maxy = polygon.bounds
        if self.grid is None:
            raise(NotImplementedError)
        else:
            ## reset the geometries
            ret._geom = None
            ## subset the grid by its bounding box
            ret.grid,slc = self.grid.get_subset_bbox(minx,miny,maxx,maxy,return_indices=True)
            ## update the unique identifier to copy the grid uid
            ret.uid = ret.grid.uid
            ## attempt to mask the polygons
            try:
                # the right-hand side is evaluated first. presumably accessing "ret.geom" rebuilds "ret._geom",
                # which was reset to None above, before the assignment target is resolved - TODO confirm.
                ret._geom._polygon = ret.geom.polygon.get_intersects_masked(polygon)
                grid_mask = ret.geom.polygon.value.mask
            except ImproperPolygonBoundsError:
                # fall back to the point representation.
                ret._geom._point = ret.geom.point.get_intersects_masked(polygon)
                grid_mask = ret.geom.point.value.mask
            ## transfer the geometry mask to the grid mask
            ret.grid.value.mask[:,:,:] = grid_mask.copy()
    else:
        raise(NotImplementedError)

    if return_indices:
        ret = (ret,slc)

    return(ret)
def test_system_with_callback(self):
    """Test messages and exceptions reach a configured callback and debug-level messages do not."""
    fp = get_temp_path(wd=self.current_dir_output)

    def callback(message, path=fp):
        # Append each message on its own line.
        with open(path, 'a') as sink:
            sink.write(message)
            sink.write('\n')

    class FooError(Exception):
        pass

    ocgis_lh.configure(callback=callback)

    ocgis_lh(msg='this is a test message')
    # A call without a message is not expected to write anything.
    ocgis_lh()
    ocgis_lh(msg='this is a second test message')
    ocgis_lh(msg='this should not be there', level=logging.DEBUG)

    exc = FooError('foo message for value error')
    try:
        ocgis_lh(exc=exc)
    except FooError:
        pass

    expected = ['this is a test message\n',
                'this is a second test message\n',
                'FooError: foo message for value error\n']
    with open(fp, 'r') as source:
        actual = source.readlines()
    self.assertEqual(actual, expected)
def __init__(self, ops, serial=True, nprocs=1):
    """
    Set up the subset operation for an operations object.

    A calculation engine is created when ``ops.calc`` is set. For netCDF output (``ops.output_format == 'nc'``) or
    when ``ops.agg_selection`` is ``True``, the selection geometries in ``ops.geom`` are unioned into a single
    geometry and ``ops.geom`` is replaced by a one-element list.

    :param ops: The operations object. ``ops.geom`` may be replaced.
    :param bool serial: Stored on the instance.
    :param int nprocs: Stored on the instance.
    """
    self.ops = ops
    self.serial = serial
    self.nprocs = nprocs

    self._subset_log = ocgis_lh.get_logger('subset')

    # Create the calculation engine.
    if self.ops.calc is None:
        self.cengine = None
    else:
        ocgis_lh('initializing calculation engine', self._subset_log, level=logging.DEBUG)
        self.cengine = OcgCalculationEngine(self.ops.calc_grouping,
                                            self.ops.calc,
                                            raw=self.ops.calc_raw,
                                            agg=self.ops.aggregate,
                                            calc_sample_size=self.ops.calc_sample_size)

    # In the case of netCDF output, geometries must be unioned. This is also true for the case of the selection
    # geometry being requested as aggregated.
    union_requested = self.ops.output_format == 'nc' or self.ops.agg_selection is True
    if union_requested and self.ops.geom is not None:
        ocgis_lh('aggregating selection geometry', self._subset_log)
        for position, element_geom in enumerate(self.ops.geom):
            if position == 0:
                # The first element provides the coordinate system and seeds the union.
                new_geom = element_geom['geom']
                new_crs = element_geom['crs']
                new_properties = {'UGID': 1}
            else:
                new_geom = new_geom.union(element_geom['geom'])
        self.ops.geom = [{'geom': new_geom, 'properties': new_properties, 'crs': new_crs}]
def __init__(self, crs=None, prjs=None, epsg=None):
    """
    Build the coordinate system value from a dictionary, a PROJ.4 string or an EPSG code.

    The first available source wins, in the order ``crs``, ``prjs``, ``epsg``. The result is normalized by a round
    trip through ``SpatialReference`` and stored on ``self.value``.

    :param dict crs: Coordinate system parameters. Values are converted in place: ``unicode`` values become
     ``str`` and values with a ``tolist`` method are replaced by its result.
    :param str prjs: A PROJ.4 string.
    :param int epsg: An EPSG code.
    :raises: NotImplementedError when no source is given; ValueError (handed to ``ocgis_lh`` as ``exc``) when the
     resulting value is empty.
    """
    if crs is None:
        if prjs is not None:
            crs = from_string(prjs)
        elif epsg is not None:
            sr = SpatialReference()
            sr.ImportFromEPSG(epsg)
            crs = from_string(sr.ExportToProj4())
        else:
            raise NotImplementedError
    else:
        ## remove unicode and change to python types
        for k, v in crs.iteritems():
            if type(v) == unicode:
                crs[k] = str(v)
            else:
                try:
                    crs[k] = v.tolist()
                except AttributeError:
                    continue

    sr = SpatialReference()
    sr.ImportFromProj4(to_string(crs))
    self.value = from_string(sr.ExportToProj4())

    # Explicit check instead of "assert", which is skipped when Python runs with -O and would let an empty
    # coordinate system pass silently.
    if self.value == {}:
        ocgis_lh(logger='crs', exc=ValueError(
            'Empty CRS: The conversion to PROJ4 may have failed. The CRS value is: {0}'.format(crs)))
def __init__(self, *args, **kwargs):
    """
    Initialize the function and switch off sample size calculation when there is no temporal grouping.

    All arguments are forwarded to the parent initializer. If ``calc_sample_size`` is set while ``tgd`` is
    ``None``, a warning is logged and ``calc_sample_size`` is reset to ``False``.
    """
    super(AbstractUnivariateFunction, self).__init__(*args, **kwargs)

    if not (self.calc_sample_size and self.tgd is None):
        return

    msg = 'Sample sizes not relevant for scalar transforms with no temporal grouping. Setting to False.'
    ocgis_lh(msg=msg, logger='calc.base', level=logging.WARN)
    self.calc_sample_size = False
def _build_(self, coll):
    """
    Extend the CSV build with an open Fiona shapefile for the geometry identifiers.

    The shapefile is only created for non-aggregated data. For aggregated data a warning is logged and the
    ``'fiona_object'`` entry is ``None``.

    :param coll: The collection providing the archetype field.
    :returns: The dictionary returned by ``CsvConverter._build_`` with a ``'fiona_object'`` entry added.
    :raises: ValueError if the archetype field has no coordinate system.
    """
    ret = CsvConverter._build_(self, coll)

    self._ugid_gid_store = {}

    if self.ops.aggregate:
        ocgis_lh('creating a UGID-GID shapefile is not necessary for aggregated data. use UGID shapefile.',
                 'conv.csv-shp',
                 logging.WARN)
        fiona_object = None
    else:
        fiona_path = os.path.join(self._get_or_create_shp_folder_(), self.prefix + '_gid.shp')
        archetype_field = coll._archetype_field

        try:
            fiona_crs = archetype_field.spatial.crs.value
        except AttributeError:
            # Only a missing coordinate system gets the descriptive error; anything else is re-raised as-is.
            if archetype_field.spatial.crs is None:
                raise ValueError('"crs" is None. A coordinate systems is required for writing to Fiona output.')
            else:
                raise

        schema_properties = OrderedDict([[constants.HEADERS.ID_DATASET.upper(), 'int'],
                                         [self.geom_uid, 'int'],
                                         [constants.HEADERS.ID_GEOMETRY.upper(), 'int']])
        fiona_schema = {'geometry': archetype_field.spatial.abstraction_geometry.geom_type,
                        'properties': schema_properties}
        fiona_object = fiona.open(fiona_path, 'w', driver='ESRI Shapefile', crs=fiona_crs, schema=fiona_schema)

    ret.update({'fiona_object': fiona_object})

    return ret
def _update_bounds_extrapolation_(self, field):
    """
    Set extrapolated bounds on the field's grid, warning instead of failing when bounds already exist.

    The bounds variable names are the x and y coordinate names suffixed with ``constants.OCGIS_BOUNDS``.

    :param field: The field whose grid is updated.
    """
    template = '{}_{}'
    try:
        x_bounds_name = template.format(field.grid.x.name, constants.OCGIS_BOUNDS)
        y_bounds_name = template.format(field.grid.y.name, constants.OCGIS_BOUNDS)
        field.grid.set_extrapolated_bounds(x_bounds_name, y_bounds_name, constants.OCGIS_BOUNDS)
    except BoundsAlreadyAvailableError:
        ocgis_lh(msg='Bounds/corners already on object. Ignoring "interpolate_spatial_bounds".',
                 logger=self._subset_log, level=logging.WARNING)
def _validate_bounds_(self): try: assert(self._bounds.dtype == self._value.dtype) except AssertionError: try: self._bounds = np.array(self._bounds,dtype=self._value.dtype) except: ocgis_lh(exc=ValueError('Value and bounds data types do not match and types could not be casted.'))
def _get_value_(self): if self._data is None and self._value is None: ocgis_lh(exc=ValueError('Values were requested from data source, but no data source is available.')) elif self._src_idx is None and self._value is None: ocgis_lh(exc=ValueError('Values were requested from data source, but no source index source is available.')) else: self._set_value_from_source_() return(self._value)
def create_dimension_map_entry(src, variables, strict=False, attr_name='axis'):
    """
    Create a dimension map entry dictionary by searching variable metadata using attribute constraints.

    :param src: The source information to use for constructing the entry. If ``src`` is a dictionary, it must have
     two entries. The key ``'value'`` corresponds to the string attribute value. The key ``'axis'`` is the
     representative axis to assign the source value (for example ``'X'`` or ``'Y'``). Any other ``src`` is used
     as both the axis and the attribute value.
    :type src: str | dict
    :param dict variables: The metadata entries for the group's variables.
    :param bool strict: If ``False``, do not use a strict interpretation of metadata: fall back to default variable
     names when no attribute matches and reduce multi-dimensional X/Y variables to a single dimension.
    :param str attr_name: Name of the attribute to use for checking the attribute values from ``src``.
    :returns: A dictionary with the variable name and its dimensions, or ``None`` if there is no match or more than
     one match (a warning is logged for multiple matches).
    :rtype: dict | None
    """
    if isinstance(src, dict):
        axis, attr_value = src['axis'], src['value']
    else:
        axis = attr_value = src

    # Collect the variables whose attribute matches. Variables without dimensions are ignored.
    axis_vars = []
    for variable in list(variables.values()):
        if variable.get('attrs', {}).get(attr_name) != attr_value:
            continue
        if len(variable['dimensions']) != 0:
            axis_vars.append(variable['name'])

    # Try to find by default names.
    if not strict and len(axis_vars) == 0:
        for pn in CFName.get_axis_mapping().get(axis, []):
            if pn in variables:
                axis_vars.append(variables[pn]['name'])

    match_count = len(axis_vars)
    if match_count == 0:
        return None

    if match_count > 1:
        msg = 'Multiple axis (axis="{}") possibilities found using variable(s) "{}". Use a dimension map to specify ' \
              'the appropriate coordinate dimensions.'
        ocgis_lh(msg.format(axis, axis_vars), level=logging.WARN, logger='ocgis.driver.nc', force=True)
        return None

    var_name = axis_vars[0]
    dims = list(variables[var_name]['dimensions'])
    # Use default index positions for X/Y dimensions: the first dimension for Y and the second for X.
    if not strict and axis in ('X', 'Y') and len(dims) > 1:
        dims = [dims[0]] if axis == 'Y' else [dims[1]]
    return {'variable': var_name, DimensionMapKey.DIMENSION: dims}
def _run_():
    """Log a warning to a file and check that the file mentions the warning class and the message."""
    logpath = self.get_temporary_file_path('foo.log')
    ocgis_lh.configure(to_file=logpath)

    ocgis_lh(msg='oh my', level=logging.WARN)

    with open(logpath, 'r') as f:
        contents = f.read()
    for expected in ('OcgWarning', 'oh my'):
        self.assertIn(expected, contents)
def format(self, value):
    """
    Reject the parameter when it is set as a system environment variable.

    :param value: Not used by the visible code.
    :raises: OcgisEnvironmentError (handed to ``ocgis_lh`` as ``exc``) when ``self.env_name`` is present in the
     process environment.
    """
    if os.environ.get(self.env_name) is None:
        return

    msg = 'REFERENCE_PROJECTION may not be set as a system environment variable. It must be parameterized at runtime.'
    ocgis_lh(exc=OcgisEnvironmentError(self, msg), logger='env')
def create_merged_weight_file(self, merged_weight_filename, strict=False):
    """
    Merge weight file chunks to a single, global weight file.

    The chunk weight files are listed in the index file and read from the ``'wd'`` path. Their ``row``, ``col``
    and ``S`` variables are remapped and written one after another into the merged file.

    :param str merged_weight_filename: Path to the merged weight file.
    :param bool strict: If ``False``, allow "missing" files where the iterator index cannot create a found file. It
     is best to leave these ``False`` as not all source and destinations are mapped. If ``True``, raise an
     :class:`IOError` when a listed weight file does not exist.
    :raises: ValueError when running on more than one process; IOError; IndexError
    """
    if vm.size > 1:
        raise ValueError("'create_merged_weight_file' does not work in parallel")

    index_filename = self.create_full_path_from_template('index_file')
    ifile = RequestDataset(uri=index_filename).get()
    ifile.load()
    ifc = GridChunkerConstants.IndexFile
    gidx = ifile[ifc.NAME_INDEX_VARIABLE].attrs
    src_global_shape = gidx[ifc.NAME_SRC_GRID_SHAPE]
    dst_global_shape = gidx[ifc.NAME_DST_GRID_SHAPE]

    # Get the global weight dimension size.
    n_s_size = 0
    weight_filename = ifile[gidx[ifc.NAME_WEIGHTS_VARIABLE]]
    wv = weight_filename.join_string_value()
    split_weight_file_directory = self.paths['wd']
    for wfn in map(lambda x: os.path.join(split_weight_file_directory, os.path.split(x)[1]), wv):
        ocgis_lh(msg="current merge weight file target: {}".format(wfn), level=logging.DEBUG,
                 logger=_LOCAL_LOGGER)
        if not os.path.exists(wfn):
            if strict:
                raise IOError(wfn)
            else:
                continue
        curr_dimsize = RequestDataset(wfn).get().dimensions['n_s'].size
        # ESMF writes the weight file, but it may be empty if there are no generated weights.
        if curr_dimsize is not None:
            n_s_size += curr_dimsize

    # Create output weight file.
    wf_varnames = ['row', 'col', 'S']
    wf_dtypes = [np.int32, np.int32, np.float64]
    vc = VariableCollection()
    dim = Dimension('n_s', n_s_size)
    for w, wd in zip(wf_varnames, wf_dtypes):
        var = Variable(name=w, dimensions=dim, dtype=wd)
        vc.add_variable(var)
    vc.write(merged_weight_filename)

    # Transfer weights to the merged file.
    sidx = 0
    src_indices = self.src_grid._gc_create_global_indices_(src_global_shape)
    dst_indices = self.dst_grid._gc_create_global_indices_(dst_global_shape)

    out_wds = nc.Dataset(merged_weight_filename, 'a')
    # Close the output dataset even when a chunk fails, so the file handle is not leaked.
    try:
        for ii, wfn in enumerate(map(lambda x: os.path.join(split_weight_file_directory, x), wv)):
            if not os.path.exists(wfn):
                if strict:
                    raise IOError(wfn)
                else:
                    continue
            wdata = RequestDataset(wfn).get()
            for wvn in wf_varnames:
                odata = wdata[wvn].get_value()
                try:
                    split_grids_directory = self.paths['wd']
                    odata = self._gc_remap_weight_variable_(ii, wvn, odata, src_indices, dst_indices, ifile, gidx,
                                                            split_grids_directory=split_grids_directory)
                except IndexError as e:
                    # Add the file and variable being processed to the error message.
                    msg = "Weight filename: '{}'; Weight Variable Name: '{}'. {}".format(wfn, wvn, str(e))
                    raise IndexError(msg)
                out_wds[wvn][sidx:sidx + odata.size] = odata
                out_wds.sync()
            # All three variables of a chunk are written at the same offset; advance once per chunk.
            sidx += odata.size
    finally:
        out_wds.close()
def _process_geometries_(self, itr, field, alias):
    """
    Yield one collection per element of ``itr``, each holding the (optionally) spatially subsetted target field.

    For every subset geometry the target field is sliced, spatially subsetted, optionally regridded, and
    optionally passed through aggregation/wrapping/coordinate system updates before it is added to a freshly
    initialized collection. Note that ``field`` is rebound to the result of the slice/snippet step inside the
    loop, so later iterations operate on that result.

    :param itr: An iterator yielding :class:`~ocgis.Field` objects for subsetting.
    :type itr: [None] or [:class:`~ocgis.Field`, ...]
    :param :class:`ocgis.Field` field: The target field for operations.
    :param str alias: The request data alias currently being processed.
    :rtype: :class:`~ocgis.SpatialCollection`
    :raises AssertionError: If ``field`` is not a :class:`~ocgis.Field`, or if the processed field is ``None``
     while aggregation is not requested.
    """
    # NOTE(review): the original indentation of this block was lost; nesting below is reconstructed from the
    # statement order and comments. Confirm against version control before relying on exact nesting.
    assert isinstance(field, Field)

    ocgis_lh('processing geometries', self._subset_log, level=logging.DEBUG)
    # Process each geometry.
    for subset_field in itr:
        # Initialize the collection storage.
        coll = self._get_initialized_collection_()
        if vm.is_null:
            # Null ranks skip all subsetting work and pass the field through unchanged.
            sfield = field
        else:
            # Always work with a copy of the subset geometry. This gets twisted in interesting ways depending on the
            # subset target with wrapping, coordinate system conversion, etc.
            subset_field = deepcopy(subset_field)

            if self.ops.regrid_destination is not None:
                # If there is regridding, make another copy as this geometry may be manipulated during subsetting of
                # sources.
                subset_field_for_regridding = deepcopy(subset_field)

            # Operate on the rotated pole coordinate system by first transforming it to the default coordinate
            # system.
            key = constants.BackTransform.ROTATED_POLE
            self._backtransform[key] = self._get_update_rotated_pole_state_(field, subset_field)

            # Check if the geometric abstraction is available on the field object.
            self._assert_abstraction_available_(field)

            # Return a slice or snippet if either of these are requested.
            field = self._get_slice_or_snippet_(field)

            # Choose the subset UGID value.
            if subset_field is None:
                msg = 'No selection geometry. Returning all data. No unique geometry identifier.'
                subset_ugid = None
            else:
                subset_ugid = subset_field.geom.ugid.get_value()[0]
                msg = 'Subsetting with selection geometry having UGID={0}'.format(subset_ugid)
            ocgis_lh(msg=msg, logger=self._subset_log)

            if subset_field is not None:
                # If the coordinate systems differ, update the spatial subset's CRS to match the field.
                if subset_field.crs is not None and subset_field.crs != field.crs:
                    subset_field.update_crs(field.crs)
                # If the geometry is a point, it needs to be buffered if there is a search radius multiplier.
                subset_field = self._get_buffered_subset_geometry_if_point_(field, subset_field)

            # If there is a selection geometry present, use it for the spatial subset. if not, all the field's data
            # is being returned.
            if subset_field is None:
                sfield = field
            else:
                sfield = self._get_spatially_subsetted_field_(alias, field, subset_field, subset_ugid)

            ocgis_lh(msg='after self._get_spatially_subsetted_field_', logger=self._subset_log,
                     level=logging.DEBUG)

        # Create the subcommunicator following the data subset to ensure non-empty communication.
        vm.create_subcomm_by_emptyable(SubcommName.FIELD_SUBSET, sfield, is_current=True, clobber=True)

        if not vm.is_null:
            if not sfield.is_empty and not self.ops.allow_empty:
                raise_if_empty(sfield)

            # If the base size is being requested, bypass the rest of the operations.
            if not self._request_base_size_only:
                # Perform regridding operations if requested.
                if self.ops.regrid_destination is not None and sfield.regrid_source:
                    sfield = self._get_regridded_field_with_subset_(sfield,
                                                                    subset_field_for_regridding=subset_field_for_regridding)
                else:
                    ocgis_lh(msg='no regridding operations', logger=self._subset_log, level=logging.DEBUG)
                # If empty returns are allowed, there may be an empty field.
                if sfield is not None:
                    # Only update spatial stuff if there are no calculations and, if there are calculations,
                    # those calculations are not expecting raw values.
                    if self.ops.calc is None or (self.ops.calc is not None and not self.ops.calc_raw):
                        # Update spatial aggregation, wrapping, and coordinate systems.
                        sfield = _update_aggregation_wrapping_crs_(self, alias, sfield, subset_field, subset_ugid)
                        ocgis_lh('after _update_aggregation_wrapping_crs_ in _process_geometries_',
                                 self._subset_log, level=logging.DEBUG)

        # Add the created field to the output collection with the selection geometry.
        if sfield is None:
            # A missing field is only tolerated when aggregation is requested.
            assert self.ops.aggregate
        if sfield is not None:
            coll.add_field(sfield, subset_field)

        yield coll
def write(self):
    """
    Run the subclass ``_write_`` method and then write any requested auxiliary files.

    Auxiliary outputs, each controlled by an instance flag, are the OCGIS metadata file (``add_meta``), the
    dataset descriptor CSV (``_add_did_file``), the user-geometry shapefile (``_add_ugeom``, optionally nested
    in a ``shp`` folder), and the source metadata text file (``_add_source_meta``).

    :returns: The return value of ``_write_`` or, if that is ``None``, ``self.path``.
    :raises OSError: If the nested shapefile directory cannot be created and does not already exist.
    """
    # Call subclass write method.
    ocgis_lh('starting subclass write method', self._log, logging.DEBUG)
    ret = self._write_()

    # Add OCGIS metadata output if requested.
    if self.add_meta:
        ocgis_lh('adding OCGIS metadata file', 'conv', logging.DEBUG)
        lines = MetaConverter(self.ops).write()
        out_path = os.path.join(self.outdir, self.prefix + '_' + MetaConverter._meta_filename)
        with open(out_path, 'w') as f:
            f.write(lines)

    # Add the dataset descriptor file if specified.
    if self._add_did_file:
        ocgis_lh('writing dataset description (DID) file', 'conv', logging.DEBUG)
        from ocgis.conv.csv_ import OcgDialect
        headers = ['DID', 'VARIABLE', 'ALIAS', 'URI', 'STANDARD_NAME', 'UNITS', 'LONG_NAME']
        out_path = os.path.join(self.outdir, self.prefix + '_did.csv')
        with open(out_path, 'w') as f:
            writer = csv.writer(f, dialect=OcgDialect)
            writer.writerow(headers)
            for rd in self.ops.dataset:
                ref_variable = rd.ds.metadata['variables'][rd.variable]['attrs']
                # Missing attributes are written as None.
                row = [rd.did, rd.variable, rd.alias, rd.uri,
                       ref_variable.get('standard_name', None),
                       ref_variable.get('units', None),
                       ref_variable.get('long_name', None)]
                writer.writerow(row)

    # Add user-geometry.
    if self._add_ugeom and self.ops.geom is not None:
        ocgis_lh('writer user-geometry shapefile', 'conv', logging.DEBUG)
        if self._add_ugeom_nest:
            shp_dir = os.path.join(self.outdir, 'shp')
            try:
                os.mkdir(shp_dir)
            except OSError:
                # An already-existing directory is fine; anything else is a real failure.
                if not os.path.exists(shp_dir):
                    raise
        else:
            shp_dir = self.outdir
        shp_path = os.path.join(shp_dir, self.prefix + '_ugid.shp')
        self.ops.geom.write(shp_path)

    # Add source metadata if requested.
    if self._add_source_meta:
        ocgis_lh('writing source metadata file', 'conv', logging.DEBUG)
        out_path = os.path.join(self.outdir, self.prefix + '_source_metadata.txt')
        to_write = []
        for rd in self.ops.dataset:
            ip = Inspect(request_dataset=rd)
            to_write += ip.get_report()
        with open(out_path, 'w') as f:
            # The joined report is a single string; write() emits it in one call whereas writelines() would
            # iterate over it character by character.
            f.write('\n'.join(to_write))

    # Return anything from the overloaded _write_ method. Otherwise return the internal path.
    if ret is None:
        ret = self.path
    return ret
cmd = [MPIEXEC, '-n', str(nprocs), sys.executable, OCLI_EXE, 'chunked_rwg'] cmd.extend(['--source', dsrc['path'], '--esmf_src_type', dsrc['etype']]) cmd.extend( ['--destination', ddst['path'], '--esmf_dst_type', ddst['etype']]) cmd.extend(['--wd', wd]) cmd.extend(['--weight', weight]) if is_point: cmd.append('--spatial_subset') else: cmd.extend(['--nchunks_dst', str(ddst['nchunks_dst'])]) # cmd.extend(['--no_genweights']) return cmd if __name__ == '__main__': ocgis_lh(logger='chunker', msg='starting!') key_dst = 'scrip-unstruct' # key_dst = 'scrip-struct' # key_dst = 'scrip-point' cmd = create_command(WD, 'ugrid', key_dst, WEIGHT) ocgis_lh(logger='chunker', msg=' '.join(cmd)) subprocess.check_call(cmd) ocgis_lh(logger='chunker', msg='stopping!')
def test_system_combinations(self):
    """
    Exercise the logger with every combination of file target, stream flag, and logging level.

    When a log file is configured, it must exist after logging and is removed before the next combination.
    """
    file_targets = [os.path.join(env.DIR_OUTPUT, 'test_ocgis_log.log'), None]
    stream_flags = [True, False]
    levels = [logging.INFO, logging.DEBUG, logging.WARN]

    combinations = itertools.product(file_targets, stream_flags, levels)
    for counter, combination in enumerate(combinations):
        to_file, to_stream, level = combination
        ocgis_lh.configure(to_file=to_file, to_stream=to_stream, level=level)
        try:
            # Log a bare integer first, then a plain message.
            ocgis_lh(counter)
            ocgis_lh('a test message')

            # Named loggers.
            subset_logger = ocgis_lh.get_logger('subset')
            interp_logger = ocgis_lh.get_logger('interp')
            ocgis_lh('a subset message', logger=subset_logger)
            ocgis_lh('an interp message', logger=interp_logger)

            # Messages with extra context and an explicit level.
            ocgis_lh('a general message', alias='foo', ugid=10)
            ocgis_lh('another message', level=level)

            if to_file is not None:
                self.assertTrue(os.path.exists(to_file))
                os.remove(to_file)
        finally:
            logging.shutdown()
def write(self):
    """
    Write every collection to the output file with one record per selection geometry, plus auxiliary files.

    For each collection, the ``lat``/``lon`` variables of every field are replaced by the centroid of the
    collection's single geometry, the geometry properties are added as variables, and the unioned geometry
    dimension is resized to the number of geometries and renamed ``region``. The first collection creates the
    file (write mode ``None``); later collections are appended. User geometries and the metadata, dataset
    descriptor, and source metadata files are written when the corresponding flags are set.

    :returns: The value of ``self._get_return_()``.
    :raises AssertionError: If a collection does not have exactly one geometry identifier.
    """
    # NOTE(review): the original indentation of this block was lost; nesting is reconstructed from statement
    # order and comments (in particular, the grid is rebuilt for every field, not only when a spatial mask
    # variable is present). Confirm against version control.
    ocgis_lh('starting write method', self._log, logging.DEBUG)

    # Indicates if user geometries should be written to file.
    write_ugeom = False

    ncoll = len(self.ops.geom)

    build = True
    for i, coll in enumerate(self):
        # Materialize the keys: a dictionary key view cannot be indexed on Python 3.
        ugids = list(coll.properties.keys())
        assert len(ugids) == 1
        ugid = ugids[0]

        # Geometry centroid location
        lon, lat = coll.geoms[ugid].centroid.xy

        for field in coll.iter_fields():
            lon_attrs = field.x.attrs.copy()
            lat_attrs = field.y.attrs.copy()

            # Removed for now. It'd be nice to find an elegant way to retain those.
            field.remove_variable('lat')
            field.remove_variable('lon')

            # Create new lon and lat variables
            field.add_variable(
                ocgis.Variable('lon',
                               value=lon,
                               dimensions=(DimensionName.UNIONED_GEOMETRY,),
                               attrs=dict(lon_attrs, **{'long_name': 'Centroid longitude'})
                               )
            )

            field.add_variable(
                ocgis.Variable('lat',
                               value=lat,
                               dimensions=(DimensionName.UNIONED_GEOMETRY,),
                               attrs=dict(lat_attrs, **{'long_name': 'Centroid latitude'})
                               )
            )

            if 'ocgis_spatial_mask' in field:
                # Remove the spatial_mask and replace by new one.
                field.remove_variable('ocgis_spatial_mask')

            grid = ocgis.Grid(field['lon'], field['lat'], abstraction='point',
                              crs=field.crs, parent=field)
            grid.set_mask([[False, ]])
            field.set_grid(grid)

            # Geometry variables from the geom properties dict
            # There is no metadata for those...
            dm = get_data_model(self.ops)
            for key, val in coll.properties[ugid].items():
                if np.issubdtype(type(val), int):
                    dt = get_dtype('int', dm)
                elif np.issubdtype(type(val), float):
                    dt = get_dtype('float', dm)
                else:
                    dt = 'auto'
                field.add_variable(
                    ocgis.Variable(key, value=[val, ], dtype=dt, dimensions=(DimensionName.UNIONED_GEOMETRY,)))

            # ------------------ Dimension update ------------------------ #
            # Modify the dimensions for the number of geometries
            gdim = field.dimensions[DimensionName.UNIONED_GEOMETRY]
            gdim.set_size(ncoll)

            for var in field.iter_variables_by_dimensions([gdim]):
                d = var.dimensions_dict[DimensionName.UNIONED_GEOMETRY]
                # Each collection fills exactly one slot along the geometry dimension.
                d.bounds_local = (i, i + 1)
            # ------------------------------------------------------------ #

            # CF-Conventions
            # Can this be anything else than a timeseries_id
            # Options are timeseries_id, profile_id, trajectory_id
            gid = field[HeaderName.ID_GEOMETRY]
            gid.attrs['cf_role'] = 'timeseries_id'

            # TODO: Hard-code the name in constants.py
            gdim.set_name('region')

        # Path to the output object.
        # I needed to put it here because _write_archetype pops it, so it's not available after the first loop.
        f = {KeywordArgument.PATH: self.path}

        # This will be changed to "write" if we are on the build loop.
        write_mode = MPIWriteMode.APPEND

        if build:
            # During a build loop, create the file and write the first series of records. Let the drivers determine
            # the appropriate write modes for handling parallelism.
            write_mode = None

            # Write the user geometries if selected and there is one present on the incoming collection.
            if self._add_ugeom and coll.has_container_geometries:
                write_ugeom = True

            if write_ugeom:
                if vm.rank == 0:
                    # The output file name for the user geometries.
                    ugid_shp_name = self.prefix + '_ugid.shp'
                    if self._add_ugeom_nest:
                        ugeom_fiona_path = os.path.join(self._get_or_create_shp_folder_(), ugid_shp_name)
                    else:
                        ugeom_fiona_path = os.path.join(self.outdir, ugid_shp_name)
                else:
                    ugeom_fiona_path = None

            build = False

        f[KeywordArgument.WRITE_MODE] = write_mode
        self._write_coll_(f, coll)

        if write_ugeom:
            with vm.scoped(SubcommName.UGEOM_WRITE, [0]):
                if not vm.is_null:
                    for subset_field in list(coll.children.values()):
                        subset_field.write(ugeom_fiona_path, write_mode=write_mode, driver=DriverVector)

    # The metadata and dataset descriptor files may only be written if OCGIS operations are present.
    ops = self.ops
    if ops is not None and self.add_auxiliary_files and MPI_RANK == 0:
        # Add OCGIS metadata output if requested.
        if self.add_meta:
            ocgis_lh('adding OCGIS metadata file', 'conv', logging.DEBUG)
            from ocgis.conv.meta import MetaOCGISConverter

            lines = MetaOCGISConverter(ops).write()
            out_path = os.path.join(self.outdir, self.prefix + '_' + MetaOCGISConverter._meta_filename)
            with open(out_path, 'w') as meta_file:
                meta_file.write(lines)

        # Add the dataset descriptor file if requested.
        if self._add_did_file:
            ocgis_lh('writing dataset description (DID) file', 'conv', logging.DEBUG)
            path = os.path.join(self.outdir, self.prefix + '_did.csv')
            _write_dataset_identifier_file_(path, ops)

        # Add source metadata if requested.
        if self._add_source_meta:
            ocgis_lh('writing source metadata file', 'conv', logging.DEBUG)
            path = os.path.join(self.outdir, self.prefix + '_source_metadata.txt')
            _write_source_meta_(path, ops)

    # Return the internal path unless overloaded by subclasses.
    ret = self._get_return_()

    return ret
def _run_():
    """Log a ``RuntimeWarning`` through an unconfigured (null) logger with warning suppression disabled."""
    ocgis_lh.configure()
    self.assertTrue(ocgis_lh.null)
    env.SUPPRESS_WARNINGS = False
    try:
        ocgis_lh(level=logging.WARNING, exc=RuntimeWarning('show me'))
    finally:
        # Restore suppression even if the logging call raises so the global flag cannot leak to other tests.
        env.SUPPRESS_WARNINGS = True
def _run_():
    """Log a warning-level message to a temporary log file with warning suppression disabled."""
    env.SUPPRESS_WARNINGS = False
    try:
        logpath = self.get_temporary_file_path('foo.log')
        ocgis_lh.configure(to_file=logpath)
        ocgis_lh(msg='hey there', level=logging.WARN)
    finally:
        # Restore suppression even if configuration or logging raises so the global flag cannot leak.
        env.SUPPRESS_WARNINGS = True
def _process_subsettables_(self, rds):
    """
    Create the target field from the request datasets (or field objects), apply non-spatial subsetting, and
    yield the collections produced by :meth:`_process_geometries_`.

    If the non-spatial subset is empty, a collection holding a single empty field is yielded when
    ``self.ops.allow_empty`` is set; otherwise an ``ExtentError`` is passed to the logger.

    :param rds: Sequence of :class:`~ocgis.RequestDataset` objects.
    :type rds: sequence
    :rtype: :class:`ocgis.collection.base.AbstractCollection`
    :raises NotImplementedError: If an element cannot be loaded via ``get`` and ``self.ops.format_time`` is
     false.
    :raises ValueError: If the loaded field is empty and no field optimizations are in use.
    """
    # NOTE(review): the original indentation of this block was lost; nesting is reconstructed from statement
    # order and comments. Confirm against version control before relying on exact nesting.

    ocgis_lh(msg='entering _process_subsettables_', logger=self._subset_log, level=logging.DEBUG)

    # This is used to define the group of request datasets for things like logging and exceptions.
    try:
        alias = '_'.join([r.field_name for r in rds])
    except AttributeError:
        # Allow field objects which do not expose the "field_name" attribute.
        try:
            alias = '_'.join([r.name for r in rds])
        except TypeError:
            # The alias is used for logging, etc. If it cannot be constructed easily, leave it as None.
            alias = None
    except NoDataVariablesFound:
        # If an alias is not provided and there are no data variables, set to None as this is used only for logging.
        alias = None

    ocgis_lh('processing...', self._subset_log, alias=alias, level=logging.DEBUG)
    # Create the field object. Field objects may be passed directly to operations.
    # Look for field optimizations. Field optimizations typically include pre-loaded datetime objects.
    if self.ops.optimizations is not None and 'fields' in self.ops.optimizations:
        ocgis_lh('applying optimizations', self._subset_log, level=logging.DEBUG)
        field = [self.ops.optimizations['fields'][rd.field_name].copy() for rd in rds]
        has_field_optimizations = True
    else:
        # Indicates no field optimizations loaded.
        has_field_optimizations = False
    try:
        # No field optimizations and data should be loaded from source.
        if not has_field_optimizations:
            ocgis_lh('creating field objects', self._subset_log, level=logging.DEBUG)
            len_rds = len(rds)
            field = [None] * len_rds
            for ii in range(len_rds):
                rds_element = rds[ii]
                try:
                    field_object = rds_element.get(format_time=self.ops.format_time,
                                                   grid_abstraction=self.ops.abstraction)
                except (AttributeError, TypeError):
                    # Likely a field object which does not need to be loaded from source.
                    if not self.ops.format_time:
                        raise NotImplementedError
                    # Check that it is indeed a field before proceeding. The bare raise re-raises the
                    # AttributeError/TypeError caught above.
                    if not isinstance(rds_element, Field):
                        raise
                    field_object = rds_element
                field[ii] = field_object

        # Multivariate calculations require pulling variables across fields.
        if self._has_multivariate_calculations and len(field) > 1:
            for midx in range(1, len(field)):
                # Use the data variable tag if it is available. Otherwise, attempt to merge the fields raising
                # warning if the variable exists in the squashed field.
                if len(field[midx].data_variables) > 0:
                    vitr = field[midx].data_variables
                    is_data = True
                else:
                    vitr = list(field[midx].values())
                    is_data = False
                for mvar in vitr:
                    mvar = mvar.extract()
                    field[0].add_variable(mvar, is_data=is_data)
            new_field_name = '_'.join([str(f.name) for f in field])
            field[0].set_name(new_field_name)

        # The first field in the list is always the target for other operations.
        field = field[0]
        assert isinstance(field, Field)

        # Break out of operations if the rank is empty.
        vm.create_subcomm_by_emptyable(SubcommName.FIELD_GET, field, is_current=True, clobber=True)
        if not vm.is_null:
            if not has_field_optimizations:
                if field.is_empty:
                    raise ValueError('No empty fields allowed.')

                # Time, level, etc. subsets.
                field = self._get_nonspatial_subset_(field)

            # Spatially reorder the data.
            ocgis_lh(msg='before spatial reorder', logger=self._subset_log, level=logging.DEBUG)
            if self.ops.spatial_reorder:
                self._update_spatial_order_(field)

            # Extrapolate the spatial bounds if requested.
            # TODO: Rename "interpolate" to "extrapolate".
            if self.ops.interpolate_spatial_bounds:
                self._update_bounds_extrapolation_(field)

    # This error is related to subsetting by time or level. Spatial subsetting occurs below.
    except EmptySubsetError as e:
        if self.ops.allow_empty:
            ocgis_lh(msg='time or level subset empty but empty returns allowed', logger=self._subset_log,
                     level=logging.WARN)
            coll = self._get_initialized_collection_()
            name = '_'.join([rd.field_name for rd in rds])
            field = Field(name=name, is_empty=True)
            coll.add_field(field, None)
            # Yield the single empty collection and then end the generator. The return sits in a finally
            # clause, so it also runs if the generator is closed or thrown into at the yield.
            try:
                yield coll
            finally:
                return
        else:
            # Raise an exception as empty subsets are not allowed.
            ocgis_lh(exc=ExtentError(message=str(e)), alias=str([rd.field_name for rd in rds]),
                     logger=self._subset_log)

    # Set iterator based on presence of slice. Slice always overrides geometry.
    if self.ops.slice is not None:
        itr = [None]
    else:
        itr = [None] if self.ops.geom is None else self.ops.geom

    for coll in self._process_geometries_(itr, field, alias):
        # Conform units following the spatial subset.
        if not vm.is_null and self.ops.conform_units_to is not None:
            for to_conform in coll.iter_fields():
                for dv in to_conform.data_variables:
                    dv.cfunits_conform(self.ops.conform_units_to)
        ocgis_lh(msg='_process_subsettables_ yielding', logger=self._subset_log, level=logging.DEBUG)
        yield coll
def validate(cls, ops):
    """
    Check that a temporal grouping accompanies the calculation.

    When ``ops.calc_grouping`` is ``None``, a ``DefinitionValidationError`` for the ``Calc`` parameter is handed
    to the logger. Nothing happens otherwise.

    :param ops: Operations object exposing ``calc_grouping``.
    """
    # Nothing to validate when a grouping is present.
    if ops.calc_grouping is not None:
        return

    from ocgis.ops.parms.definition import Calc

    error = DefinitionValidationError(Calc, 'Set functions must have a temporal grouping.')
    ocgis_lh(exc=error, logger='calc.base')
def iter_src_grid_subsets(self, yield_dst=False, yield_idx=None):
    """
    Yield source grid subset using the extent of its associated destination grid subset.

    For each destination subset, a bounding box is built from the subset's global extent (optionally expanded
    by ``self.buffer_value``), broadcast from the first live rank, and intersected with the source grid.

    :param bool yield_dst: If ``True``, yield the destination subset as well as the source grid subset.
    :param int yield_idx: If a zero-based integer, only yield for this chunk index and skip everything else.
    :rtype: tuple(:class:`ocgis.spatial.grid.AbstractGrid`, `slice-like`)
    :returns: ``(src_grid_subset, src_grid_slice)`` or, if ``yield_dst`` is ``True``,
     ``(src_grid_subset, src_grid_slice, dst_grid_subset, dst_slice)``. ``src_grid_slice`` is ``None`` for an
     empty source subset and otherwise a dictionary keyed by dimension name.
    :raises ValueError: If masked values are found in the source subset while ``self.allow_masked`` is false, or
     if ``self.check_contains`` is set and the contains check fails.
    """
    # NOTE(review): the original indentation of this block was lost; nesting is reconstructed from statement
    # order and comments. Confirm against version control before relying on exact nesting.

    # The destination slice is only needed when the destination subset is yielded.
    if yield_dst:
        yield_slice = True
    else:
        yield_slice = False

    buffer_value = self.buffer_value

    dst_grid_wrapped_state = self.dst_grid.wrapped_state
    dst_grid_crs = self.dst_grid.crs

    # Use a destination grid iterator if provided.
    if self.iter_dst is not None:
        iter_dst = self.iter_dst(self, yield_slice=yield_slice, yield_idx=yield_idx)
    else:
        iter_dst = self.iter_dst_grid_subsets(yield_slice=yield_slice, yield_idx=yield_idx)

    # Loop over each destination grid subset.
    ocgis_lh(logger='grid_chunker', msg='starting "for yld in iter_dst"', level=logging.DEBUG)
    for yld in iter_dst:
        if yield_slice:
            dst_grid_subset, dst_slice = yld
        else:
            dst_grid_subset = yld

        dst_box = None
        with vm.scoped_by_emptyable('extent_global', dst_grid_subset):
            if not vm.is_null:
                # Use the extent of the polygon for determining the bounding box. This ensures conservative
                # regridding will be fully mapped.
                if isinstance(dst_grid_subset, AbstractGeometryCoordinates):
                    target_grid = dst_grid_subset.parent.grid
                else:
                    target_grid = dst_grid_subset

                # Prefer an extent stored in the parent's attributes; compute it from the polygon abstraction
                # only when it is absent.
                extent_global = target_grid.parent.attrs.get('extent_global')
                if extent_global is None:
                    with grid_abstraction_scope(target_grid, Topology.POLYGON):
                        extent_global = target_grid.extent_global

                if self.check_contains:
                    dst_box = box(*target_grid.extent_global)

                sub_box = box(*extent_global)
                if buffer_value is not None:
                    # Use the envelope! A buffer returns "fancy" borders. We just want to expand the bounding box.
                    sub_box = sub_box.buffer(buffer_value).envelope

                ocgis_lh(msg=str(sub_box.bounds), level=logging.DEBUG, logger='grid_chunker')
            else:
                # Null ranks receive the boxes through the broadcast below.
                sub_box, dst_box = [None, None]

        live_ranks = vm.get_live_ranks_from_object(dst_grid_subset)
        sub_box = vm.bcast(sub_box, root=live_ranks[0])

        if self.check_contains:
            dst_box = vm.bcast(dst_box, root=live_ranks[0])

        sub_box = GeometryVariable.from_shapely(sub_box, is_bbox=True, wrapped_state=dst_grid_wrapped_state,
                                                crs=dst_grid_crs)
        ocgis_lh(logger='grid_chunker', msg='starting "self.src_grid.get_intersects"', level=logging.DEBUG)
        src_grid_subset, src_grid_slice = self.src_grid.get_intersects(sub_box, keep_touches=False, cascade=False,
                                                                       optimized_bbox_subset=self.optimized_bbox_subset,
                                                                       return_slice=True)
        ocgis_lh(logger='grid_chunker', msg='finished "self.src_grid.get_intersects"', level=logging.DEBUG)

        # Reload the data using a new source index distribution.
        if hasattr(src_grid_subset, 'reduce_global') and src_grid_subset.cindex is not None:
            # Only redistribute if there is at least one live rank.
            if self.redistribute and len(vm.get_live_ranks_from_object(src_grid_subset)) > 0:
                ocgis_lh(logger='grid_chunker', msg='starting redistribute', level=logging.DEBUG)
                topology = src_grid_subset.abstractions_available[Topology.POLYGON]
                cindex = topology.cindex
                redist_dimname = self.src_grid.abstractions_available[Topology.POLYGON].element_dim.name
                if src_grid_subset.is_empty:
                    redist_dim = None
                else:
                    redist_dim = topology.element_dim
                redistribute_by_src_idx(cindex, redist_dimname, redist_dim)
                ocgis_lh(logger='grid_chunker', msg='finished redistribute', level=logging.DEBUG)

        with vm.scoped_by_emptyable('src_grid_subset', src_grid_subset):
            if not vm.is_null:
                if not self.allow_masked:
                    gmask = src_grid_subset.get_mask()
                    if gmask is not None and gmask.any():
                        raise ValueError('Masked values in source grid subset.')

                if self.check_contains:
                    src_box = box(*src_grid_subset.extent_global)
                    if not does_contain(src_box, dst_box):
                        raise ValueError('Contains check failed.')

                # Try to reduce the coordinates in the case of unstructured grid data.
                if hasattr(src_grid_subset, 'reduce_global') and src_grid_subset.cindex is not None:
                    ocgis_lh(logger='grid_chunker', msg='starting reduce_global', level=logging.DEBUG)
                    src_grid_subset = src_grid_subset.reduce_global()
                    ocgis_lh(logger='grid_chunker', msg='finished reduce_global', level=logging.DEBUG)
            else:
                pass
                # src_grid_subset = VariableCollection(is_empty=True)

            if src_grid_subset.is_empty:
                src_grid_slice = None
            else:
                # Convert the positional slice to a mapping keyed by dimension name.
                src_grid_slice = {src_grid_subset.dimensions[ii].name: src_grid_slice[ii] for ii in
                                  range(src_grid_subset.ndim)}

        if yield_dst:
            yld = (src_grid_subset, src_grid_slice, dst_grid_subset, dst_slice)
        else:
            yld = src_grid_subset, src_grid_slice

        yield yld
def execute(self, coll, file_only=False, tgds=None):
    """
    Run every function in ``self.funcs`` on each field of the collection and replace the field with the
    calculation output.

    :param coll: The collection whose fields are calculated on. It is modified in place and returned.
    :type coll: :class:`~ocgis.SpatialCollection`
    :param bool file_only: Passed through to each function. If ``True``, output variables are asserted to hold
     no computed values.
    :param dict tgds: Optional temporal group dimensions keyed by field name. If provided, every field name in
     the collection must be a key. If ``None``, ``self._tgds`` is used and filled on demand.
     {'field_alias': :class:`ocgis.interface.base.dimension.temporal.TemporalGroupDimension`,...}
    :returns: The incoming collection with its fields replaced by the calculation output fields.
    :raises AssertionError: If ``tgds`` is provided and is missing a field name, if an output variable has no
     data type, or if ``file_only`` is set and an output variable holds a value.
    """
    # NOTE(review): the original indentation of this block was lost; nesting is reconstructed from statement
    # order and comments. Confirm against version control before relying on exact nesting.

    from ocgis import VariableCollection

    # Select which dictionary will hold the temporal group dimensions.
    if tgds is None:
        tgds_to_use = self._tgds
        tgds_overloaded = False
    else:
        tgds_to_use = tgds
        tgds_overloaded = True

    # Group the variables. If grouping is None, calculations are performed on each element.
    if self.grouping is not None:
        ocgis_lh('Setting temporal groups: {0}'.format(self.grouping), 'calc.engine')
        for field in coll.iter_fields():
            if tgds_overloaded:
                assert field.name in tgds_to_use
            else:
                if field.name not in tgds_to_use:
                    tgds_to_use[field.name] = field.time.get_grouping(self.grouping)

    # Iterate over functions.
    for ugid, container in list(coll.children.items()):
        for field_name, field in list(container.children.items()):
            new_temporal = tgds_to_use.get(field_name)
            if new_temporal is not None:
                # Work on a copy so the stored temporal group dimension is not modified.
                new_temporal = new_temporal.copy()
            # If the engine has a grouping, ensure it is equivalent to the new temporal dimension.
            if self.grouping is not None:
                try:
                    compare = set(new_temporal.grouping) == set(self.grouping)
                # Types may be unhashable, compare directly.
                except TypeError:
                    compare = new_temporal.grouping == self.grouping
                if not compare:
                    msg = 'Engine temporal grouping and field temporal grouping are not equivalent. Perhaps ' \
                          'optimizations are incorrect?'
                    ocgis_lh(logger='calc.engine', exc=ValueError(msg))

            # A single variable collection accumulates the output of all functions for this field.
            out_vc = VariableCollection()

            for f in self.funcs:
                try:
                    ocgis_lh('Calculating: {0}'.format(f['func']), logger='calc.engine')
                    # Initialize the function.
                    function = f['ref'](alias=f['name'], dtype=None, field=field, file_only=file_only, vc=out_vc,
                                        parms=f['kwds'], tgd=new_temporal, calc_sample_size=self.calc_sample_size,
                                        meta_attrs=f.get('meta_attrs'), spatial_aggregation=self.spatial_aggregation)
                    # Allow a calculation to create a temporal aggregation after initialization.
                    if new_temporal is None and function.tgd is not None:
                        new_temporal = function.tgd.extract()
                except KeyError:
                    # Likely an eval function which does not have the name key.
                    function = EvalFunction(field=field, file_only=file_only, vc=out_vc,
                                            expr=self.funcs[0]['func'], meta_attrs=self.funcs[0].get('meta_attrs'))

                ocgis_lh('calculation initialized', logger='calc.engine', level=logging.DEBUG)

                # Return the variable collection from the calculations.
                out_vc = function.execute()

                for dv in out_vc.values():
                    # Any outgoing variables from a calculation must have an associated data type.
                    try:
                        assert dv.dtype is not None
                    except AssertionError:
                        assert isinstance(dv.dtype, np.dtype)
                    # If this is a file only operation, there should be no computed values.
                    if file_only:
                        assert dv._value is None

                ocgis_lh('calculation finished', logger='calc.engine', level=logging.DEBUG)

                # Try to mark progress. Okay if it is not there.
                try:
                    self._progress.mark()
                except AttributeError:
                    pass

            # The output field and tag come from the last function executed for this field.
            out_field = function.field.copy()
            function_tag = function.tag

            # Format the returned field. Doing things like removing original data variables and modifying the
            # time dimension if necessary. Field functions handle all field modifications on their own, so bypass
            # in that case.
            if new_temporal is not None:
                new_temporal = new_temporal.extract()
            format_return_field(function_tag, out_field, new_temporal=new_temporal)

            # Add the calculation variables.
            for variable in list(out_vc.values()):
                with orphaned(variable):
                    out_field.add_variable(variable)
            # Tag the calculation data as data variables.
            out_field.append_to_tags(function_tag, list(out_vc.keys()))

            coll.children[ugid].children[field_name] = out_field
    return coll
def chunked_rwg(source, destination, weight, nchunks_dst, merge, esmf_src_type, esmf_dst_type, genweights,
                esmf_regrid_method, spatial_subset, src_resolution, dst_resolution, buffer_distance, wd, persist,
                eager, ignore_degenerate, data_variables, spatial_subset_path, verbose, loglvl, weightfilemode,
                large_file):
    # Entry point for chunked regrid weight generation. Depending on the flags it writes a spatial subset of
    # the source, writes source/destination chunks (and their weights), writes a single ESMF weight file, merges
    # chunk weight files into a global weight file, stamps the weight file with provenance attributes, and removes
    # the working directory unless "persist" is set. Returns 0 on success. Raises ValueError for missing weight
    # paths, a pre-existing working directory, or a merged weight path inside the working directory.
    #
    # Documented with comments rather than a docstring: decorators outside this view may use the docstring as
    # user-facing help text.
    #
    # NOTE(review): the original indentation of this block was lost; nesting is reconstructed from statement
    # order and comments. Confirm against version control before relying on exact nesting.

    # Used for creating the history string. This must stay the first statement so only the call arguments are
    # captured.
    the_locals = locals()

    if verbose:
        ocgis_lh.configure(to_stream=True, level=getattr(logging, loglvl))
    ocgis_lh(msg="Starting Chunked Regrid Weight Generation", level=logging.INFO, logger=CRWG_LOG)

    if not ocgis.env.USE_NETCDF4_MPI:
        msg = ('env.USE_NETCDF4_MPI is False. Considerable performance gains are possible if this is True. Is '
               'netCDF4-python built with parallel support?')
        ocgis_lh(msg, level=logging.WARN, logger=CRWG_LOG, force=True)

    if data_variables is not None:
        data_variables = data_variables.split(',')

    if nchunks_dst is not None:
        # Format the chunking decomposition from its string representation.
        if ',' in nchunks_dst:
            nchunks_dst = nchunks_dst.split(',')
        else:
            nchunks_dst = [nchunks_dst]
        nchunks_dst = tuple([int(ii) for ii in nchunks_dst])
    if merge:
        if not spatial_subset and weight is None:
            raise ValueError('"weight" must be a valid path if --merge')
    if spatial_subset and genweights and weight is None:
        raise ValueError('"weight" must be a valid path if --genweights')

    # Make a temporary working directory is one is not provided by the client. Only do this if we are writing subsets
    # and it is not a merge only operation.
    should_create_wd = (nchunks_dst is None or not all([ii == 1 for ii in nchunks_dst])) or spatial_subset
    if should_create_wd:
        if wd is None:
            if ocgis.vm.rank == 0:
                wd = tempfile.mkdtemp(prefix='ocgis_chunked_rwg_')
            wd = ocgis.vm.bcast(wd)
        else:
            exc = None
            if ocgis.vm.rank == 0:
                # The working directory must not exist to proceed.
                if nchunks_dst is not None:
                    if os.path.exists(wd):
                        exc = ValueError("Working directory {} must not exist.".format(wd))
                    else:
                        # Make the working directory nesting as needed.
                        os.makedirs(wd)
            # Share the outcome so every rank raises together.
            exc = ocgis.vm.bcast(exc)
            if exc is not None:
                raise exc

        if merge and not spatial_subset or (spatial_subset and genweights):
            if _is_subdir_(wd, weight):
                raise ValueError(
                    'Merge weight file path must not in the working directory. It may get unintentionally deleted with the --no_persist flag.')

    # Create the source and destination request datasets.
    rd_src = _create_request_dataset_(source, esmf_src_type, data_variables=data_variables)
    rd_dst = _create_request_dataset_(destination, esmf_dst_type)

    # Execute a spatial subset if requested.
    paths = None
    if spatial_subset:
        if spatial_subset_path is None:
            spatial_subset_path = os.path.join(wd, 'spatial_subset.nc')
        msg = "Executing spatial subset. Output path is: {}".format(spatial_subset_path)
        ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
        _write_spatial_subset_(rd_src, rd_dst, spatial_subset_path, src_resmax=src_resolution)
    # Only split grids if a spatial subset is not requested.
    else:
        # Update the paths to use for the grid.
        paths = {'wd': wd}

    # Arguments to ESMF regridding.
    esmf_kwargs = {'regrid_method': esmf_regrid_method,
                   'ignore_degenerate': ignore_degenerate}
    # Allow older versions of ESMF to work with a default large file flag.
    if large_file:
        esmf_kwargs['large_file'] = large_file

    # Create the chunked regridding object. This is used for both chunked regridding and a regrid with a spatial subset.
    gs = GridChunker(rd_src, rd_dst, nchunks_dst=nchunks_dst, src_grid_resolution=src_resolution, paths=paths,
                     dst_grid_resolution=dst_resolution, buffer_value=buffer_distance, redistribute=True,
                     genweights=genweights, esmf_kwargs=esmf_kwargs, use_spatial_decomp='auto', eager=eager,
                     filemode=weightfilemode)

    # Write subsets and generate weights if requested in the grid splitter.
    # TODO: Need a weight only option. If chunks are written, then weights are written...
    if not spatial_subset and nchunks_dst is not None and not gs.is_one_chunk:
        msg = "Starting main chunking loop..."
        ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
        gs.write_chunks()
    else:
        if spatial_subset:
            source = spatial_subset_path
        if genweights:
            msg = "Writing ESMF weights..."
            ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
            handle_weight_file_check(weight)
            gs.write_esmf_weights(source, destination, weight, filemode=weightfilemode)

    # Create the global weight file. This does not apply to spatial subsets because there will always be one weight
    # file.
    if merge and not spatial_subset and not gs.is_one_chunk:
        # Weight file merge only works in serial.
        exc = None
        with ocgis.vm.scoped('weight file merge', [0]):
            if not ocgis.vm.is_null:
                msg = "Merging chunked weight files to global file. Output global weight file is: {}".format(weight)
                ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
                handle_weight_file_check(weight)
                gs.create_merged_weight_file(weight)
        # NOTE(review): nothing above assigns "exc", so this gather/broadcast currently only ever carries None.
        excs = ocgis.vm.gather(exc)
        excs = ocgis.vm.bcast(excs)
        for exc in excs:
            if exc is not None:
                raise exc

    ocgis.vm.barrier()

    # Append the history string if there is an output weight file.
    if weight and ocgis.vm.rank == 0:
        if os.path.exists(weight):
            # Add some additional stuff for record keeping
            import getpass
            import socket

            with nc.Dataset(weight, 'a') as ds:
                ds.setncattr('created_by_user', getpass.getuser())
                ds.setncattr('created_on_hostname', socket.getfqdn())
                ds.setncattr('history', create_history_string(the_locals))
    ocgis.vm.barrier()

    # Remove the working directory unless the persist flag is provided.
    if not persist:
        if ocgis.vm.rank == 0:
            # A working directory is only guaranteed when one was created above or supplied by the client. Skip
            # removal when there is none; shutil.rmtree(None) would raise on rank 0 and leave the other ranks
            # waiting at the barrier.
            if wd is not None and os.path.exists(wd):
                msg = "Removing working directory since persist is False."
                ocgis_lh(msg=msg, level=logging.INFO, logger=CRWG_LOG)
                shutil.rmtree(wd)
        ocgis.vm.barrier()

    ocgis_lh(msg="Success!", level=logging.INFO, logger=CRWG_LOG)
    return 0
def test_system_parallel(self):
    """
    Configure a rank-specific log file, log one message, and check that the number of entries in the output
    directory equals the number of ranks.
    """
    log_name = 'rank-{}-test_ocgis_log.log'.format(vm.rank)
    rank_log_path = os.path.join(self.current_dir_output, log_name)

    ocgis_lh.configure(to_file=rank_log_path)
    ocgis_lh("something happened")

    written = os.listdir(self.current_dir_output)
    self.assertEqual(len(written), vm.size)
def iter_src_grid_subsets(self, yield_dst=False, yield_idx=None):
    """
    Yield source grid subset using the extent of its associated destination grid subset.

    For every destination subset, a bounding box is built from the destination's global extent (expanded by
    ``self.buffer_value`` when it is not ``None``), broadcast from the first live rank, and used to subset
    ``self.src_grid`` with ``get_intersects``.

    When ``yield_dst`` is ``False`` a two-element tuple ``(src_grid_subset, src_grid_slice)`` is yielded. When
    ``yield_dst`` is ``True`` a four-element tuple ``(src_grid_subset, src_grid_slice, dst_grid_subset, dst_slice)``
    is yielded. ``src_grid_slice`` is ``None`` for an empty source subset, otherwise a dictionary mapping dimension
    names of the source subset to their slices.

    :param bool yield_dst: If ``True``, yield the destination subset as well as the source grid subset.
    :param int yield_idx: If a zero-based integer, only yield for this chunk index and skip everything else.
    :rtype: tuple(:class:`ocgis.spatial.grid.AbstractGrid`, `slice-like`)
    :raises ValueError: If the source subset contains masked values and ``self.allow_masked`` is ``False``, or if
     ``self.check_contains`` is ``True`` and the source extent does not contain the destination extent.
    """
    # The destination slice is only needed when the destination subset is yielded.
    if yield_dst:
        yield_slice = True
    else:
        yield_slice = False

    buffer_value = self.buffer_value

    # Read once up front; these are reused for every destination subset.
    dst_grid_wrapped_state = self.dst_grid.wrapped_state
    dst_grid_crs = self.dst_grid.crs

    # Use a destination grid iterator if provided.
    if self.iter_dst is not None:
        iter_dst = self.iter_dst(self, yield_slice=yield_slice, yield_idx=yield_idx)
    else:
        iter_dst = self.iter_dst_grid_subsets(yield_slice=yield_slice, yield_idx=yield_idx)

    # Loop over each destination grid subset.
    ocgis_lh(logger=_LOCAL_LOGGER, msg='starting "for yld in iter_dst"', level=logging.DEBUG)
    for iter_dst_ctr, yld in enumerate(iter_dst, start=1):
        ocgis_lh(msg=["iter_dst_ctr", iter_dst_ctr], level=logging.DEBUG)
        if yield_slice:
            dst_grid_subset, dst_slice = yld
        else:
            dst_grid_subset = yld

        # All masked destinations are very problematic for ESMF
        with vm.scoped_by_emptyable('global mask', dst_grid_subset):
            if not vm.is_null:
                if dst_grid_subset.has_mask_global:
                    if dst_grid_subset.has_mask and dst_grid_subset.has_masked_values:
                        all_masked = dst_grid_subset.get_mask().all()
                    else:
                        all_masked = False
                    all_masked_gather = vm.gather(all_masked)
                    if vm.rank == 0:
                        # Abort only when every gathered flag reports a fully masked subset.
                        if all(all_masked_gather):
                            exc = ValueError("Destination subset all masked")
                            try:
                                raise exc
                            finally:
                                vm.abort(exc=exc)

        # Only populated when the contains check is enabled.
        dst_box = None
        with vm.scoped_by_emptyable('extent_global', dst_grid_subset):
            if not vm.is_null:
                # Use the extent of the polygon for determining the bounding box. This ensures conservative
                # regridding will be fully mapped.
                if isinstance(dst_grid_subset, AbstractGeometryCoordinates):
                    target_grid = dst_grid_subset.parent.grid
                else:
                    target_grid = dst_grid_subset

                # Try to reduce the coordinates in the case of unstructured grid data. Ensure the data also has a
                # coordinate index. SCRIP grid files, for example, do not have a coordinate index like UGRID.
                if hasattr(
                        target_grid, 'reduce_global'
                ) and Topology.POLYGON in target_grid.abstractions_available and target_grid.cindex is not None:
                    ocgis_lh(
                        logger=_LOCAL_LOGGER,
                        msg='starting reduce_global for dst_grid_subset',
                        level=logging.DEBUG)
                    target_grid = target_grid.reduce_global()
                    ocgis_lh(
                        logger=_LOCAL_LOGGER,
                        msg='finished reduce_global for dst_grid_subset',
                        level=logging.DEBUG)

                # Prefer an extent stored in the parent's attributes; compute it from the grid otherwise.
                extent_global = target_grid.parent.attrs.get(
                    'extent_global')
                if extent_global is None:
                    with grid_abstraction_scope(target_grid, Topology.POLYGON):
                        extent_global = target_grid.extent_global
                        # HACK: Bad corner coordinates can lead to bad extents. In this case, the lower bound on the
                        # x-coordinate is unreasonable and breaks wrapping code. Set to 0.0 which is a reasonable
                        # lower x-coordinate for unwrapped datasets.
                        if (isinstance(target_grid.crs, Spherical)) and \
                                dst_grid_wrapped_state == WrappedState.UNWRAPPED and \
                                extent_global[0] < 0.0:
                            e = list(extent_global)
                            e[0] = 0.0
                            extent_global = tuple(e)

                if self.check_contains:
                    dst_box = box(*target_grid.extent_global)

                sub_box = box(*extent_global)
                if buffer_value is not None:
                    # Use the envelope! A buffer returns "fancy" borders. We just want to expand the bounding box.
                    sub_box = sub_box.buffer(buffer_value).envelope

                ocgis_lh(msg=str(sub_box.bounds), level=logging.DEBUG, logger=_LOCAL_LOGGER)
            else:
                # Ranks outside the scoped communicator receive the boxes through the broadcast below.
                sub_box, dst_box = [None, None]

        # Share the boxes computed on the first live rank with every rank.
        live_ranks = vm.get_live_ranks_from_object(dst_grid_subset)
        sub_box = vm.bcast(sub_box, root=live_ranks[0])

        if self.check_contains:
            dst_box = vm.bcast(dst_box, root=live_ranks[0])

        sub_box = GeometryVariable.from_shapely(
            sub_box,
            is_bbox=True,
            wrapped_state=dst_grid_wrapped_state,
            crs=dst_grid_crs)

        # Prepare geometry to match coordinate system and wrapping of the subset target
        sub_box = sub_box.prepare(archetype=self.src_grid)
        ocgis_lh(logger=_LOCAL_LOGGER, msg='prepared geometry', level=logging.DEBUG)

        ocgis_lh(logger=_LOCAL_LOGGER, msg='starting "self.src_grid.get_intersects"', level=logging.DEBUG)
        src_grid_subset, src_grid_slice = self.src_grid.get_intersects(
            sub_box,
            keep_touches=False,
            cascade=False,
            optimized_bbox_subset=self.optimized_bbox_subset,
            return_slice=True)
        ocgis_lh(logger=_LOCAL_LOGGER, msg='finished "self.src_grid.get_intersects"', level=logging.DEBUG)

        # Reload the data using a new source index distribution.
        if hasattr(src_grid_subset, 'reduce_global') and src_grid_subset.cindex is not None:
            # Only redistribute if requested and at least one rank is live for the source subset.
            if self.redistribute and len(
                    vm.get_live_ranks_from_object(src_grid_subset)) > 0:
                ocgis_lh(logger=_LOCAL_LOGGER, msg='starting redistribute', level=logging.DEBUG)
                topology = src_grid_subset.abstractions_available[
                    Topology.POLYGON]
                cindex = topology.cindex
                # The dimension name is read from the full source grid rather than from the subset.
                redist_dimname = self.src_grid.abstractions_available[
                    Topology.POLYGON].element_dim.name
                if src_grid_subset.is_empty:
                    redist_dim = None
                else:
                    redist_dim = topology.element_dim
                redistribute_by_src_idx(cindex, redist_dimname, redist_dim)
                ocgis_lh(logger=_LOCAL_LOGGER, msg='finished redistribute', level=logging.DEBUG)

        with vm.scoped_by_emptyable('src_grid_subset', src_grid_subset):
            if not vm.is_null:
                if not self.allow_masked:
                    gmask = src_grid_subset.get_mask()
                    if gmask is not None and gmask.any():
                        raise ValueError(
                            'Masked values in source grid subset.')

                if self.check_contains:
                    src_box = box(*src_grid_subset.extent_global)
                    if not does_contain(src_box, dst_box):
                        raise ValueError('Contains check failed.')

                # Try to reduce the coordinates in the case of unstructured grid data.
                if hasattr(src_grid_subset, 'reduce_global'
                           ) and src_grid_subset.cindex is not None:
                    ocgis_lh(logger=_LOCAL_LOGGER, msg='starting reduce_global', level=logging.DEBUG)
                    src_grid_subset = src_grid_subset.reduce_global()
                    ocgis_lh(logger=_LOCAL_LOGGER, msg='finished reduce_global', level=logging.DEBUG)
            else:
                # Nothing to validate or reduce on ranks outside the scoped communicator.
                pass

            # Convert the positional slice into a mapping keyed by dimension name; empty subsets have no slice.
            if src_grid_subset.is_empty:
                src_grid_slice = None
            else:
                src_grid_slice = {
                    src_grid_subset.dimensions[ii].name: src_grid_slice[ii]
                    for ii in range(src_grid_subset.ndim)
                }

        if yield_dst:
            yld = (src_grid_subset, src_grid_slice, dst_grid_subset, dst_slice)
        else:
            yld = src_grid_subset, src_grid_slice

        yield yld
def chunked_rwg(source, destination, weight, nchunks_dst, merge, esmf_src_type, esmf_dst_type, genweights,
                esmf_regrid_method, spatial_subset, src_resolution, dst_resolution, buffer_distance, wd, persist,
                eager, ignore_degenerate):
    """
    Run chunked regrid weight generation for a source and destination grid.

    :param source: Source grid location passed to ``_create_request_dataset_``.
    :param destination: Destination grid location passed to ``_create_request_dataset_``.
    :param weight: Path to the output weight file. Required when merging without a spatial subset or when
     generating weights with a spatial subset.
    :param nchunks_dst: ``None`` or a comma-separated string of integers describing the destination chunking.
    :param merge: If true, merge chunked weight files into ``weight`` (not applicable with ``spatial_subset``).
    :param esmf_src_type: ESMF type argument for the source request dataset.
    :param esmf_dst_type: ESMF type argument for the destination request dataset.
    :param genweights: If true, generate ESMF weights.
    :param esmf_regrid_method: Forwarded to ESMF as ``regrid_method``.
    :param spatial_subset: If true, write a spatial subset of the source instead of chunking the grids.
    :param src_resolution: Forwarded to ``GridChunker`` as ``src_grid_resolution``.
    :param dst_resolution: Forwarded to ``GridChunker`` as ``dst_grid_resolution``.
    :param buffer_distance: Forwarded to ``GridChunker`` as ``buffer_value``.
    :param wd: Working directory. Must not exist if provided; a temporary directory is created when ``None``.
    :param persist: If false, the working directory is removed at the end.
    :param eager: Forwarded to ``GridChunker``.
    :param ignore_degenerate: Forwarded to ESMF as ``ignore_degenerate``.
    :returns: ``0`` on success.
    :raises ValueError: If ``weight`` is required but missing, if ``wd`` already exists, or if ``weight`` is inside
     the working directory.
    """
    if not ocgis.env.USE_NETCDF4_MPI:
        msg = ('env.USE_NETCDF4_MPI is False. Considerable performance gains are possible if this is True. Is '
               'netCDF4-python built with parallel support?')
        ocgis_lh(msg, level=logging.WARN, logger='ocli.chunked_rwg', force=True)

    if nchunks_dst is not None:
        # Format the chunking decomposition from its string representation. Splitting a string without a comma
        # returns a single-element list so no special case is needed.
        nchunks_dst = tuple(int(ii) for ii in nchunks_dst.split(','))

    if merge:
        if not spatial_subset and weight is None:
            raise ValueError('"weight" must be a valid path if --merge')
    if spatial_subset and genweights and weight is None:
        raise ValueError('"weight" must be a valid path if --genweights')

    # Make a temporary working directory if one is not provided by the client. Only do this if we are writing subsets
    # and it is not a merge only operation.
    if wd is None:
        if ocgis.vm.rank == 0:
            wd = tempfile.mkdtemp(prefix='ocgis_chunked_rwg_')
        wd = ocgis.vm.bcast(wd)
    else:
        if ocgis.vm.rank == 0:
            # The working directory must not exist to proceed.
            # NOTE(review): this raises on rank 0 only; other ranks proceed to the barrier below.
            if os.path.exists(wd):
                raise ValueError("Working directory 'wd' must not exist.")
            else:
                # Make the working directory nesting as needed.
                os.makedirs(wd)
        ocgis.vm.barrier()

    if (merge and not spatial_subset) or (spatial_subset and genweights):
        if _is_subdir_(wd, weight):
            raise ValueError(
                'Merge weight file path must not in the working directory. It may get unintentionally deleted with the --no_persist flag.')

    # Create the source and destination request datasets.
    rd_src = _create_request_dataset_(source, esmf_src_type)
    rd_dst = _create_request_dataset_(destination, esmf_dst_type)

    # Execute a spatial subset if requested.
    paths = None
    if spatial_subset:
        # TODO: This path should be customizable.
        spatial_subset_path = os.path.join(wd, 'spatial_subset.nc')
        _write_spatial_subset_(rd_src, rd_dst, spatial_subset_path)
    # Only split grids if a spatial subset is not requested.
    else:
        # Update the paths to use for the grid.
        paths = {'wd': wd}

    # Arguments to ESMF regridding.
    esmf_kwargs = {'regrid_method': esmf_regrid_method,
                   'ignore_degenerate': ignore_degenerate}

    # Create the chunked regridding object. This is used for both chunked regridding and a regrid with a spatial subset.
    gs = GridChunker(rd_src, rd_dst, nchunks_dst=nchunks_dst, src_grid_resolution=src_resolution, paths=paths,
                     dst_grid_resolution=dst_resolution, buffer_value=buffer_distance, redistribute=True,
                     genweights=genweights, esmf_kwargs=esmf_kwargs, use_spatial_decomp='auto', eager=eager)

    # Write subsets and generate weights if requested in the grid splitter.
    # TODO: Need a weight only option. If chunks are written, then weights are written...
    if not spatial_subset and nchunks_dst is not None:
        gs.write_chunks()
    else:
        if spatial_subset:
            source = spatial_subset_path
        if genweights:
            gs.write_esmf_weights(source, destination, weight)

    # Create the global weight file. This does not apply to spatial subsets because there will always be one weight
    # file.
    if merge and not spatial_subset:
        # Weight file merge only works in serial.
        exc = None
        with ocgis.vm.scoped('weight file merge', [0]):
            if not ocgis.vm.is_null:
                # Capture a failure instead of letting it escape on the merging rank only. It is shared with the
                # other ranks below so every rank raises rather than blocking in the collective calls.
                try:
                    gs.create_merged_weight_file(weight)
                except Exception as merge_exc:
                    exc = merge_exc
        excs = ocgis.vm.gather(exc)
        excs = ocgis.vm.bcast(excs)
        for exc in excs:
            if exc is not None:
                raise exc

        ocgis.vm.barrier()

    # Remove the working directory unless the persist flag is provided.
    if not persist:
        if ocgis.vm.rank == 0:
            shutil.rmtree(wd)
        ocgis.vm.barrier()

    return 0
def write_chunks(self):
    """
    Write each source/destination grid subset to netCDF using the configured filename templates, then write an
    index file describing the chunks. ESMF regridding weights are generated per subset when ``self.genweights``
    is set.
    """
    source_names, destination_names, weight_paths = [], [], []
    destination_slices, source_slices = [], []
    index_path = self.create_full_path_from_template('index_file')

    ocgis_lh(logger=_LOCAL_LOGGER, msg='starting self.iter_src_grid_subsets', level=logging.DEBUG)
    # The one-based chunk number is used to fill the filename templates.
    chunks = self.iter_src_grid_subsets(yield_dst=True)
    for ctr, (sub_src, src_slc, sub_dst, dst_slc) in enumerate(chunks, start=1):
        ocgis_lh(logger=_LOCAL_LOGGER,
                 msg='finished iteration {} for self.iter_src_grid_subsets'.format(ctr),
                 level=logging.DEBUG)

        src_path = self.create_full_path_from_template('src_template', index=ctr)
        dst_path = self.create_full_path_from_template('dst_template', index=ctr)
        wgt_path = self.create_full_path_from_template('wgt_template', index=ctr)

        # Source and destination entries store the file name only; the weight entry keeps the full path.
        source_names.append(os.path.split(src_path)[1])
        destination_names.append(os.path.split(dst_path)[1])
        weight_paths.append(wgt_path)
        destination_slices.append(dst_slc)
        source_slices.append(src_slc)

        # The source is always written. Destinations are written only if an iterator is not provided.
        to_write = [(sub_src, src_path)]
        if self.iter_dst is None:
            to_write.append((sub_dst, dst_path))

        for scope_number, (target, path) in enumerate(to_write, start=1):
            with vm.scoped_by_emptyable('field.write' + str(scope_number), target):
                if vm.is_null:
                    continue
                ocgis_lh(logger=_LOCAL_LOGGER, msg='write_chunks:writing: {}'.format(path),
                         level=logging.DEBUG)
                field = Field(grid=target)
                field.write(path)
                ocgis_lh(logger=_LOCAL_LOGGER, msg='write_chunks:finished writing: {}'.format(path),
                         level=logging.DEBUG)

        # Generate an ESMF weights file if requested and at least one rank has data on it.
        if self.genweights and len(vm.get_live_ranks_from_object(sub_src)) > 0:
            vm.barrier()
            ocgis_lh(logger=_LOCAL_LOGGER, msg='write_chunks:writing esmf weights: {}'.format(wgt_path),
                     level=logging.DEBUG)
            self.write_esmf_weights(src_path, dst_path, wgt_path, src_grid=sub_src, dst_grid=sub_dst)
            vm.barrier()

    # Global shapes require a VM global scope to collect.
    src_global_shape = global_grid_shape(self.src_grid)
    dst_global_shape = global_grid_shape(self.dst_grid)

    # Gather and collapse source slices as some may be empty and we write on rank 0. For each chunk, the first
    # non-None slice found across the ranks is kept.
    slices_by_rank = vm.gather(source_slices)
    if vm.rank == 0:
        source_slices = [
            next((rank_slices[idx] for rank_slices in slices_by_rank if rank_slices[idx] is not None), None)
            for idx in range(len(source_slices))
        ]

    with vm.scoped('index write', [0]):
        if not vm.is_null:
            index_constants = GridChunkerConstants.IndexFile
            dim = Dimension('nfiles', len(source_names))
            grid_chunker_destination = index_constants.NAME_DESTINATION_VARIABLE

            vc = VariableCollection()

            # The index variable's attributes point at the filename variables and record the global shapes.
            grid_chunker_index = index_constants.NAME_INDEX_VARIABLE
            vidx = Variable(name=grid_chunker_index)
            vidx.attrs['esmf_role'] = grid_chunker_index
            vidx.attrs['grid_chunker_source'] = 'source_filename'
            vidx.attrs[index_constants.NAME_DESTINATION_VARIABLE] = 'destination_filename'
            vidx.attrs['grid_chunker_weights'] = 'weights_filename'
            vidx.attrs[index_constants.NAME_SRC_GRID_SHAPE] = src_global_shape
            vidx.attrs[index_constants.NAME_DST_GRID_SHAPE] = dst_global_shape
            vc.add_variable(vidx)

            # One string variable per filename list: (variable name, values, attributes).
            filename_variables = [
                ('source_filename', source_names, {'esmf_role': 'grid_chunker_source'}),
                ('destination_filename', destination_names, {'esmf_role': grid_chunker_destination}),
                ('weights_filename', weight_paths, {'esmf_role': 'grid_chunker_weights'}),
            ]
            for var_name, var_value, var_attrs in filename_variables:
                vc.add_variable(Variable(name=var_name, dimensions=dim, dtype=str, value=var_value,
                                         attrs=var_attrs))

            bounds_dimension = Dimension(name='bounds', size=2)
            # TODO: This needs to work with four dimensions.
            # Source index bounds.
            self.src_grid._gc_create_index_bounds_(RegriddingRole.SOURCE, vidx, vc, source_slices, dim,
                                                   bounds_dimension)
            # Destination index bounds.
            self.dst_grid._gc_create_index_bounds_(RegriddingRole.DESTINATION, vidx, vc, destination_slices, dim,
                                                   bounds_dimension)

            vc.write(index_path)

    vm.barrier()
def execute(self, coll, file_only=False):
    """
    Apply the configured calculations to a collection and return a calculation collection.

    :param coll: Input collection. Its ``variables`` mapping provides the data for the calculations.
    :param bool file_only: Forwarded to ``_get_value_weights_`` and to each function instance.
    :returns: A ``MultivariateCalcCollection``, ``KeyedOutputCalcCollection`` or ``CalcCollection`` depending on
     the function types in ``self.funcs``. Results are stored in its ``calc`` mapping.
    :raises NotImplementedError: Passed to ``ocgis_lh`` if a univariate calculation other than sample size is
     requested without a temporal grouping.
    """
    ## switch collection type based on the types of calculations present
    if self._check_calculation_members_(self.funcs, OcgCvArgFunction):
        klass = MultivariateCalcCollection
    elif self._check_calculation_members_(self.funcs, KeyedFunctionOutput):
        klass = KeyedOutputCalcCollection
    else:
        klass = CalcCollection
    ret = klass(coll, funcs=self.funcs)
    ## log the type of the collection actually returned, not the type of the incoming collection
    ocgis_lh(msg='returning collection of type {0}'.format(ret.__class__), logger='calc.engine')

    ## group the variables. if grouping is None, calculations are performed
    ## on each element. array computations are taken advantage of.
    if self.grouping is not None:
        ocgis_lh('setting temporal grouping(s)', 'calc.engine')
        for ds in coll.variables.itervalues():
            ds.temporal.set_grouping(self.grouping)

    ## iterate over functions
    for f in self.funcs:
        ocgis_lh('calculating: {0}'.format(f), logger='calc.engine')
        func_class = f['ref']
        is_sample_size = func_class == SampleSize
        ## change behavior for multivariate functions
        if issubclass(func_class, OcgCvArgFunction) or (
                isinstance(ret, MultivariateCalcCollection) and is_sample_size):
            ## do not calculate sample size for multivariate calculations yet
            if is_sample_size:
                ocgis_lh('sample size calculations not implemented for multivariate calculations yet',
                         'calc.engine', level=logging.WARN)
                continue
            ## cv-controlled multivariate functions require collecting
            ## data arrays before passing to function.
            kwds = f['kwds'].copy()
            ## reference the appropriate datasets to pass to the calculation
            keyed_datasets = {}
            for ii, key in enumerate(func_class.keys):
                ## the name of the variable passed in the request
                ## that should be mapped to the named argument
                backref = kwds[key]
                ## pull associated data
                dref = coll.variables[backref]
                ## map the key to a dataset
                keyed_datasets[key] = dref
                value, weights = self._get_value_weights_(dref, file_only=file_only)
                ## the calculation groups come from the first keyed dataset only. the weights passed to the
                ## function below are those of the last keyed dataset.
                if ii == 0:
                    if self.grouping is None:
                        dgroups = None
                    else:
                        dgroups = dref.temporal.group.dgroups
                ## replace the variable name with the data it references
                kwds[key] = value
            ## function object instance
            ref = func_class(agg=self.agg, groups=dgroups, kwds=kwds, weights=weights,
                             dataset=keyed_datasets, calc_name=f['name'], file_only=file_only)
            ## store calculation value
            ret.calc[f['name']] = ref.calculate()
        else:
            ## perform calculation on each variable
            for alias, var in coll.variables.iteritems():
                if alias not in ret.calc:
                    ret.calc[alias] = OrderedDict()
                value, weights = self._get_value_weights_(var, file_only=file_only)
                ## make the function instance
                try:
                    ref = func_class(values=value, agg=self.agg, groups=var.temporal.group.dgroups,
                                     kwds=f['kwds'], weights=weights, dataset=var, calc_name=f['name'],
                                     file_only=file_only)
                except AttributeError:
                    ## if there is no grouping, there is no need to calculate
                    ## sample size.
                    if self.grouping is None and is_sample_size:
                        break
                    elif self.grouping is None:
                        e = NotImplementedError('Univariate calculations must have a temporal grouping.')
                        ocgis_lh(exc=e, logger='calc.engine')
                    else:
                        raise
                ## calculate and store the values
                ret.calc[alias][f['name']] = ref.calculate()
    return ret
def reduce_reindex_coordinate_index(cindex, start_index=0):
    """
    Reindex a subset of global coordinate indices contained in the ``cindex`` variable. The starting index value
    (``0`` or ``1``) is set by ``start_index`` for the re-indexing procedure.

    Function will not respect masks.

    The function returns a two-element tuple:

     * First element --> A :class:`numpy.ndarray` with the same dimension as ``cindex`` containing the new indexing.
     * Second element --> A :class:`numpy.ndarray` containing the unique indices that may be used to reduce an external
       coordinate storage variable or array.

    :param cindex: A variable containing coordinate index integer values. This variable may be distributed. This may
     also be a NumPy array.
    :type cindex: :class:`~ocgis.Variable` | :class:`~numpy.ndarray`
    :param int start_index: The first index to use for the re-indexing of ``cindex``. This may be ``0`` or ``1``.
    :rtype: tuple
    """
    ocgis_lh(msg='entering reduce_reindex_coordinate_index', logger='geomc', level=logging.DEBUG)

    # Get the coordinate index values as a NumPy array.
    try:
        ocgis_lh(msg='calling cindex.get_value()', logger='geomc', level=logging.DEBUG)
        ocgis_lh(msg='cindex.has_allocated_value={}'.format(cindex.has_allocated_value), logger='geomc',
                 level=logging.DEBUG)
        ocgis_lh(msg='cindex.dimensions[0]={}'.format(cindex.dimensions[0]), logger='geomc', level=logging.DEBUG)
        cindex = cindex.get_value()
        ocgis_lh(msg='finished cindex.get_value()', logger='geomc', level=logging.DEBUG)
    except AttributeError:
        # Assume this is already a NumPy array.
        pass

    # Only work with 1D arrays.
    cindex = np.atleast_1d(cindex)
    # Used to return the coordinate index to the original shape of the incoming coordinate index.
    original_shape = cindex.shape
    cindex = cindex.flatten()

    # Create the unique coordinate index array.
    ocgis_lh(msg='calling create_unique_global_array', logger='geomc', level=logging.DEBUG)
    if vm.size > 1:
        u = np.array(create_unique_global_array(cindex))
    else:
        u = np.unique(cindex)
    ocgis_lh(msg='finished create_unique_global_array', logger='geomc', level=logging.DEBUG)

    # Synchronize the data type for the new coordinate index. The root rank's data type is used everywhere.
    lrank = vm.rank
    dtype = u.dtype if lrank == 0 else None
    dtype = vm.bcast(dtype)

    # Flag to indicate if the current rank has any unique values.
    has_u = len(u) > 0

    # Create the new coordinate index.
    new_u_dimension = create_distributed_dimension(len(u), name='__new_u_dimension__')
    new_u = arange_from_dimension(new_u_dimension, start=start_index, dtype=dtype)

    # Create a hash for the new index. This is used to remap the old coordinate index.
    uidx = dict(zip(u, new_u)) if has_u else None

    vm.barrier()

    # Construct local bounds for the rank's unique value. This is used as a cheap index when ranks are looking for
    # index overlaps. The array methods avoid iterating the array element-by-element in Python.
    local_bounds = (u.min(), u.max()) if has_u else None

    # Put a copy for the bounds indexing on each rank.
    lb_global = vm.gather(local_bounds)
    lb_global = vm.bcast(lb_global)

    # Find the vm ranks the local rank cares about. It cares if unique values have overlapping unique bounds.
    overlaps = []
    for rank, lb in enumerate(lb_global):
        if rank == lrank or lb is None:
            continue
        contains = np.logical_and(lb[1] >= cindex, lb[0] <= cindex)
        if np.any(contains):
            overlaps.append(rank)

    # Ranks must be able to identify which ranks will be asking them for data.
    global_overlaps = vm.gather(overlaps)
    global_overlaps = vm.bcast(global_overlaps)
    destinations = [ii for ii, jj in enumerate(global_overlaps) if vm.rank in jj]

    # MPI communication tags used in the algorithm.
    tag_search = MPITag.REDUCE_REINDEX_SEARCH
    tag_success = MPITag.REDUCE_REINDEX_SUCCESS
    tag_child_finished = MPITag.REDUCE_REINDEX_CHILD_FINISHED
    tag_found = MPITag.REDUCE_REINDEX_FOUND

    # Fill array for the new coordinate index.
    new_cindex = np.empty_like(cindex)

    # Fill the new coordinate indexing. The root rank and the other ranks run different routines.
    if lrank == 0:
        run_rr_root(new_cindex, cindex, uidx, destinations, tag_child_finished, tag_found, tag_search, tag_success)
    else:
        run_rr_nonroot(new_cindex, cindex, uidx, destinations, has_u, overlaps, tag_child_finished, tag_found,
                       tag_search, tag_success)

    # Return array to its original shape.
    new_cindex = new_cindex.reshape(*original_shape)

    vm.barrier()

    return new_cindex, u
def aggregate_spatial(self, fill):
    """
    Report that spatial aggregation of raw input values is unsupported for keyed output functions.

    A :class:`NotImplementedError` is built and handed to ``ocgis_lh`` through its ``exc`` argument on the
    ``calc.library`` logger (presumably ``ocgis_lh`` raises it -- confirm against the logging helper).

    :param fill: Unused.
    """
    message = 'Spatial aggregation of raw input values not implemented for keyed output functions.'
    ocgis_lh(exc=NotImplementedError(message), logger='calc.library')
def execute(self):
    """
    Run the operations and return the converter's output.

    An output directory is created when auxiliary files are requested and the output format uses a directory.
    If anything fails after that point, logging is shut down and a directory created here is removed before the
    exception is re-raised.

    :returns: The value returned by the converter's ``write`` method.
    :raises IOError: If the output directory exists and ``env.OVERWRITE`` is ``False``.
    :raises NotImplementedError: If ``self.ops.output_grouping`` is not ``None``.
    """
    # check for a user-supplied output prefix
    prefix = self.ops.prefix

    # do directory management
    # flag to indicate a directory is made. mostly a precaution to make sure the appropriate directory is removed.
    made_output_directory = False

    if self.ops.output_format in self._no_directory:
        # No output directory for some formats.
        outdir = None
    elif self.ops.add_auxiliary_files:
        # Auxiliary files require that a directory be created.
        outdir = os.path.join(self.ops.dir_output, prefix)
        # Create and/or remove the output directory.
        if vm.rank == 0:
            if os.path.exists(outdir):
                if env.OVERWRITE:
                    shutil.rmtree(outdir)
                else:
                    raise IOError('The output directory exists but env.OVERWRITE is False: {0}'.format(outdir))
            os.mkdir(outdir)
        # Block until output directory is created. Most often the zero rank manages writing, but this is not a
        # requirement.
        vm.Barrier()
        # On an exception, the output directory needs to be removed.
        made_output_directory = True
    else:
        # with no auxiliary files the output directory will do just fine
        outdir = self.ops.dir_output

    try:
        # configure logging
        progress = self._get_progress_and_configure_logging_(outdir, prefix)

        # create local logger
        interpreter_log = ocgis_lh.get_logger('interpreter')
        ocgis_lh('Initializing...', interpreter_log)

        # set up environment
        # run validation - doesn't do much now
        self.check()

        # do not perform vector wrapping for NetCDF output
        if self.ops.output_format == 'nc':
            ocgis_lh('"vector_wrap" set to False for netCDF output', interpreter_log, level=logging.WARN)
            self.ops.vector_wrap = False

        # if the requested output format is "meta" then no operations are run and only the operations dictionary is
        # required to generate output.
        Converter = self.ops._get_object_(OutputFormat.name).get_converter_class()
        if issubclass(Converter, AbstractMetaConverter):
            ret = Converter(self.ops).write()
        # this is the standard request for other output types.
        else:
            # the operations object performs subsetting and calculations
            ocgis_lh('initializing subset', interpreter_log, level=logging.DEBUG)
            so = OperationsEngine(self.ops, progress=progress)
            # only ungrouped output is supported; a single converter is needed in that case
            if self.ops.output_grouping is not None:
                raise NotImplementedError
            ocgis_lh('initializing converter', interpreter_log, level=logging.DEBUG)
            conv = self._get_converter_(Converter, outdir, prefix, so)
            ocgis_lh('starting converter write loop: {0}'.format(self.ops.output_format), interpreter_log,
                     level=logging.DEBUG)
            ret = conv.write()

        ocgis_lh('Operations successful.', interpreter_log)

        return ret
    except BaseException:
        # Cleanup-and-re-raise for any failure, including interrupts. The output directory needs to be removed if
        # one was created. Shutdown logging before to make sure there is no file lock (Windows).
        ocgis_lh.shutdown()
        if vm.rank == 0 and made_output_directory:
            shutil.rmtree(outdir)
        raise
    finally:
        ocgis_lh.shutdown()

        if env.ADD_OPS_MPI_BARRIER:
            vm.Barrier()
def _iter_collections_(self):
    """
    Generate collections from the operations' datasets, running calculations when a calculation engine is set.

    :rtype: :class:`ocgis.collection.base.AbstractCollection`
    :raises NotImplementedError: If ``self.ops.output_grouping`` is not ``None``.
    """
    # Multivariate calculations need every dataset together in one sequence. Otherwise each dataset is processed
    # on its own as a single-element sequence.
    if self._has_multivariate_calculations:
        dataset_groups = [list(self.ops.dataset)]
    else:
        dataset_groups = [[rd] for rd in self.ops.dataset]

    # Configure the progress object.
    self._progress.n_subsettables = len(dataset_groups)
    self._progress.n_geometries = get_default_or_apply(self.ops.geom, len, default=1)
    self._progress.n_calculations = get_default_or_apply(self.ops.calc, len, default=0)

    # Some introductory logging.
    ocgis_lh(msg='{0} dataset collection(s) to process.'.format(self._progress.n_subsettables),
             logger=self._subset_log)

    if self.ops.geom is None:
        geom_msg = 'Entire spatial domain returned. No selection geometries requested.'
    else:
        geom_msg = 'Each data collection will be subsetted by {0} selection geometries.'.format(
            self._progress.n_geometries)
    ocgis_lh(msg=geom_msg, logger=self._subset_log)

    if self._progress.n_calculations == 0:
        calc_msg = 'No calculations requested.'
    else:
        func_names = ', '.join([calc['func'] for calc in self.ops.calc])
        calc_msg = 'The following calculations will be applied to each data collection: {0}.'.format(func_names)
    ocgis_lh(msg=calc_msg, logger=self._subset_log)

    # Process the incoming datasets. Convert from request datasets to fields as needed.
    for rds in dataset_groups:
        try:
            msg = 'Processing URI(s): {0}'.format([rd.uri for rd in rds])
        except AttributeError:
            # At least one element has no URI (field objects do not). Fall back to the name for those elements.
            labels = []
            for rd in rds:
                try:
                    labels.append(rd.uri)
                except AttributeError:
                    labels.append(rd.name)
            msg = 'Processing URI(s) / field names: {0}'.format(labels)
        ocgis_lh(msg=msg, logger=self._subset_log)

        for coll in self._process_subsettables_(rds):
            if vm.is_null or self.cengine is None:
                # If there are no calculations, mark progress to indicate a geometry has been completed.
                self._progress.mark()
            else:
                # There are calculations: run them now and continue with the resulting collection.
                ocgis_lh('Starting calculations.', self._subset_log)
                raise_if_empty(coll)

                # Look for any temporal grouping optimizations.
                optimizations = self.ops.optimizations
                tgds = None if optimizations is None else optimizations.get('tgds')

                # Execute the calculations.
                coll = self.cengine.execute(coll, file_only=self.ops.file_only, tgds=tgds)

                # If we need to spatially aggregate and calculations used raw values, update the collection
                # fields and subset geometries. Iterate a copy because the collection is modified in the loop.
                if self.ops.aggregate and self.ops.calc_raw:
                    snapshot = coll.copy()
                    for sfield, container in snapshot.iter_fields(yield_container=True):
                        updated = _update_aggregation_wrapping_crs_(self, None, sfield, container, None)
                        coll.add_field(updated, container, force=True)

            # Conversion of groups.
            if self.ops.output_grouping is not None:
                raise NotImplementedError
            ocgis_lh('_iter_collections_ yielding', self._subset_log, level=logging.DEBUG)
            yield coll
def __init__(self, uri=None, variable=None, units=None, time_range=None, time_region=None,
             time_subset_func=None, level_range=None, conform_units_to=None, crs='auto', t_units=None,
             t_calendar=None, t_conform_units_to=None, grid_abstraction='auto', grid_is_isomorphic='auto',
             dimension_map=None, field_name=None, driver=None, regrid_source=True, regrid_destination=False,
             metadata=None, format_time=True, opened=None, uid=None, rename_variable=None, predicate=None,
             rotated_pole_priority=False, driver_kwargs=None):
    """
    Initialize the request from a URI, an opened file-like object, or metadata alone.

    One of ``uri``, ``metadata`` or ``opened`` is required. If ``uri`` is ``None`` and ``metadata`` is provided,
    the driver defaults to ``DriverKey.NETCDF_CF``. If no driver is set after that, it is autodiscovered from
    ``uri``.

    :param uri: Data location(s), normalized through ``get_uri``.
    :param variable: Variable name(s), converted with ``get_tuple`` when not ``None``.
    :param dimension_map: A ``DimensionMap`` or a dictionary converted with ``DimensionMap.from_dict``.
    :param metadata: Source metadata. A deep copy is stored.
    :param crs: Coordinate system or ``'auto'``. A deep copy is stored.
    :param driver: Driver key or ``None``.
    :param opened: An open file-like object used in place of file location parameters. Requires ``driver``.
    :param rename_variable: New variable name(s), converted with ``get_tuple`` when not ``None``.
    :param t_units: Overrides the ``units`` attribute in the time variable's metadata.
    :param t_calendar: Overrides the ``calendar`` attribute in the time variable's metadata.
    :param t_conform_units_to: Target time units, converted to a units object using the time calendar.

    The remaining arguments are stored on the instance unchanged.

    :raises RequestValidationError: Passed to ``ocgis_lh`` if ``opened`` is given without ``driver``, or if
     ``uri``, ``metadata`` and ``opened`` are all ``None``.
    """
    # Set for the duration of initialization and cleared below.
    # NOTE(review): presumably consulted by the attribute setters used further down -- confirm.
    self._is_init = True

    self._field_name = field_name
    self._level_range = None
    self._time_range = None
    self._time_region = None
    self._time_subset_func = None
    self._driver_kwargs = driver_kwargs

    if rename_variable is not None:
        rename_variable = get_tuple(rename_variable)
    self._rename_variable = rename_variable

    self.rotated_pole_priority = rotated_pole_priority
    self.predicate = predicate
    # Accept a plain dictionary for convenience and convert it to a dimension map object.
    if dimension_map is not None and isinstance(dimension_map, dict):
        dimension_map = DimensionMap.from_dict(dimension_map)
    self._dimension_map = dimension_map

    # Store a copy so later changes on this object do not modify the caller's metadata.
    self._metadata = deepcopy(metadata)
    self._uri = None
    self.uid = uid

    # This is an "open" file-like object that may be passed in-place of file location parameters.
    self._opened = opened
    if opened is not None and driver is None:
        msg = 'If "opened" is not None, then a "driver" must be provided.'
        ocgis_lh(logger='request', exc=RequestValidationError('driver', msg))

    # Field creation options.
    self.format_time = format_time
    self.grid_abstraction = grid_abstraction
    self.grid_is_isomorphic = grid_is_isomorphic
    # Flag used for regridding to determine if the coordinate system was assigned during initialization.
    self._has_assigned_coordinate_system = False if crs == 'auto' else True

    if uri is None:
        # Fields may be created from pure metadata.
        if metadata is not None:
            # The default OCGIS driver is NetCDF.
            if driver is None:
                driver = DriverKey.NETCDF_CF
        elif opened is None:
            # No URI, no metadata and no opened object: there is nothing to read from.
            ocgis_lh(logger='request', exc=RequestValidationError('uri', 'Cannot be None'))
    else:
        self._uri = get_uri(uri)

    # Resolve the driver class and instantiate it with this request.
    if driver is None:
        klass = get_autodiscovered_driver(uri)
    else:
        klass = get_driver(driver)
    self._driver = klass(self)

    if variable is not None:
        variable = get_tuple(variable)
    self._variable = variable

    # These are assigned through the public attribute names, after the driver has been created.
    self.time_range = time_range
    self.time_region = time_region
    self.time_subset_func = time_subset_func
    self.level_range = level_range

    # Copy so the caller's coordinate system object is not shared.
    self._crs = deepcopy(crs)

    self.regrid_source = regrid_source
    self.regrid_destination = regrid_destination

    self.units = units
    self.conform_units_to = conform_units_to

    self._is_init = False

    self._validate_time_subset_()

    # Update metadata for time variable.
    tvar = self.dimension_map.get_variable(DMK.TIME)
    if tvar is not None:
        m = self.metadata['variables'][tvar]
        if t_units is not None:
            m['attrs']['units'] = t_units
        if t_calendar is not None:
            m['attrs']['calendar'] = t_calendar
        if t_conform_units_to is not None:
            from ocgis.util.units import get_units_object
            # Use the calendar from the (possibly just updated) metadata, falling back to the default calendar.
            t_calendar = m['attrs'].get(
                'calendar', constants.DEFAULT_TEMPORAL_CALENDAR)
            t_conform_units_to = get_units_object(t_conform_units_to, calendar=t_calendar)
            m['conform_units_to'] = t_conform_units_to
def _update_aggregation_wrapping_crs_(obj, alias, sfield, subset_sdim, subset_ugid):
    """
    Post-process a subsetted field: optional spatial aggregation, wrapping, and coordinate system updates.

    Steps, in order:

    1. If ``obj.ops.aggregate`` is set, union the field's geometries with a spatial average of its data variables
       and create the ``SubcommName.SPATIAL_AVERAGE`` sub-communicator.
    2. Return early when the current communicator is null.
    3. If the field's wrapped state is ``WrappedState.UNWRAPPED`` (and calculation optimization is off), wrap a
       copy of the geometries for vector output formats when ``obj.ops.vector_wrap`` is set.
    4. Transform back to the stored rotated pole coordinate system and/or to ``obj.ops.output_crs``.
    5. Call ``_update_wrapping_``.

    :param obj: Object exposing ``ops``, ``_subset_log`` and ``_backtransform`` (presumably the subset operation
     engine -- confirm against the caller).
    :param alias: Only forwarded to the log calls.
    :param sfield: The subsetted field to update. It may be replaced by a new object (after the union or the
     copy made before an output CRS update).
    :param subset_sdim: Selection geometry container or ``None``. Its coordinate system is updated in-place when
     it differs from ``obj.ops.output_crs``.
    :param subset_ugid: Only forwarded to the log calls.
    :returns: The updated field.
    :raises ValueError: If ``get_unioned`` returns ``None`` while ``vm.size`` is one.
    """
    # NOTE(review): ``raise_if_empty`` presumably raises for an empty field -- its definition is not visible here.
    raise_if_empty(sfield)

    ocgis_lh('entering _update_aggregation_wrapping_crs_', obj._subset_log, alias=alias, ugid=subset_ugid,
             level=logging.DEBUG)

    # Aggregate if requested.
    if obj.ops.aggregate:
        ocgis_lh('aggregate requested in _update_aggregation_wrapping_crs_', obj._subset_log, alias=alias,
                 ugid=subset_ugid, level=logging.DEBUG)

        # There may be no geometries if we are working with a gridded dataset. Load the geometries if this is the case.
        sfield.set_abstraction_geom()

        ocgis_lh('after sfield.set_abstraction_geom in _update_aggregation_wrapping_crs_', obj._subset_log,
                 alias=alias, ugid=subset_ugid, level=logging.DEBUG)

        # Union the geometries and spatially average the data variables.
        # with vm.scoped(vm.get_live_ranks_from_object(sfield)):
        sfield = sfield.geom.get_unioned(spatial_average=sfield.data_variables)
        ocgis_lh('after sfield.geom.get_unioned in _update_aggregation_wrapping_crs_', obj._subset_log,
                 alias=alias, ugid=subset_ugid, level=logging.DEBUG)

        # None is returned for the non-root process. Check we are in parallel and create an empty field.
        if sfield is None:
            if vm.size == 1:
                raise ValueError('None should not be returned from get_unioned if running on a single processor.')
            else:
                sfield = Field(is_empty=True)
        else:
            # The union returns the geometry variable; continue with the field that owns it.
            sfield = sfield.parent

        # The (possibly empty) field is passed so the sub-communicator can be built from it; it becomes current.
        vm.create_subcomm_by_emptyable(SubcommName.SPATIAL_AVERAGE, sfield, is_current=True, clobber=True)

        if not vm.is_null and subset_sdim is not None and subset_sdim.geom is not None:
            # Add the unique geometry identifier variable. This should match the selection geometry's identifier.
            new_gid_variable_kwargs = dict(name=HeaderName.ID_GEOMETRY, value=subset_sdim.geom.ugid.get_value(),
                                           dimensions=sfield.geom.dimensions)
            dm = get_data_model(obj.ops)
            new_gid_variable = create_typed_variable_from_data_model('int', data_model=dm, **new_gid_variable_kwargs)
            sfield.geom.set_ugid(new_gid_variable)

    # Nothing more to do when the current communicator is null. The field is returned as-is.
    if vm.is_null:
        ocgis_lh(msg='null communicator following spatial average. returning.', logger=obj._subset_log,
                 level=logging.DEBUG)
        return sfield

    # Check again as the aggregation above may have replaced the field.
    raise_if_empty(sfield)
    ocgis_lh(msg='before wrapped_state in _update_aggregation_wrapping_crs_', logger=obj._subset_log,
             level=logging.DEBUG)
    try:
        wrapped_state = sfield.wrapped_state
    except WrappedStateEvalTargetMissing:
        # If there is no target for wrapping evaluation, then consider this unknown.
        wrapped_state = WrappedState.UNKNOWN
    ocgis_lh(msg='after wrapped_state in _update_aggregation_wrapping_crs_', logger=obj._subset_log,
             level=logging.DEBUG)

    # Wrap the returned data.
    if not env.OPTIMIZE_FOR_CALC and not sfield.is_empty:
        if wrapped_state == WrappedState.UNWRAPPED:
            ocgis_lh('wrap target is empty: {}'.format(sfield.is_empty), obj._subset_log, level=logging.DEBUG)

            # There may be no geometries if we are working with a gridded dataset. Load the geometries if this
            # is the case.
            sfield.set_abstraction_geom()

            if obj.ops.output_format in constants.VECTOR_OUTPUT_FORMATS and obj.ops.vector_wrap:
                ocgis_lh('wrapping output geometries', obj._subset_log, alias=alias, ugid=subset_ugid,
                         level=logging.DEBUG)

                # Deepcopy geometries before wrapping as wrapping will be performed inplace. The original field may
                # need to be reused for additional subsets.
                geom = sfield.geom
                copied_geom = geom.get_value().copy()
                geom.set_value(copied_geom)
                geom.wrap()
                ocgis_lh('finished wrapping output geometries', obj._subset_log, alias=alias, ugid=subset_ugid,
                         level=logging.DEBUG)

    # NOTE(review): the statements below are placed at function level (independent of the wrapping condition
    # above) -- confirm against the intended nesting.
    # Transform back to rotated pole if necessary.
    original_rotated_pole_crs = obj._backtransform.get(constants.BackTransform.ROTATED_POLE)
    if original_rotated_pole_crs is not None:
        # Skipped when a spherical or WGS84 output coordinate system was requested.
        if not isinstance(obj.ops.output_crs, (Spherical, WGS84)):
            sfield.update_crs(original_rotated_pole_crs)

    # Update the coordinate system of the data output.
    if obj.ops.output_crs is not None:
        # If the geometry is not none, it may need to be projected to match the output coordinate system.
        if subset_sdim is not None and subset_sdim.crs != obj.ops.output_crs:
            subset_sdim.update_crs(obj.ops.output_crs)

        # Update the subsetted field's coordinate system. The update is applied to a copy of the field.
        sfield = sfield.copy()
        sfield.update_crs(obj.ops.output_crs)

    # Wrap or unwrap the data if the coordinate system permits.
    _update_wrapping_(obj, sfield)

    ocgis_lh('leaving _update_aggregation_wrapping_crs_', obj._subset_log, level=logging.DEBUG)

    return sfield
def write(self):
    """
    Write every collection produced by iterating ``self``, then the optional auxiliary files.

    The first collection is written with a write mode of ``None`` so the driver can choose the mode. All later
    collections are appended. User geometries, when selected and present on the first collection, are written
    from rank 0 to a ``<prefix>_ugid.shp`` shapefile. The metadata, dataset identifier and source metadata
    files are only written when operations are attached, auxiliary files are requested and ``MPI_RANK`` is 0.

    :returns: The value of ``self._get_return_()``.
    """
    ocgis_lh('starting write method', self._log, logging.DEBUG)

    # Turned on during the first pass if user geometries have to be written alongside the data.
    write_ugeom = False

    # Keyword arguments for the collection writer. Only the write mode changes between collections.
    write_kwargs = {KeywordArgument.PATH: self.path}

    first_pass = True
    for coll in self:
        # Append by default. The first pass below replaces this.
        write_mode = MPIWriteMode.APPEND

        if first_pass:
            first_pass = False

            # The file is created on the first pass. Leave the choice of write mode to the drivers so they can
            # handle parallelism.
            write_mode = None

            # User geometries are written if selected and present on the incoming collection.
            if self._add_ugeom and coll.has_container_geometries:
                write_ugeom = True

            if write_ugeom:
                if vm.rank != 0:
                    ugeom_fiona_path = None
                else:
                    # The output file name for the user geometries, optionally nested in the shapefile folder.
                    ugid_shp_name = self.prefix + '_ugid.shp'
                    if self._add_ugeom_nest:
                        shp_directory = self._get_or_create_shp_folder_()
                    else:
                        shp_directory = self.outdir
                    ugeom_fiona_path = os.path.join(shp_directory, ugid_shp_name)

        write_kwargs[KeywordArgument.WRITE_MODE] = write_mode
        self._write_coll_(write_kwargs, coll)

        if not write_ugeom:
            continue

        # Only rank 0 is part of the scoped communicator that writes the user geometries.
        with vm.scoped(SubcommName.UGEOM_WRITE, [0]):
            if not vm.is_null:
                for subset_field in list(coll.children.values()):
                    subset_field.write(ugeom_fiona_path, write_mode=write_mode, driver=DriverVector)

    # The metadata and dataset descriptor files may only be written if OCGIS operations are present.
    ops = self.ops
    if ops is not None and self.add_auxiliary_files and MPI_RANK == 0:
        # OCGIS metadata output.
        if self.add_meta:
            ocgis_lh('adding OCGIS metadata file', 'conv', logging.DEBUG)
            from ocgis.conv.meta import MetaOCGISConverter

            lines = MetaOCGISConverter(ops).write()
            meta_name = self.prefix + '_' + MetaOCGISConverter._meta_filename
            out_path = os.path.join(self.outdir, meta_name)
            with open(out_path, 'w') as meta_file:
                meta_file.write(lines)

        # Dataset descriptor file.
        if self._add_did_file:
            ocgis_lh('writing dataset description (DID) file', 'conv', logging.DEBUG)
            did_path = os.path.join(self.outdir, self.prefix + '_did.csv')
            _write_dataset_identifier_file_(did_path, ops)

        # Source metadata.
        if self._add_source_meta:
            ocgis_lh('writing source metadata file', 'conv', logging.DEBUG)
            source_meta_path = os.path.join(self.outdir, self.prefix + '_source_metadata.txt')
            _write_source_meta_(source_meta_path, ops)

    # Return the internal path unless overloaded by subclasses.
    return self._get_return_()
def value_datetime(self):
    """
    Return the cached datetime form of ``value``, computing it on first access.

    When nothing is cached, ``_get_optimized_('_value_datetime')`` is consulted first. Only an explicit ``False``
    triggers the conversion of ``self.value`` through ``get_datetime``; the result is forced to at least one
    dimension and cached. For any other result the conversion is skipped and whatever ``_value_datetime`` holds
    afterwards is returned (presumably filled in by the optimization lookup -- confirm).

    :returns: The content of ``self._value_datetime``.
    """
    cached = self._value_datetime
    if cached is not None:
        return cached

    if self._get_optimized_('_value_datetime') is False:
        ocgis_lh('getting value_datetime','nc.dimension',logging.DEBUG)
        converted = self.get_datetime(self.value)
        self._value_datetime = np.atleast_1d(converted)

    # Read the attribute again: it may have been assigned above or by the optimization lookup.
    return self._value_datetime
def iter_geoms(self, key=None, select_uid=None, path=None, load_geoms=True, as_field=False, uid=None,
               select_sql_where=None, slc=None, union=False, data_model=None, driver_kwargs=None):
    """
    See documentation for :class:`~ocgis.GeomCabinetIterator`.

    Generator over the features of a geometry data source. Each feature is yielded as a record dictionary with
    the keys ``'properties'`` and ``'meta'`` (plus ``'geom'`` when ``load_geoms`` is true), or as a field when
    ``as_field`` is true. With ``union`` a single field built from all records is yielded instead.

    :param key: Forwarded with ``path`` to ``_get_path_by_key_or_direct_path_`` to locate the data source.
    :param select_uid: Forwarded to the features selection. May not be combined with ``slc``.
    :param path: Direct path to the data source (see ``key``).
    :param bool load_geoms: If ``True``, load each geometry from its WKB export into the ``'geom'`` key.
    :param bool as_field: If ``True``, convert each record with ``Field.from_records`` before yielding.
    :param uid: Name of the unique identifier property. Resolved on the first feature with
     ``get_uid_from_properties``; when that reports the identifier must be added, the feature's FID is used and
     the identifier is registered as ``'int'`` in the schema.
    :param select_sql_where: Forwarded to the features selection. May not be combined with ``slc``.
    :param slc: Optional slice/index selection, normalized with ``get_index_slice_for_iteration``.
    :param bool union: If ``True``, yield one unioned field for all selected records.
    :param data_model: Forwarded to ``Field.from_records``.
    :param driver_kwargs: Forwarded to the metadata and features lookups.
    :raises ValueError: If ``slc`` is combined with another select statement (reported through ``ocgis_lh``), or
     if the selection produced no features.
    """
    # Get the path to the output shapefile.
    shp_path = self._get_path_by_key_or_direct_path_(key=key, path=path)

    # Get the source metadata.
    meta = self.get_meta(path=shp_path, driver_kwargs=driver_kwargs)

    if union:
        # Delegate the record iteration to a non-union iterator and union the records in a single field.
        gic = GeomCabinetIterator(key=key, select_uid=select_uid, path=path, load_geoms=load_geoms, as_field=False,
                                  uid=uid, select_sql_where=select_sql_where, slc=slc, union=False,
                                  data_model=data_model, driver_kwargs=driver_kwargs)
        yld = Field.from_records(gic, meta['schema'], crs=meta['crs'], uid=uid, union=True, data_model=data_model)
        yield yld
    else:
        if slc is not None and (select_uid is not None or select_sql_where is not None):
            exc = ValueError('Slice is not allowed with other select statements.')
            ocgis_lh(exc=exc, logger='geom_cabinet')

        # Format the slice for iteration. We will get the features by index if a slice is provided.
        if slc is not None:
            slc = get_index_slice_for_iteration(slc)

        # Open the target geometry file.
        ds = ogr.Open(shp_path)

        try:
            # Return the features iterator.
            features = self._get_features_object_(ds, uid=uid, select_uid=select_uid,
                                                  select_sql_where=select_sql_where, driver_kwargs=driver_kwargs)

            # Using slicing, we will select the features individually from the object.
            if slc is None:
                itr = features
            elif self.get_gdal_driver(shp_path) == 'OpenFileGDB' or isinstance(slc, slice):
                # The geodatabase API requires iterations to get the given location.
                def _o_itr_(features_object, slice_start, slice_stop):
                    for ctr2, fb in enumerate(features_object):
                        # ... iterate until start is reached.
                        if ctr2 < slice_start:
                            continue
                        # ... stop if we have reached the stop. A plain return ends the generator; raising
                        # StopIteration here would surface as a RuntimeError on Python 3.7+ (PEP 479).
                        if ctr2 == slice_stop:
                            return
                        yield fb

                itr = _o_itr_(features, slc.start, slc.stop)
            else:
                # Convert the slice index to an integer to avoid type conflict in GDAL layer.
                itr = (features.GetFeature(int(idx)) for idx in slc)

            # Tracks whether any feature was returned. An explicit flag is used so the check also runs
            # when assertions are disabled.
            found = False

            # Convert feature objects to record dictionaries.
            for ctr, feature in enumerate(itr):
                found = True

                if load_geoms:
                    yld = {'geom': wkb.loads(feature.geometry().ExportToWkb())}
                else:
                    yld = {}
                items = feature.items()
                properties = OrderedDict([(name, items[name]) for name in feature.keys()])
                yld.update({'properties': properties, 'meta': meta})

                # The unique identifier is resolved once, using the first feature's properties.
                if ctr == 0:
                    uid, add_uid = get_uid_from_properties(properties, uid)
                    # The properties schema needs to be updated to account for the adding of a unique identifier.
                    if add_uid:
                        meta['schema']['properties'][uid] = 'int'

                if add_uid:
                    # Add the unique identifier if required.
                    properties[uid] = feature.GetFID()
                else:
                    # Ensure the unique identifier is an integer.
                    properties[uid] = int(properties[uid])

                if as_field:
                    yld = Field.from_records([yld], schema=meta['schema'], crs=yld['meta']['crs'], uid=uid,
                                             data_model=data_model)

                yield yld

            if not found:
                # No features were returned by the iterator. Raise a clear exception.
                msg = 'No features returned from target data source. Were features appropriately selected?'
                raise ValueError(msg)
        finally:
            # Close or destroy the data source object if it actually exists.
            if ds is not None:
                ds.Destroy()
            ds = None
def get_collection(args):
    """
    Subset every request dataset of an operation and gather the results in a collection.

    The single tuple argument is unpacked inside the body (tuple parameters in the signature are not valid on
    Python 3). Callers keep passing one ``(so, geom, logger)`` tuple.

    :param args: Tuple of ``(so, geom, logger)``.
    :type args: tuple
    :type so: SubsetOperation
    :type geom: None, GeometryDataset, ShpDataset
    :rtype: AbstractCollection
    :raises NotImplementedError: If ``so.ops.output_grouping`` is not ``None``.
    """
    so, geom, logger = args

    ## initialize the collection object to store the subsetted data.
    coll = RawCollection(ugeom=geom, ops=so.ops)

    ## perform the operations on each request dataset
    ocgis_lh('{0} request dataset(s) to process'.format(len(so.ops.dataset)), logger)
    ## reference the geometry ugid
    ugid = None if geom is None else geom.spatial.uid[0]
    ## keeps the name bound for the log call after the loop when there are no request datasets
    alias = None
    # NOTE(review): the block nesting below follows the control flow implied by the statements (the subset is
    # only performed when no slice is requested) -- confirm against the intended layout.
    for request_dataset in so.ops.dataset:
        ## reference the request dataset alias
        alias = request_dataset.alias
        ocgis_lh('processing', logger, level=logging.INFO, alias=alias, ugid=ugid)
        ## copy the geometry so projecting/unwrapping does not touch the caller's object
        copy_geom = deepcopy(geom)
        ## reference the dataset object
        ods = request_dataset.ds
        ## return a slice or do the other operations
        if so.ops.slice is not None:
            ods = ods.__getitem__(so.ops.slice)
        ## other subsetting operations
        else:
            ## if a geometry is passed and the target dataset is 360 longitude,
            ## unwrap the passed geometry to match the spatial domain of the target
            ## dataset.
            if copy_geom is None:
                igeom = None
            else:
                ## check projections, projecting the selection geometry if necessary.
                ## the comparison is on exact type (not isinstance), as in the original.
                if type(ods.spatial.projection) != type(copy_geom.spatial.projection):
                    msg = 'projecting selection geometry to match input projection: {0} to {1}'
                    msg = msg.format(copy_geom.spatial.projection.__class__.__name__,
                                     ods.spatial.projection.__class__.__name__)
                    ocgis_lh(msg, logger, alias=alias, ugid=ugid)
                    copy_geom.project(ods.spatial.projection)
                else:
                    ocgis_lh('projections match', logger, alias=alias, ugid=ugid)
                ## unwrap the data if it is geographic and 360
                if type(ods.spatial.projection) == WGS84 and ods.spatial.is_360:
                    ocgis_lh('unwrapping selection geometry with axis={0}'.format(ods.spatial.pm), logger,
                             alias=alias, ugid=ugid)
                    w = Wrapper(axis=ods.spatial.pm)
                    copy_geom.spatial.geom[0] = w.unwrap(deepcopy(copy_geom.spatial.geom[0]))
                igeom = copy_geom.spatial.geom[0]
            ## perform the data subset
            try:
                ## pull the temporal subset which may be a range or region. if
                ## it is a snippet operation, set the temporal subset to None
                ## as a slice has already been applied. however, if a calculation
                ## is present leave the temporal subset alone.
                if so.ops.snippet and so.ops.calc is None:
                    temporal = None
                else:
                    temporal = request_dataset.time_range or request_dataset.time_region

                ocgis_lh('executing get_subset', logger, level=logging.DEBUG)
                ods = ods.get_subset(spatial_operation=so.ops.spatial_operation, igeom=igeom,
                                     temporal=temporal, level=request_dataset.level_range)

                ## for the case of time range and time region subset, apply the
                ## time region subset following the time range subset.
                if request_dataset.time_range is not None and request_dataset.time_region is not None:
                    ods._temporal = ods.temporal.subset(request_dataset.time_region)

                ## aggregate the geometries and data if requested
                if so.ops.aggregate:
                    ocgis_lh('aggregating target geometries and area-weighting values', logger, alias=alias,
                             ugid=ugid)
                    ## the new geometry will have the same id as the passed
                    ## geometry. if it does not have one, simply give it a value
                    ## of 1 as it is the only geometry requested for subsetting.
                    try:
                        new_geom_id = copy_geom.spatial.uid[0]
                    except AttributeError:
                        new_geom_id = 1
                    ## do the aggregation in place.
                    clip_geom = None if copy_geom is None else copy_geom.spatial.geom[0]
                    ods.aggregate(new_geom_id=new_geom_id, clip_geom=clip_geom)

                ## wrap the returned data depending on the conditions of the
                ## operations.
                if not env.OPTIMIZE_FOR_CALC:
                    if type(ods.spatial.projection) == WGS84 and \
                            ods.spatial.is_360 and \
                            so.ops.output_format != 'nc' and \
                            so.ops.vector_wrap:
                        ocgis_lh('wrapping output geometries', logger, alias=alias, ugid=ugid)
                        ods.spatial.vector.wrap()
                        ocgis_lh('geometries wrapped', logger, alias=alias, ugid=ugid, level=logging.DEBUG)

                ## check for all masked values
                if env.OPTIMIZE_FOR_CALC is False and so.ops.file_only is False:
                    if ods.value.mask.all():
                        ## masked data may be okay depending on other operational
                        ## conditions.
                        if so.ops.snippet or so.ops.allow_empty:
                            if so.ops.snippet:
                                ocgis_lh('all masked data encountered but allowed for snippet', logger,
                                         alias=alias, ugid=ugid, level=logging.WARN)
                            if so.ops.allow_empty:
                                ocgis_lh('all masked data encountered but empty returns allowed', logger,
                                         alias=alias, ugid=ugid, level=logging.WARN)
                        else:
                            ## if the geometry is also masked, it is an empty spatial
                            ## operation.
                            if ods.spatial.vector.geom.mask.all():
                                raise EmptyData
                            else:
                                ocgis_lh(None, logger, exc=MaskedDataError(), alias=alias, ugid=ugid)
            ## there may be no data returned - this may be real or could be an
            ## error. by default, empty returns are not allowed
            except EmptyData as ed:
                if so.ops.allow_empty:
                    if ed.origin == 'time':
                        msg = 'the time subset returned empty but empty returns are allowed'
                    else:
                        msg = 'the geometric operations returned empty but empty returns are allowed'
                    ocgis_lh(msg, logger, alias=alias, ugid=ugid)
                    ## skip this request dataset; nothing is added to the collection
                    continue
                else:
                    if ed.origin == 'time':
                        msg = 'empty temporal subset operation'
                    else:
                        msg = 'empty geometric operation'
                    ocgis_lh(msg, logger, exc=ExtentError(msg), alias=alias, ugid=ugid)
        ods.spatial._ugid = ugid
        coll.variables.update({request_dataset.alias: ods})

    ## if there are calculations, do those now and return a new type of collection
    if so.cengine is not None:
        ocgis_lh('performing computations', logger, alias=alias, ugid=ugid)
        coll = so.cengine.execute(coll, file_only=so.ops.file_only)

    ## conversion of groups.
    if so.ops.output_grouping is not None:
        raise NotImplementedError

    ocgis_lh('subset returning', logger, level=logging.INFO)
    return coll