def test_iter_melted(self):
    """The aliases found in the melted rows should match the aliases of the input variables."""
    variables = [self.get_variable(), self.get_variable('tas_foo2')]
    vc = VariableCollection(variables=variables)

    # Gather the distinct aliases reported by the melted iterator.
    seen_aliases = set()
    for melted_row in vc.iter_melted():
        seen_aliases.add(melted_row['alias'])

    expected_aliases = [variable.alias for variable in variables]
    self.assertAsSetEqual(seen_aliases, expected_aliases)
def test_get_sliced_variables(self):
    """Slicing the collection with ``slice(1)`` should slice the value of every member."""
    variables = [self.get_variable(), self.get_variable('tas_foo2')]
    vc = VariableCollection(variables=variables)
    sliced = vc.get_sliced_variables(slice(1))

    # Every sliced variable is expected to hold the single value 4.
    for _, sliced_variable in sliced.iteritems():
        self.assertNumpyAll(sliced_variable.value, np.ma.array([4]))

    # NOTE(review): this compares each sliced value with the same entry of the
    # sliced collection (i.e. with itself). Possibly the original collection was
    # the intended comparison target - confirm before changing.
    for alias, sliced_variable in sliced.iteritems():
        shares_memory = np.may_share_memory(sliced_variable.value, sliced[alias].value)
        self.assertTrue(shares_memory)
def test_add_variable_already_in_collection_uids_update(self):
    """Adding the same variable again with ``assign_new_uid=True`` should give it a new uid."""
    collection = VariableCollection()
    variable = self.get_variable()

    collection.add_variable(variable)
    self.assertEqual(variable.uid, 1)

    # Change the alias before adding the same object a second time.
    variable.alias = 'variable_2'
    collection.add_variable(variable, assign_new_uid=True)
    self.assertEqual(variable.uid, 2)
    self.assertEqual(collection._storage_id, [1, 2])
def variables(self, value):
    """
    Store the variable collection on this object and link each variable back to it.

    :param value: A single variable (wrapped in a new collection) or a variable collection.
    :type value: :class:`Variable` or :class:`VariableCollection`
    :raises ValueError: Passed to ``assert_raise`` for values that are not a variable collection
     after the optional wrapping step.
    :raises AssertionError: If a variable has a loaded value whose shape differs from ``self.shape``.
    """
    ## a lone variable is promoted to a collection with one member
    if isinstance(value, Variable):
        value = VariableCollection(variables=[value])

    is_collection = isinstance(value, VariableCollection)
    assert_raise(is_collection, exc=ValueError('The "variables" keyword must be a Variable object.'))

    self._variables = value
    for member in value.itervalues():
        member._field = self
        ## shapes can only be compared when the value is already present
        if member._value is None:
            continue
        assert(member._value.shape == self.shape)
def _add_to_collection_(self, units=None, value=None, parent_variables=None, alias=None,
                        dtype=None, fill_value=None):
    """
    Create a derived variable from a calculation result and add it to ``self.vc``.

    When ``value`` carries a sample size, a second derived variable aliased ``'n_' + <alias>`` is
    added to hold it.

    :param units: Units passed to the derived variable.
    :param value: Either a dictionary with the keys ``'fill'`` and ``'sample_size'`` or the bare
     object to use as the derived variable's value (no sample size in that case).
    :param parent_variables: Variables used to build the parent collection of the derived variable.
    :param alias: Alias of the derived variable. ``self.alias`` is used when this is falsy.
    :param dtype: Data type of the derived variable. Required.
    :param fill_value: Fill value of the derived variable. Defaults to the fill value of an empty
     masked array of ``dtype``.
    :raises AssertionError: If ``dtype`` is ``None``.
    """
    ## dtype should come in with each new variable
    assert(dtype is not None)
    ## if there is no fill value, use the default for the data type
    if fill_value is None:
        fill_value = np.ma.array([], dtype=dtype).fill_value

    ## the value parameters should come in as a dictionary with two keys
    try:
        fill = value['fill']
        sample_size = value['sample_size']
    ## some computations will just pass the array without the sample size
    ## if _get_temporal_agg_fill_ is bypassed. indexing a plain array with a
    ## string raises ValueError on older numpy releases and IndexError on newer
    ## ones, so both are treated as "the value is the bare array".
    except (ValueError, IndexError):
        fill = value
        sample_size = None

    alias = alias or self.alias
    fdef = self.get_function_definition()
    meta = {'attrs': {'standard_name': self.standard_name,
                      'long_name': self.long_name}}

    parents = VariableCollection(variables=parent_variables)

    ## attempt to copy the grid_mapping attribute for the derived variable
    try:
        meta['attrs']['grid_mapping'] = parents.first().meta['attrs']['grid_mapping']
    except KeyError:
        pass

    ## if the operation is file only, creating a variable with an empty
    ## value will raise an exception. pass a dummy data source because even
    ## if the value is trying to be loaded it should not be accessible!
    if self.file_only:
        data = 'foo_data_source'
    else:
        data = None

    dv = DerivedVariable(name=self.key, alias=alias, units=units, value=fill,
                         fdef=fdef, parents=parents, meta=meta, data=data,
                         dtype=dtype, fill_value=fill_value)

    ## allow more complex manipulations of metadata
    self.set_variable_metadata(dv)
    ## add the variable to the variable collection
    self._set_derived_variable_alias_(dv, parent_variables)
    self.vc.add_variable(dv)

    ## add the sample size if it is present in the fill dictionary
    if sample_size is not None:
        meta = {'attrs': {'standard_name': 'sample_size',
                          'long_name': 'Statistical Sample Size'}}
        ## the alias is derived from the alias of the variable added above
        dv = DerivedVariable(name=None, alias='n_' + dv.alias, units=None, value=sample_size,
                             fdef=None, parents=parents, meta=meta, dtype=constants.np_int,
                             fill_value=fill_value)
        self.vc.add_variable(dv)
def test_iter_columns(self):
    """``iter_columns`` should yield one row per index with an ordered mapping keyed by alias."""
    variables = [self.get_variable(), self.get_variable('tas_foo2')]
    # Make the second variable distinguishable and mask one of its elements.
    variables[1].value *= 2
    variables[1].value.mask[2] = True
    vc = VariableCollection(variables=variables)

    rows = list(vc.iter_columns())
    self.assertEqual(len(rows), 3)

    expected_aliases = ['tas_foo', 'tas_foo2']
    self.assertEqual(rows[1][1].keys(), expected_aliases)
    self.assertIsInstance(rows[2][1], OrderedDict)

    # NOTE(review): the second argument of assertTrue is the failure message, so
    # this only checks that the first element of each row is truthy. The intended
    # comparison with 20 is unclear - confirm before tightening.
    for row in rows:
        self.assertTrue(row[0], 20)
def variables(self, value):
    """
    Store the variable collection on this object and link each variable back to it.

    :param value: ``None`` (an empty collection is created), a single variable (wrapped in a new
     collection), or a variable collection.
    :type value: None, :class:`Variable`, or :class:`VariableCollection`
    :raises ValueError: If ``value`` is none of the accepted types.
    :raises AssertionError: If a variable has a loaded value whose shape differs from ``self.shape``.
    """
    if value is None:
        value = VariableCollection()
    elif isinstance(value, Variable):
        # A lone variable is promoted to a collection with one member.
        value = VariableCollection(variables=[value])
    elif not isinstance(value, VariableCollection):
        raise ValueError('The value must be a Variable or VariableCollection object.')

    self._variables = value
    for member in value.itervalues():
        member._field = self
        # Shapes can only be compared when the value is already present.
        if member._value is None:
            continue
        assert member._value.shape == self.shape
def _get_field_(self, format_time=None):
    """
    Build a field from the records of the opened data source.

    The spatial dimension is created from the records and every variable definition in ``self.rd``
    becomes a variable whose value is pulled from the record properties.

    :param format_time: Not used by this method.
    :returns: The field built from the records.
    """
    # todo: option to pass select_ugid
    # todo: option for time dimension and time subsetting
    # todo: remove format_time option - there for compatibility with the netCDF driver
    from ocgis import SpatialDimension

    ds = self.open()
    try:
        records = list(ds)
        sdim = SpatialDimension.from_records(records, crs=self.get_crs())
        # do not load the properties - they are transformed to variables in the case of the values put into fields
        sdim.properties = None

        vc = VariableCollection()
        for definition in self.rd:
            # One property value per record, reshaped so the records lie along the last of five axes.
            property_values = [record['properties'][definition['variable']] for record in records]
            value = np.array(property_values).reshape(1, 1, 1, 1, -1)
            variable = Variable(name=definition['variable'], alias=definition['alias'],
                                units=definition['units'], conform_units_to=definition['units'],
                                value=value)
            vc.add_variable(variable, assign_new_uid=True)

        return Field(spatial=sdim, variables=vc, name=self.rd.name)
    finally:
        # The data source is closed whether or not the field could be built.
        self.close(ds)
def test_add_variable(self):
    """Adding variables should assign uids, register aliases, and reject a uid already stored."""
    collection = VariableCollection()
    variable = self.get_variable()
    self.assertEqual(variable.uid, None)

    # The first add assigns the first uid and registers the alias.
    collection.add_variable(variable)
    self.assertEqual(variable.uid, 1)
    self.assertIn('tas_foo', collection)
    self.assertEqual(collection._storage_id, [1])

    # Adding again under a new alias while keeping the uid is rejected.
    variable.alias = 'again'
    with self.assertRaises(AssertionError):
        collection.add_variable(variable)

    # With a uid that has not been stored yet the add is accepted.
    variable.uid = 100
    collection.add_variable(variable)
    self.assertEqual(collection._storage_id, [1, 100])
def _get_field_(self, format_time=True):
    """
    Build the field for the request dataset from its source metadata.

    :param bool format_time: Forwarded to the temporal dimension as ``format_time``.
    :raises ValueError: If vector row/column dimensions are used and the temporal, row, or column
     dimension could not be loaded, or if a level subset is requested but the field has no level.
    :returns: The field with the request dataset's time and level subsets applied.
    """
    # reference the request dataset's source metadata
    source_metadata = self.rd.source_metadata

    def _get_temporal_adds_(ref_attrs):
        # calendar should default to standard if it is not present and the t_calendar overload is not used.
        calendar = self.rd.t_calendar or ref_attrs.get("calendar", None) or "standard"
        adds = {
            "units": self.rd.t_units or ref_attrs["units"],
            "calendar": calendar,
            "format_time": format_time,
            "conform_units_to": self.rd.t_conform_units_to,
        }
        return adds

    # parameters for the loading loop
    to_load = {
        "temporal": {"cls": NcTemporalDimension, "adds": _get_temporal_adds_, "axis": "T",
                     "name_uid": "tid", "name": "time"},
        "level": {"cls": NcVectorDimension, "adds": None, "axis": "Z",
                  "name_uid": "lid", "name": "level"},
        "row": {"cls": NcVectorDimension, "adds": None, "axis": "Y",
                "name_uid": "yc_id", "name": "yc"},
        "col": {"cls": NcVectorDimension, "adds": None, "axis": "X",
                "name_uid": "xc_id", "name": "xc"},
        "realization": {"cls": NcVectorDimension, "adds": None, "axis": "R",
                        "name_uid": "rlz_id", "name_value": "rlz"},
    }

    loaded = {}
    kwds_grid = {}
    has_row_column = True
    for key, parms in to_load.iteritems():
        dimension = self._get_vector_dimension_(key, parms, source_metadata)
        # Anything that is neither missing nor a vector dimension is kept aside
        # for the grid; this is only allowed for the row and column entries.
        is_vector = isinstance(dimension, NcVectorDimension)
        if key != "realization" and dimension is not None and not is_vector:
            assert key in ("row", "col")
            has_row_column = False
            kwds_grid[key] = dimension
        loaded[key] = dimension

    loaded_keys = {key for key, dimension in loaded.iteritems() if dimension is not None}

    if has_row_column:
        required = {"temporal", "row", "col"}
        if not required <= loaded_keys:
            raise ValueError("Target variable must at least have temporal, row, and column dimensions.")
        kwds_grid = {"row": loaded["row"], "col": loaded["col"]}
    else:
        # Source indices are sized from the dimensions listed on the row entry.
        shape_src_idx = [source_metadata["dimensions"][xx]["len"] for xx in kwds_grid["row"]["dimensions"]]
        src_idx = {
            "row": np.arange(0, shape_src_idx[0], dtype=np.int32),
            "col": np.arange(0, shape_src_idx[1], dtype=np.int32),
        }
        name_row = kwds_grid["row"]["name"]
        name_col = kwds_grid["col"]["name"]
        kwds_grid = {"name_row": name_row, "name_col": name_col, "data": self.rd, "src_idx": src_idx}

    grid = NcSpatialGridDimension(**kwds_grid)
    spatial = SpatialDimension(name_uid="gid", grid=grid, crs=self.rd.crs, abstraction=self.rd.s_abstraction)

    vc = VariableCollection()
    for vdict in self.rd:
        variable_meta = deepcopy(source_metadata["variables"][vdict["variable"]])
        # Units given on the request take precedence over the units attribute.
        variable_units = vdict["units"] or variable_meta["attrs"].get("units")
        dtype = np.dtype(variable_meta["dtype"])
        fill_value = variable_meta["fill_value"]
        variable = Variable(vdict["variable"], vdict["alias"], units=variable_units, meta=variable_meta,
                            data=self.rd, conform_units_to=vdict["conform_units_to"], dtype=dtype,
                            fill_value=fill_value, attrs=variable_meta["attrs"].copy())
        vc.add_variable(variable)

    ret = NcField(variables=vc, spatial=spatial, temporal=loaded["temporal"], level=loaded["level"],
                  realization=loaded["realization"], meta=source_metadata.copy(), uid=self.rd.did,
                  name=self.rd.name, attrs=source_metadata["dataset"].copy())

    # Apply any subset parameters after the field is loaded.
    if self.rd.time_range is not None:
        ret = ret.get_between("temporal", min(self.rd.time_range), max(self.rd.time_range))
    if self.rd.time_region is not None:
        ret = ret.get_time_region(self.rd.time_region)
    if self.rd.time_subset_func is not None:
        ret = ret.get_time_subset_by_function(self.rd.time_subset_func)
    if self.rd.level_range is not None:
        try:
            ret = ret.get_between("level", min(self.rd.level_range), max(self.rd.level_range))
        except AttributeError:
            # Only an absent level dimension is translated; anything else propagates.
            if ret.level is not None:
                raise
            raise ValueError(messages.M4.format(self.rd.alias))

    return ret
def execute(self, coll, file_only=False, tgds=None):
    """
    Run the engine's calculations on every field of a collection. Each field in ``coll`` is
    replaced by a derived field holding the variables produced by the calculations.

    :param coll: The collection of fields to compute on. It is modified in place.
    :type coll: :class:`~ocgis.SpatialCollection`
    :param bool file_only: Forwarded to the calculation functions. If ``True``, the outgoing
     variables are asserted to have no value.
    :param dict tgds: Optional temporal group dimensions to use instead of the ones stored on the
     engine. When given, every field alias must be present.

     ``{'field_alias': :class:`ocgis.interface.base.dimension.temporal.TemporalGroupDimension`,...}``
    :raises AssertionError: If ``tgds`` lacks a field alias, an outgoing variable has no data type,
     or a value is present for a file only operation.
    :returns: The modified ``coll``.
    """
    ## switch field type based on the types of calculations present
    if self._check_calculation_members_(self.funcs, AbstractMultivariateFunction):
        klass = DerivedMultivariateField
    elif self._check_calculation_members_(self.funcs, EvalFunction):
        ## if the input field has more than one variable, assumed this is a
        ## multivariate calculation
        klass = DerivedField
        for field_container in coll.itervalues():
            for field in field_container.itervalues():
                if len(field.variables.keys()) > 1:
                    klass = DerivedMultivariateField
                    break
    else:
        klass = DerivedField

    ## select which dictionary will hold the temporal group dimensions
    if tgds is None:
        tgds_to_use = self._tgds
        tgds_overloaded = False
    else:
        tgds_to_use = tgds
        tgds_overloaded = True

    ## group the variables. if grouping is None, calculations are performed
    ## on each element. array computations are taken advantage of.
    if self.grouping is not None:
        ocgis_lh('Setting temporal groups: {0}'.format(self.grouping), 'calc.engine')
        for v in coll.itervalues():
            for k2, v2 in v.iteritems():
                if tgds_overloaded:
                    assert(k2 in tgds_to_use)
                else:
                    if k2 not in tgds_to_use:
                        tgds_to_use[k2] = v2.temporal.get_grouping(self.grouping)

    ## iterate over functions
    for ugid, dct in coll.iteritems():
        for alias_field, field in dct.iteritems():
            ## choose a representative data type based on the first variable
            dtype = field.variables.values()[0].dtype

            new_temporal = tgds_to_use.get(alias_field)
            ## if the engine has a grouping, ensure it is equivalent to the
            ## new temporal dimension.
            if self.grouping is not None:
                try:
                    compare = set(new_temporal.grouping) == set(self.grouping)
                ## types may be unhashable, compare directly
                except TypeError:
                    compare = new_temporal.grouping == self.grouping
                if not compare:
                    msg = ('Engine temporal grouping and field temporal grouping '
                           'are not equivalent. Perhaps optimizations are incorrect?')
                    ocgis_lh(logger='calc.engine', exc=ValueError(msg))

            out_vc = VariableCollection()
            for f in self.funcs:
                try:
                    ocgis_lh('Calculating: {0}'.format(f['func']), logger='calc.engine')
                    ## initialize the function
                    function = f['ref'](alias=f['name'], dtype=dtype, field=field, file_only=file_only,
                                        vc=out_vc, parms=f['kwds'], tgd=new_temporal,
                                        use_raw_values=self.use_raw_values,
                                        calc_sample_size=self.calc_sample_size)
                except KeyError:
                    ## likely an eval function which does not have the name
                    ## key
                    function = EvalFunction(field=field, file_only=file_only, vc=out_vc,
                                            expr=self.funcs[0]['func'])

                ocgis_lh('calculation initialized', logger='calc.engine', level=logging.DEBUG)

                ## return the variable collection from the calculations
                out_vc = function.execute()

                for dv in out_vc.itervalues():
                    ## any outgoing variables from a calculation must have a
                    ## data type associated with it. an identity check is used
                    ## because comparing a numpy data type to None with "!="
                    ## is not reliable across numpy versions.
                    assert(dv.dtype is not None)
                    ## if this is a file only operation, then there should
                    ## be no values. identity is used as "==" may be
                    ## evaluated element-wise when an array is present.
                    if file_only:
                        assert(dv._value is None)

                ocgis_lh('calculation finished', logger='calc.engine', level=logging.DEBUG)

                ## try to mark progress
                try:
                    self._progress.mark()
                except AttributeError:
                    pass

            ## fall back to the field's temporal dimension when there is no
            ## temporal group dimension for the field
            new_temporal = new_temporal or field.temporal
            new_field = klass(variables=out_vc, temporal=new_temporal, spatial=field.spatial,
                              level=field.level, realization=field.realization, meta=field.meta,
                              uid=field.uid, name=field.name)
            coll[ugid][alias_field] = new_field

    return(coll)
def get_field(self, format_time=True, interpolate_spatial_bounds=False):
    """
    Build the field for the request dataset from its source metadata.

    :param bool format_time: Forwarded to the temporal dimension as ``format_time``.
    :param bool interpolate_spatial_bounds: Forwarded to the row and column dimensions as
     ``interpolate_bounds``.
    :raises ValueError: If the temporal, row, or column dimension could not be loaded, or if a
     level subset is requested but the field has no level dimension.
    :returns: The field with the request dataset's time and level subsets applied.
    """

    def _get_temporal_adds_(ref_attrs):
        ## calendar should default to standard if it is not present and the
        ## t_calendar overload is not used.
        calendar = self.rd.t_calendar or ref_attrs.get('calendar', None) or 'standard'

        return ({'units': self.rd.t_units or ref_attrs['units'],
                 'calendar': calendar,
                 'format_time': format_time})

    ## this dictionary contains additional keyword arguments for the row
    ## and column dimensions.
    adds_row_col = {'interpolate_bounds': interpolate_spatial_bounds}

    ## parameters for the loading loop
    to_load = {'temporal': {'cls': NcTemporalDimension, 'adds': _get_temporal_adds_, 'axis': 'T',
                            'name_uid': 'tid', 'name_value': 'time'},
               'level': {'cls': NcVectorDimension, 'adds': None, 'axis': 'Z',
                         'name_uid': 'lid', 'name_value': 'level'},
               'row': {'cls': NcVectorDimension, 'adds': adds_row_col, 'axis': 'Y',
                       'name_uid': 'row_id', 'name_value': 'row'},
               'col': {'cls': NcVectorDimension, 'adds': adds_row_col, 'axis': 'X',
                       'name_uid': 'col_id', 'name_value': 'col'},
               'realization': {'cls': NcVectorDimension, 'adds': None, 'axis': 'R',
                               'name_uid': 'rlz_id', 'name_value': 'rlz'}}

    loaded = {}
    for k, v in to_load.iteritems():
        ## this is the string axis representation
        axis_value = v['axis'] or v['cls']._axis
        ## pull the axis information out of the dimension map
        ref_axis = self.rd.source_metadata['dim_map'].get(axis_value)
        ## if the axis is not represented, fill it with none. this happens
        ## when a dataset does not have a vertical level or projection axis
        ## for example.
        if ref_axis is None:
            fill = None
        else:
            ref_variable = self.rd.source_metadata['variables'].get(ref_axis['variable'])

            ## for data with a projection/realization axis there may be no
            ## associated variable.
            try:
                ref_variable['axis'] = ref_axis
            except TypeError:
                if axis_value == 'R' and ref_variable is None:
                    ref_variable = {'axis': ref_axis, 'name': ref_axis['dimension'], 'attrs': {}}

            ## extract the data length to use when creating the source index
            ## arrays.
            length = self.rd.source_metadata['dimensions'][ref_axis['dimension']]['len']
            src_idx = np.arange(0, length, dtype=constants.np_int)

            ## get the target data type for the dimension
            try:
                dtype = np.dtype(ref_variable['dtype'])
            ## the realization dimension may not be a associated with a variable
            except KeyError:
                if k == 'realization' and ref_variable['axis']['variable'] is None:
                    dtype = None
                else:
                    raise

            ## assemble parameters for creating the dimension class then initialize
            ## the class.
            kwds = dict(name_uid=v['name_uid'], name_value=v['name_value'], src_idx=src_idx,
                        data=self.rd, meta=ref_variable, axis=axis_value,
                        name=ref_variable.get('name'), dtype=dtype)

            ## there may be additional parameters for each dimension. they are
            ## either a callable taking the variable's attributes or a
            ## dictionary.
            adds = v['adds']
            if adds is not None:
                if callable(adds):
                    kwds.update(adds(ref_variable['attrs']))
                else:
                    kwds.update(adds)
            ## the name always comes from the reference variable
            kwds.update({'name': ref_variable.get('name')})
            fill = v['cls'](**kwds)

        loaded[k] = fill

    ## identity is used to find the loaded dimensions so the check does not
    ## depend on how a dimension object implements comparison
    loaded_keys = set([k for k, v in loaded.iteritems() if v is not None])
    assert_raise(set(('temporal', 'row', 'col')).issubset(loaded_keys), logger='request',
                 exc=ValueError('Target variable must at least have temporal, row, and column dimensions.'))

    grid = SpatialGridDimension(row=loaded['row'], col=loaded['col'])

    spatial = SpatialDimension(name_uid='gid', grid=grid, crs=self.rd.crs, abstraction=self.rd.s_abstraction)

    vc = VariableCollection()
    for vdict in self.rd:
        variable_meta = deepcopy(self.rd._source_metadata['variables'][vdict['variable']])
        ## units given on the request take precedence over the units attribute
        variable_units = vdict['units'] or variable_meta['attrs'].get('units')
        dtype = np.dtype(variable_meta['dtype'])
        fill_value = variable_meta['fill_value']
        variable = Variable(vdict['variable'], vdict['alias'], units=variable_units, meta=variable_meta,
                            data=self.rd, conform_units_to=vdict['conform_units_to'], dtype=dtype,
                            fill_value=fill_value)
        vc.add_variable(variable)

    ret = NcField(variables=vc, spatial=spatial, temporal=loaded['temporal'], level=loaded['level'],
                  realization=loaded['realization'], meta=deepcopy(self.rd._source_metadata),
                  uid=self.rd.did, name=self.rd.name)

    ## apply any subset parameters after the field is loaded
    if self.rd.time_range is not None:
        ret = ret.get_between('temporal', min(self.rd.time_range), max(self.rd.time_range))
    if self.rd.time_region is not None:
        ret = ret.get_time_region(self.rd.time_region)
    if self.rd.level_range is not None:
        try:
            ret = ret.get_between('level', min(self.rd.level_range), max(self.rd.level_range))
        except AttributeError:
            ## there may be no level dimension
            if ret.level is None:
                msg = ("A level subset was requested but the target dataset does not have a level dimension. The "
                       "dataset's alias is: {0}".format(self.rd.alias))
                raise ValueError(msg)
            else:
                raise

    return ret
def test_add_variable_already_in_collection(self):
    """Adding the same variable twice should raise ``VariableInCollectionError``."""
    collection = VariableCollection()
    variable = self.get_variable()
    collection.add_variable(variable)

    # The second add of the unchanged variable must be rejected.
    with self.assertRaises(VariableInCollectionError):
        collection.add_variable(variable)
def test_init_variable_not_none(self):
    """The collection should accept a single variable or a sequence of variables."""
    single = self.get_variable()
    several = [self.get_variable(), self.get_variable('tas_foo2')]

    for candidate in [single, several]:
        vc = VariableCollection(variables=candidate)
        # The keys are expected to be the aliases in the order the variables were given.
        expected_keys = [iv.alias for iv in get_iter(candidate, dtype=Variable)]
        self.assertEqual(vc.keys(), expected_keys)
def _get_field_(self, format_time=True):
    """
    Build the field for the request dataset from its source metadata.

    For variables whose metadata has a ``dtype_packed`` entry, the packed data type and fill value
    are used and the packing related attributes are removed from the variable's attributes.

    :param bool format_time: Forwarded to the temporal dimension as ``format_time``.
    :raises ValueError: If vector row/column dimensions are used and the temporal, row, or column
     dimension could not be loaded, or if a level subset is requested but the field has no level.
    :returns: The field with the request dataset's time and level subsets applied.
    """

    # reference the request dataset's source metadata
    source_metadata = self.rd.source_metadata

    def _get_temporal_adds_(ref_attrs):
        # calendar should default to standard if it is not present and the t_calendar overload is not used.
        calendar = self.rd.t_calendar or ref_attrs.get('calendar', None) or 'standard'

        return {'units': self.rd.t_units or ref_attrs['units'], 'calendar': calendar, 'format_time': format_time,
                'conform_units_to': self.rd.t_conform_units_to}

    # parameters for the loading loop
    to_load = {'temporal': {'cls': NcTemporalDimension, 'adds': _get_temporal_adds_, 'axis': 'T', 'name_uid': 'tid',
                            'name': 'time'},
               'level': {'cls': NcVectorDimension, 'adds': None, 'axis': 'Z', 'name_uid': 'lid', 'name': 'level'},
               'row': {'cls': NcVectorDimension, 'adds': None, 'axis': 'Y', 'name_uid': 'yc_id', 'name': 'yc'},
               'col': {'cls': NcVectorDimension, 'adds': None, 'axis': 'X', 'name_uid': 'xc_id', 'name': 'xc'},
               'realization': {'cls': NcVectorDimension, 'adds': None, 'axis': 'R', 'name_uid': 'rlz_id',
                               'name_value': 'rlz'}}

    loaded = {}
    kwds_grid = {}
    has_row_column = True
    for k, v in to_load.iteritems():
        fill = self._get_vector_dimension_(k, v, source_metadata)
        # Anything that is neither missing nor a vector dimension is kept aside for the grid; this is only
        # allowed for the row and column entries.
        if k != 'realization' and not isinstance(fill, NcVectorDimension) and fill is not None:
            assert k in ('row', 'col')
            has_row_column = False
            kwds_grid[k] = fill
        loaded[k] = fill

    loaded_keys = set([k for k, v in loaded.iteritems() if v is not None])
    if has_row_column:
        if not {'temporal', 'row', 'col'}.issubset(loaded_keys):
            raise ValueError('Target variable must at least have temporal, row, and column dimensions.')
        kwds_grid = {'row': loaded['row'], 'col': loaded['col']}
    else:
        # Source indices are sized from the dimensions listed on the row entry.
        shape_src_idx = [source_metadata['dimensions'][xx]['len'] for xx in kwds_grid['row']['dimensions']]
        src_idx = {'row': np.arange(0, shape_src_idx[0], dtype=np.int32),
                   'col': np.arange(0, shape_src_idx[1], dtype=np.int32)}
        name_row = kwds_grid['row']['name']
        name_col = kwds_grid['col']['name']
        kwds_grid = {'name_row': name_row, 'name_col': name_col, 'request_dataset': self.rd, 'src_idx': src_idx}

    grid = NcSpatialGridDimension(**kwds_grid)

    spatial = SpatialDimension(name_uid='gid', grid=grid, crs=self.rd.crs, abstraction=self.rd.s_abstraction)

    vc = VariableCollection()
    for vdict in self.rd:
        variable_meta = deepcopy(source_metadata['variables'][vdict['variable']])
        # Units given on the request take precedence over the units attribute.
        variable_units = vdict['units'] or variable_meta['attrs'].get('units')
        attrs = variable_meta['attrs'].copy()
        if variable_meta['dtype_packed'] is None:
            dtype = np.dtype(variable_meta['dtype'])
            fill_value = variable_meta['fill_value']
        else:
            dtype = np.dtype(variable_meta['dtype_packed'])
            fill_value = variable_meta['fill_value_packed']
            # Remove scale factors and offsets from the metadata. Every attribute is optional so a packed
            # variable lacking one of them does not fail with a KeyError.
            attrs.pop('scale_factor', None)
            attrs.pop('add_offset', None)
            attrs.pop('missing_value', None)
            # NOTE(review): the netCDF attribute is conventionally spelled "_FillValue"; this key looks like
            # a typo but is left untouched because removing "_FillValue" would change the outgoing
            # attributes - confirm the intent.
            attrs.pop('_Fill_Value', None)
        variable = Variable(vdict['variable'], vdict['alias'], units=variable_units, meta=variable_meta,
                            request_dataset=self.rd, conform_units_to=vdict['conform_units_to'], dtype=dtype,
                            fill_value=fill_value, attrs=attrs)
        vc.add_variable(variable)

    ret = NcField(variables=vc, spatial=spatial, temporal=loaded['temporal'], level=loaded['level'],
                  realization=loaded['realization'], meta=source_metadata.copy(), uid=self.rd.did,
                  name=self.rd.name, attrs=source_metadata['dataset'].copy())

    # Apply any subset parameters after the field is loaded.
    if self.rd.time_range is not None:
        ret = ret.get_between('temporal', min(self.rd.time_range), max(self.rd.time_range))
    if self.rd.time_region is not None:
        ret = ret.get_time_region(self.rd.time_region)
    if self.rd.time_subset_func is not None:
        ret = ret.get_time_subset_by_function(self.rd.time_subset_func)
    if self.rd.level_range is not None:
        try:
            ret = ret.get_between('level', min(self.rd.level_range), max(self.rd.level_range))
        except AttributeError:
            # there may be no level dimension
            if ret.level is None:
                msg = messages.M4.format(self.rd.alias)
                raise ValueError(msg)
            else:
                raise

    return ret