def test_system_get_field_dimensioned_variables(self):
    """Test data is appropriately tagged to identify dimensioned variables."""
    path = self.get_temporary_file_path('foo.nc')
    time = TemporalVariable(value=[1, 2, 3], dimensions='time')
    x = Variable(name='x', value=[10, 20], dimensions='x')
    y = Variable(name='y', value=[30, 40, 50, 60], dimensions='y')
    data1 = Variable(name='data1', value=np.random.rand(3, 4, 2), dimensions=['time', 'y', 'x'])
    data2 = Variable(name='data2', value=np.random.rand(3, 4, 2), dimensions=['time', 'y', 'x'])
    data3 = Variable(name='data3', value=[11, 12, 13], dimensions=['time'])
    field = Field(time=time, grid=Grid(x, y), variables=[data1, data2, data3])
    field.write(path)

    # Test dimensioned variables are read from a file with appropriate metadata.
    rd = RequestDataset(path)
    self.assertEqual(rd.variable, ('data1', 'data2'))
    read_field = rd.get()
    actual = get_variable_names(read_field.data_variables)
    self.assertEqual(actual, ('data1', 'data2'))

    # Test dimensioned variables are overloaded.
    rd = RequestDataset(path, variable='data2')
    read_field = rd.get()
    actual = get_variable_names(read_field.data_variables)
    self.assertEqual(actual, ('data2',))
def test_system_create_field_dimensioned_variables(self):
    """Test data is appropriately tagged to identify dimensioned variables."""
    path = self.get_temporary_file_path('foo.nc')
    time = TemporalVariable(value=[1, 2, 3], dimensions='time')
    x = Variable(name='x', value=[10, 20], dimensions='x')
    y = Variable(name='y', value=[30, 40, 50, 60], dimensions='y')
    data1 = Variable(name='data1', value=np.random.rand(3, 4, 2), dimensions=['time', 'y', 'x'])
    data2 = Variable(name='data2', value=np.random.rand(3, 4, 2), dimensions=['time', 'y', 'x'])
    data3 = Variable(name='data3', value=[11, 12, 13], dimensions=['time'])
    field = Field(time=time, grid=Grid(x, y), variables=[data1, data2, data3])
    field.write(path)

    # Test dimensioned variables are read from a file with appropriate metadata.
    rd = RequestDataset(path)
    self.assertEqual(rd.variable, ('data1', 'data2'))
    read_field = rd.get()
    actual = get_variable_names(read_field.data_variables)
    self.assertEqual(actual, ('data1', 'data2'))

    # Test dimensioned variables are overloaded.
    rd = RequestDataset(path, variable='data2')
    read_field = rd.get()
    actual = get_variable_names(read_field.data_variables)
    self.assertEqual(actual, ('data2',))
def test(self):
    gs = self.fixture_grid_chunker()

    desired_dst_grid_sum = gs.dst_grid.parent['data'].get_value().sum()
    desired_dst_grid_sum = MPI_COMM.gather(desired_dst_grid_sum)
    if vm.rank == 0:
        desired_sum = np.sum(desired_dst_grid_sum)

    desired = [{'y': slice(0, 180, None), 'x': slice(0, 240, None)},
               {'y': slice(0, 180, None), 'x': slice(240, 480, None)},
               {'y': slice(0, 180, None), 'x': slice(480, 720, None)},
               {'y': slice(180, 360, None), 'x': slice(0, 240, None)},
               {'y': slice(180, 360, None), 'x': slice(240, 480, None)},
               {'y': slice(180, 360, None), 'x': slice(480, 720, None)}]
    actual = list(gs.iter_dst_grid_slices())
    self.assertEqual(actual, desired)

    gs.write_chunks()

    if vm.rank == 0:
        rank_sums = []

    for ctr in range(1, gs.nchunks_dst[0] * gs.nchunks_dst[1] + 1):
        src_path = gs.create_full_path_from_template('src_template', index=ctr)
        dst_path = gs.create_full_path_from_template('dst_template', index=ctr)

        src_field = RequestDataset(src_path).get()
        dst_field = RequestDataset(dst_path).get()

        src_envelope_global = box(*src_field.grid.extent_global)
        dst_envelope_global = box(*dst_field.grid.extent_global)

        self.assertTrue(does_contain(src_envelope_global, dst_envelope_global))

        actual = get_variable_names(src_field.data_variables)
        self.assertIn('data', actual)

        actual = get_variable_names(dst_field.data_variables)
        self.assertIn('data', actual)
        actual_data_sum = dst_field['data'].get_value().sum()
        actual_data_sum = MPI_COMM.gather(actual_data_sum)
        if MPI_RANK == 0:
            actual_data_sum = np.sum(actual_data_sum)
            rank_sums.append(actual_data_sum)

    if vm.rank == 0:
        self.assertAlmostEqual(desired_sum, np.sum(rank_sums))
        index_path = gs.create_full_path_from_template('index_file')
        self.assertTrue(os.path.exists(index_path))

    vm.barrier()

    index_path = gs.create_full_path_from_template('index_file')
    index_field = RequestDataset(index_path).get()
    self.assertTrue(len(list(index_field.keys())) > 2)
def test(self):
    gs = self.get_grid_splitter()

    desired_dst_grid_sum = gs.dst_grid.parent['data'].get_value().sum()
    desired_dst_grid_sum = MPI_COMM.gather(desired_dst_grid_sum)
    if MPI_RANK == 0:
        desired_sum = np.sum(desired_dst_grid_sum)

    desired = [{'y': slice(0, 180, None), 'x': slice(0, 240, None)},
               {'y': slice(0, 180, None), 'x': slice(240, 480, None)},
               {'y': slice(0, 180, None), 'x': slice(480, 720, None)},
               {'y': slice(180, 360, None), 'x': slice(0, 240, None)},
               {'y': slice(180, 360, None), 'x': slice(240, 480, None)},
               {'y': slice(180, 360, None), 'x': slice(480, 720, None)}]
    actual = list(gs.iter_dst_grid_slices())
    self.assertEqual(actual, desired)

    gs.write_subsets()

    if MPI_RANK == 0:
        rank_sums = []

    for ctr in range(1, gs.nsplits_dst[0] * gs.nsplits_dst[1] + 1):
        src_path = gs.create_full_path_from_template('src_template', index=ctr)
        dst_path = gs.create_full_path_from_template('dst_template', index=ctr)

        src_field = RequestDataset(src_path).get()
        dst_field = RequestDataset(dst_path).get()

        src_envelope_global = box(*src_field.grid.extent_global)
        dst_envelope_global = box(*dst_field.grid.extent_global)

        self.assertTrue(does_contain(src_envelope_global, dst_envelope_global))

        actual = get_variable_names(src_field.data_variables)
        self.assertIn('data', actual)

        actual = get_variable_names(dst_field.data_variables)
        self.assertIn('data', actual)
        actual_data_sum = dst_field['data'].get_value().sum()
        actual_data_sum = MPI_COMM.gather(actual_data_sum)
        if MPI_RANK == 0:
            actual_data_sum = np.sum(actual_data_sum)
            rank_sums.append(actual_data_sum)

    if MPI_RANK == 0:
        self.assertAlmostEqual(desired_sum, np.sum(rank_sums))
        index_path = gs.create_full_path_from_template('index_file')
        self.assertTrue(os.path.exists(index_path))

    MPI_COMM.Barrier()

    index_path = gs.create_full_path_from_template('index_file')
    index_field = RequestDataset(index_path).get()
    self.assertTrue(len(list(index_field.keys())) > 2)
def test1d(self):
    p1 = self.write_field_data('v1', ncol=1, nrow=1)
    p3 = self.write_field_data('v1', dir='b')

    ref_range = [dt.datetime(2000, 3, 1), dt.datetime(2000, 3, 31)]
    reference = ocgis.RequestDataset(p1, time_range=ref_range).get()

    cand_range = [dt.datetime(2000, 8, 1), dt.datetime(2000, 8, 31)]
    candidate = ocgis.RequestDataset(p3, time_range=cand_range)

    calc = [{'func': 'dissimilarity',
             'name': 'output_1d',
             'kwds': {'target': reference, 'candidate': ('v1',)}}]

    ops = OcgOperations(dataset=candidate, calc=calc)
    ret = ops.execute()
    actual_field = ret.get_element()
    actual_variables = get_variable_names(actual_field.data_variables)
    self.assertEqual(actual_variables[0], 'dissimilarity')
    dist = actual_field['dissimilarity']
    self.assertEqual(dist.shape, (1, 1, 2, 2))
def test_narccap_point_subset_small(self):
    dmap = {DimensionMapKey.X: {DimensionMapKey.VARIABLE: 'xc'},
            DimensionMapKey.Y: {DimensionMapKey.VARIABLE: 'yc'},
            DimensionMapKey.TIME: {DimensionMapKey.VARIABLE: 'time'}}
    rd = self.test_data.get_rd('narccap_pr_wrfg_ncep', kwds={'dimension_map': dmap})

    field = rd.get()
    self.assertIsInstance(field.crs, CFLambertConformal)
    self.assertIsNotNone(field.time)

    geom = [-97.74278, 30.26694]

    calc = [{'func': 'mean', 'name': 'mean'},
            {'func': 'median', 'name': 'median'},
            {'func': 'max', 'name': 'max'},
            {'func': 'min', 'name': 'min'}]
    calc_grouping = ['month', 'year']
    ops = ocgis.OcgOperations(dataset=rd, calc=calc, calc_grouping=calc_grouping,
                              output_format=constants.OutputFormatName.OCGIS, geom=geom,
                              abstraction='point', snippet=False, allow_empty=False,
                              output_crs=Spherical(), search_radius_mult=2.0)
    ret = ops.execute()
    ref = ret.get_element()
    actual = set(get_variable_names(ref.data_variables))
    self.assertEqual(actual, {'mean', 'median', 'max', 'min'})
def inquire_is_xyz(self, variable):
    """
    Inquire the dimension map to identify a variable's spatial classification.

    :param variable: The target variable to identify.
    :type variable: str | :class:`~ocgis.Variable`
    :rtype: :class:`ocgis.constants.DimensionMapKey`
    """
    name = get_variable_names(variable)[0]
    x = self.get_variable(DMK.X)
    y = self.get_variable(DMK.Y)
    z = self.get_variable(DMK.LEVEL)
    poss = {x: DMK.X, y: DMK.Y, z: DMK.LEVEL}
    ret = poss.get(name)

    if ret is None and self.has_topology:
        poss = {}
        topologies = self.get_available_topologies()
        for t in topologies:
            curr = self.get_topology(t)
            x = curr.get_variable(DMK.X)
            y = curr.get_variable(DMK.Y)
            z = curr.get_variable(DMK.LEVEL)
            poss.update({x: DMK.X, y: DMK.Y, z: DMK.LEVEL})
        ret = poss.get(name)

    return ret
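# A minimal usage sketch for inquire_is_xyz. This is hedged: the "lon"/"lat"
# coordinate names are hypothetical, not taken from the original source.
from ocgis import DimensionMap
from ocgis.constants import DimensionMapKey as DMK

dmap = DimensionMap()
dmap.set_variable(DMK.X, 'lon')
dmap.set_variable(DMK.Y, 'lat')
assert dmap.inquire_is_xyz('lat') == DMK.Y  # classified as the Y coordinate
assert dmap.inquire_is_xyz('tas') is None   # unclassified names return None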
def set_spatial_mask(self, variable, attrs=None, default_attrs=None):
    """
    Set the spatial mask variable for the dimension map. If ``attrs`` is not ``None``, then ``attrs`` >
    ``variable.attrs`` (if ``variable`` is not a string) > default attributes.

    :param variable: The spatial mask variable.
    :param dict attrs: Attributes to associate with the spatial mask variable *in addition* to default attributes.
    :param dict default_attrs: If provided, use these attributes as default spatial mask attributes.
    :type variable: :class:`~ocgis.Variable` | :class:`str`
    """
    if default_attrs is None:
        default_attrs = deepcopy(DIMENSION_MAP_TEMPLATE[DMK.SPATIAL_MASK][DMK.ATTRS])

    try:
        vattrs = deepcopy(variable.attrs)
    except AttributeError:
        vattrs = {}

    if attrs is None:
        attrs = {}

    default_attrs.update(vattrs)
    default_attrs.update(attrs)

    variable = get_variable_names(variable)[0]
    entry = self._get_entry_(DMK.SPATIAL_MASK)
    entry[DMK.VARIABLE] = variable
    entry[DMK.ATTRS] = default_attrs
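# Hedged sketch of the attribute precedence in set_spatial_mask: explicit
# ``attrs`` win over the variable's own attributes, which win over defaults.
# The variable and attribute values here are hypothetical.
from ocgis import DimensionMap, Variable
from ocgis.constants import DimensionMapKey as DMK

mask_var = Variable(name='the_mask', value=[0, 0, 1], dimensions='n',
                    attrs={'long_name': 'from the variable'})
dmap = DimensionMap()
dmap.set_spatial_mask(mask_var, attrs={'long_name': 'explicit wins'})
assert dmap.get_attrs(DMK.SPATIAL_MASK)['long_name'] == 'explicit wins'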
def test_get_operations(self):
    rd = self.test_data.get_rd('cancm4_tas')
    qs = "uri={0}&spatial_operation=intersects&geom=state_boundaries&geom_select_uid=20|30&output_format=shp&snippet=true".format(
        rd.uri)
    qi = QueryInterface(qs)
    ops = qi.get_operations()

    self.assertIsInstance(ops._get_object_('dataset'), Dataset)
    self.assertEqual(list(ops.dataset)[0].uri, rd.uri)
    self.assertIsInstance(ops.geom, GeomCabinetIterator)
    self.assertEqual(ops.spatial_operation, 'intersects')
    self.assertEqual(ops.geom_select_uid, (20, 30))

    qs = "uri={0}&spatial_operation=intersects&geom=state_boundaries&geom_select_uid=20|30".format(rd.uri)
    qs += "&calc=mean~the_mean|median~the_median|freq_perc~the_p!percentile~90&calc_grouping=month|year&field_name=calcs"
    qs += "&output_crs=4326&conform_units_to=celsius"
    qs += "&time_region=year~2001"
    qi = QueryInterface(qs)
    ops = qi.get_operations()
    ret = ops.execute()

    self.assertEqual(list(ret.children.keys()), [20, 30])
    # self.assertEqual(ret[20]["calcs"].variables.keys(), ["the_mean", "the_median", "the_p"])
    self.assertEqual(get_variable_names(ret.get_element(container_ugid=20).data_variables),
                     ("the_mean", "the_median", "the_p"))
    actual = ret.get_element(container_ugid=30, field_name='calcs')
    self.assertEqual(actual['the_mean'].shape, (12, 2, 2))
    # self.assertEqual(ret[30]["calcs"].variables["the_mean"].shape, (1, 12, 1, 2, 2))
    self.assertEqual(ops.calc[2]["ref"], FrequencyPercentile)
    self.assertEqual(ops.output_crs, CoordinateReferenceSystem(epsg=4326))
def test(self):
    path1 = self.write_field_data('data1')
    path2 = self.write_field_data('data2')
    path3 = self.write_field_data('basis_var')

    time_range = [datetime(2000, 3, 1), datetime(2000, 3, 31)]
    rds = [RequestDataset(p, time_range=time_range) for p in [path1, path2]]
    mrd = MultiRequestDataset(rds)

    basis = RequestDataset(path3, time_range=[datetime(2000, 8, 1), datetime(2000, 8, 31)])
    basis_field = basis.get()

    calc = [{'func': 'mfpf',
             'name': 'output_mfpf',
             'kwds': {'reference': ('data1', 'data2'),
                      'basis': basis_field}}]

    ops = OcgOperations(dataset=mrd, calc=calc)
    ret = ops.execute()
    actual_field = ret.get_element()
    actual_variables = get_variable_names(actual_field.data_variables)
    self.assertEqual(actual_variables, ('diff_data1_basis_var', 'diff_data2_basis_var'))

    sums = [v.get_value().sum() for v in actual_field.data_variables]
    for s in sums:
        self.assertAlmostEqual(s, 7.8071042497325145)
def test_system_add_variable(self):
    """Test adding variables from spatial collections."""

    # Create a few separate fields.
    variable_names = tuple(['a', 'b', 'c'])
    fields = [self.get_field(variable_name=v) for v in variable_names]

    # Create spatial collections containing those fields.
    scs = []
    for field in fields:
        sc = SpatialCollection()
        sc.add_field(field, None)
        scs.append(sc)

    # Destination spatial collection to add variables to from source spatial collections.
    grow = scs[0]

    # Loop over source fields.
    for idx in range(1, len(scs)):
        # Loop over child fields and spatial containers in the current source spatial collection.
        for field, container in scs[idx].iter_fields(yield_container=True):
            # TODO: This should be adjusted to allow easier selection with empty fields.
            try:
                # Case when we have spatial containers.
                grow_field = grow.get_element(field_name=field.name, container_ugid=container)
            except KeyError:
                # Case without spatial containers.
                grow_field = grow.get_element(field.name)

            # Add data variables to the grow field.
            for dv in field.data_variables:
                grow_field.add_variable(dv.extract(), is_data=True)

    # Assert all variables are present on the grow field.
    actual = grow.get_element()
    self.assertEqual(get_variable_names(actual.data_variables), variable_names)

    # Write the spatial collection using a converter.
    conv = NcConverter([grow], outdir=self.current_dir_output, prefix='out.nc')
    conv.write()

    # Assert all variables are present.
    rd = RequestDataset(conv.path)
    actual = rd.get()
    self.assertEqual(get_variable_names(actual.data_variables), variable_names)
def test_system_through_operations(self):
    mrd = self.get_multirequestdataset()
    ops = OcgOperations(dataset=mrd)
    ret = ops.execute()
    field = ret.get_element()
    actual = get_variable_names(field.data_variables)
    self.assertEqual(actual, self.f_variable_names)

    mrd = self.get_multirequestdataset()
    ops = OcgOperations(dataset=mrd, output_format='nc')
    ret = ops.execute()
    actual_field = RequestDataset(ret).get()
    actual = get_variable_names(actual_field.data_variables)
    self.assertEqual(actual, self.f_variable_names)

    actual_diff = actual_field.data_variables[1].get_value() - actual_field.data_variables[0].get_value()
    self.assertAlmostEqual(actual_diff.mean(), 1.0)
def set_crs(self, variable):
    """
    Set the coordinate reference system variable name.

    :param variable: :class:`str` | :class:`~ocgis.Variable`
    """
    variable = get_variable_names(variable)[0]
    entry = self._get_entry_(DMK.CRS)
    entry[DMK.VARIABLE] = variable
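# Hedged sketch: set_crs stores only the variable *name*, whether it is given a
# variable object or a plain string. The name below is hypothetical, and the
# get_crs accessor is assumed to return the stored name.
from ocgis import DimensionMap

dmap = DimensionMap()
dmap.set_crs('latitude_longitude')
assert dmap.get_crs() == 'latitude_longitude'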
def test_system_units_validation_equal_units(self):
    # Heat index coefficients require the data be in specific units.
    field = self.get_field(name='tasmax', units='fahrenheit', with_value=True)
    field_rhs = self.get_field(name='rhsmax', units='percent', with_value=True)

    with orphaned(field_rhs['rhsmax']):
        field.add_variable(field_rhs['rhsmax'], is_data=True)
    self.assertEqual(set(get_variable_names(field.get_by_tag(TagName.DATA_VARIABLES))),
                     set(['tasmax', 'rhsmax']))

    hi = HeatIndex(field=field, parms={'tas': 'tasmax', 'rhs': 'rhsmax'})
    vc = hi.execute()
    self.assertIsInstance(vc, VariableCollection)
def set_bounds(self, entry_key, bounds):
    """
    Set the bounds variable name for ``entry_key``.

    :param str entry_key: See :class:`ocgis.constants.DimensionMapKey` for valid entry keys.
    :param bounds: :class:`str` | :class:`~ocgis.Variable`
    """
    name = get_variable_names(bounds)[0]
    entry = self._get_entry_(entry_key)
    if entry[DMK.VARIABLE] is None:
        raise DimensionMapError(entry_key, 'No variable set. Bounds may not be set.')
    entry[DMK.BOUNDS] = name
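# Hedged sketch: bounds may only be attached to an entry whose variable is
# already set; otherwise DimensionMapError is raised. Names are hypothetical.
from ocgis import DimensionMap
from ocgis.constants import DimensionMapKey as DMK

dmap = DimensionMap()
dmap.set_variable(DMK.X, 'lon')
dmap.set_bounds(DMK.X, 'lon_bnds')
assert dmap.get_bounds(DMK.X) == 'lon_bnds'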
def test_system_units_validation_wrong_units(self):
    # Heat index coefficients require the data be in specific units.
    field = self.get_field(name='tasmax', units='kelvin', with_value=True)
    field_rhs = self.get_field(name='rhsmax', units='percent', with_value=True)

    with orphaned(field_rhs['rhsmax']):
        field.add_variable(field_rhs['rhsmax'], is_data=True)
    self.assertEqual(set(get_variable_names(field.get_by_tag(TagName.DATA_VARIABLES))),
                     {'tasmax', 'rhsmax'})

    hi = HeatIndex(field=field, parms={'tas': 'tasmax', 'rhs': 'rhsmax'})
    with self.assertRaises(UnitsValidationError):
        hi.execute()
def add_variable(self, variable, force=False, is_data=False):
    """
    .. note:: Accepts all parameters to :meth:`~ocgis.VariableCollection.add_variable`.

    Additional keyword arguments are:

    :param bool is_data: If ``True``, the variable is considered a data variable.
    """
    super(Field, self).add_variable(variable, force=force)
    if is_data:
        tagged = get_variable_names(self.get_by_tag(TagName.DATA_VARIABLES, create=True))
        if variable.name not in tagged:
            self.append_to_tags(TagName.DATA_VARIABLES, variable.name)
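# Hedged usage sketch for add_variable with is_data=True; the variable name is
# hypothetical. Tagging is idempotent: re-adding an already tagged name does
# not duplicate it in the DATA_VARIABLES tag.
from ocgis import Field, Variable
from ocgis.util.helpers import get_variable_names

field = Field()
field.add_variable(Variable(name='tas', value=[1.0, 2.0], dimensions='dtime'), is_data=True)
assert get_variable_names(field.data_variables) == ('tas',)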
def test_as_field(self):
    """Test iteration returned as field objects."""
    select_ugid = [16, 17, 51]
    desired_data_variables = ('UGID', 'STATE_FIPS', 'ID', 'STATE_NAME', 'STATE_ABBR')
    sci = GeomCabinetIterator(key='state_boundaries', select_uid=select_ugid, as_field=True)
    for _ in range(2):
        ugids = []
        for field in sci:
            self.assertIsInstance(field, Field)
            self.assertEqual(field.geom.ugid.shape, (1,))
            self.assertEqual(get_variable_names(field.data_variables), desired_data_variables)
            self.assertEqual(field.crs, WGS84())
            ugids.append(field.geom.ugid.get_value()[0])
def format_return_field(function_tag, out_field, new_temporal=None):
    # Remove the variables used by the calculation.
    try:
        to_remove = get_variable_names(out_field.get_by_tag(function_tag))
    except KeyError:
        # Let this fail quietly as the tag may not exist on incoming fields.
        pass
    else:
        for tr in to_remove:
            out_field.remove_variable(tr)

    # Remove the original time variable and replace with the new one if there is a new time dimension. New
    # time dimensions may not be present for calculations that do not compute one.
    if new_temporal is not None:
        out_field.remove_variable(out_field.time)
        out_field.set_time(new_temporal, force=True)
def test_full(self):
    """Compute the dissimilarity with all metrics."""
    from flyingpigeon import dissimilarity
    from matplotlib import pyplot as plt

    p1 = self.write_field_data('v1', ncol=1, nrow=1)
    p2 = self.write_field_data('v2', ncol=1, nrow=1)
    p3 = self.write_field_data('v1', ncol=11, nrow=10, dir='c')
    p4 = self.write_field_data('v2', ncol=11, nrow=10, dir='c')

    ref_range = [dt.datetime(2000, 3, 1), dt.datetime(2000, 3, 31)]
    ref = [ocgis.RequestDataset(p, time_range=ref_range) for p in [p1, p2]]
    reference = ocgis.MultiRequestDataset(ref)
    reference = reference.get()

    cand_range = [dt.datetime(2000, 8, 1), dt.datetime(2000, 8, 31)]
    can = [ocgis.RequestDataset(p, time_range=cand_range) for p in [p3, p4]]
    candidate = ocgis.MultiRequestDataset(can)

    fig, axes = plt.subplots(2, 3)
    for i, dist in enumerate(dissimilarity.__all__):
        calc = [{'func': 'dissimilarity',
                 'name': 'output_mfpf',
                 'kwds': {'target': reference,
                          'candidate': ('v1', 'v2'),
                          'dist': dist}}]

        ops = OcgOperations(dataset=candidate, calc=calc)
        ret = ops.execute()
        out_field = ret.get_element()
        var_name = get_variable_names(out_field.data_variables)[0]
        out = out_field[var_name].get_value()[0, 0]
        axes.flat[i].imshow(out)
        axes.flat[i].set_title(dist)

    path = os.path.join(test_output_path, 'test_spatial_analog_metrics.png')
    plt.savefig(path)
    plt.close()
def test_get_intersects(self):
    subset = box(100.7, 39.71, 102.30, 42.30)
    desired_manual = [[[40.0, 40.0], [41.0, 41.0], [42.0, 42.0]],
                      [[101.0, 102.0], [101.0, 102.0], [101.0, 102.0]]]
    desired_manual = np.array(desired_manual)
    grid = self.get_gridxy(crs=WGS84())
    # self.write_fiona_htmp(grid, 'grid')
    # self.write_fiona_htmp(GeometryVariable(value=subset), 'subset')
    sub, sub_slc = grid.get_intersects(subset, return_slice=True)
    self.assertFalse(sub.has_allocated_point)
    self.assertFalse(sub.has_allocated_polygon)
    self.assertFalse(sub.has_allocated_abstraction_geometry)
    # self.write_fiona_htmp(sub, 'sub')
    self.assertEqual(sub_slc, (slice(0, 3, None), slice(0, 2, None)))
    self.assertNumpyAll(sub.get_value_stacked(), desired_manual)

    point = sub.get_point()
    self.assertEqual(point.crs, grid.crs)

    # Test masks are updated.
    grid = self.get_gridxy(with_xy_bounds=True, with_parent=True)
    for t in ['xbounds', 'ybounds']:
        self.assertIn(t, grid.parent)
    subset = 'Polygon ((100.81193771626298883 42.17577854671281301, 101.13166089965399408 42.21211072664360842, 101.34965397923876651 41.18754325259516236, 103.68944636678200766 41.34013840830451159, 103.63858131487890546 41.22387543252595776, 100.77560553633219342 41.08581314878893664, 100.81193771626298883 42.17577854671281301))'
    subset = wkt.loads(subset)
    sub = grid.get_intersects(subset, cascade=True)
    self.assertTrue(sub.get_mask().any())
    self.assertTrue(sub.get_abstraction_geometry().get_mask().any())

    mask_slice = {'ydim': slice(1, 2), 'xdim': slice(1, 3)}

    sub_member_variables = get_variable_names(sub.get_member_variables())
    for v in list(sub.parent.values()):
        if v.name in sub_member_variables and not isinstance(v, GeometryVariable) \
                and v.name != sub.mask_variable.name:
            self.assertIsNone(v.get_mask())
        else:
            self.assertTrue(v.get_mask().any())
            self.assertFalse(v.get_mask().all())
            actual = v[mask_slice].get_mask()
            self.assertTrue(np.all(actual))
def test_system_masking(self):
    """Test behavior of the grid mask. This is an independently managed variable."""
    x = Variable('xc', value=[1, 2, 3], dimensions='dimx')
    y = Variable('yc', value=[10, 20, 30, 40], dimensions='dimy')
    grid = Grid(x, y)
    data = Variable('data', value=np.zeros(grid.shape), dimensions=['dimy', 'dimx'])
    grid.parent.add_variable(data)

    gmask = grid.get_mask()
    self.assertIsNone(gmask)
    self.assertIsNone(grid.mask_variable)

    new_mask = np.zeros(grid.shape, dtype=bool)
    new_mask[1, 1] = True
    grid.set_mask(new_mask, cascade=True)
    self.assertIsInstance(grid.mask_variable, Variable)
    actual = grid.get_mask()
    self.assertNumpyAll(actual, new_mask)
    actual = get_variable_names(grid.get_member_variables())
    desired = [x.name, y.name, grid._mask_name]
    self.assertAsSetEqual(actual, desired)
    self.assertNumpyAll(grid.get_mask(), data.get_mask())

    path = self.get_temporary_file_path('foo.nc')
    grid.parent.write(path)
    with self.nc_scope(path) as ds:
        actual = ds.variables[grid.mask_variable.name]
        self.assertNumpyAll(grid.get_mask(), actual[:].mask)

    # Test mask is used when read from file.
    actual_field = RequestDataset(path).get()
    self.assertNumpyAll(grid.get_mask(), actual_field.grid.get_mask())
    self.assertEqual(actual_field.grid.get_mask().sum(), 1)
    self.assertTrue(actual_field.grid.is_vectorized)
    self.assertEqual(actual_field.grid.get_mask().dtype, bool)
    actual_field.set_abstraction_geom()
    self.assertNumpyAll(actual_field.geom.get_mask(), grid.get_mask())
def test_system_converting_state_boundaries_shapefile_memory(self):
    """Test iteration may be used in place of loading all values from source."""
    rd = RequestDataset(uri=self.path_state_boundaries)
    field = rd.get()
    data_variable_names = get_variable_names(field.data_variables)
    field.geom.protected = True
    sub = field.get_field_slice({'geom': slice(10, 20)})
    self.assertTrue(sub.geom.protected)
    self.assertFalse(sub.geom.has_allocated_value)
    self.assertIsInstance(sub, Field)
    self.assertIsInstance(sub.geom, GeometryVariable)
    gc = sub.geom.convert_to(use_geometry_iterator=True)
    self.assertIsInstance(gc, PolygonGC)

    # Test the new object does not share data with the source.
    for dn in data_variable_names:
        self.assertNotIn(dn, gc.parent)

    self.assertFalse(sub.geom.has_allocated_value)
    self.assertTrue(field.geom.protected)
    path = self.get_temporary_file_path('out.nc')
    gc.parent.write(path)
def get_unioned(self, dimensions=None, union_dimension=None, spatial_average=None, root=0):
    """
    Unions _unmasked_ geometry objects. Collective across the current :class:`~ocgis.OcgVM`.
    """
    # TODO: optimize!

    # Get dimension names and lengths for the dimensions to union.
    if dimensions is None:
        dimensions = self.dimensions
    dimension_names = get_dimension_names(dimensions)
    dimension_lengths = [len(self.parent.dimensions[dn]) for dn in dimension_names]

    # Get the variables to spatial average.
    if spatial_average is not None:
        variable_names_to_weight = get_variable_names(spatial_average)
    else:
        variable_names_to_weight = []

    # Get the new dimensions for the geometry variable. The union dimension is always the last dimension.
    if union_dimension is None:
        from ocgis.variable.dimension import Dimension
        union_dimension = Dimension(constants.DimensionName.UNIONED_GEOMETRY, 1)
    new_dimensions = []
    for dim in self.dimensions:
        if dim.name not in dimension_names:
            new_dimensions.append(dim)
    new_dimensions.append(union_dimension)

    # Configure the return variable.
    ret = self.copy()
    if spatial_average is None:
        ret = ret.extract()
    ret.set_mask(None)
    ret.set_value(None)
    ret.set_dimensions(new_dimensions)
    ret.allocate_value()

    # Destination indices in the return variable are filled with non-masked, unioned geometries.
    for dst_indices in product(*[list(range(dl)) for dl in get_dimension_lengths(new_dimensions)]):
        dst_slc = {new_dimensions[ii].name: dst_indices[ii] for ii in range(len(new_dimensions))}

        # Select the geometries to union skipping any masked geometries.
        to_union = deque()
        for indices in product(*[list(range(dl)) for dl in dimension_lengths]):
            dslc = {dimension_names[ii]: indices[ii] for ii in range(len(dimension_names))}
            sub = self[dslc]
            sub_mask = sub.get_mask()
            if sub_mask is None:
                to_union.append(sub.get_value().flatten()[0])
            else:
                if not sub_mask.flatten()[0]:
                    to_union.append(sub.get_value().flatten()[0])

        # Execute the union operation.
        processed_to_union = deque()
        for geom in to_union:
            if isinstance(geom, MultiPolygon) or isinstance(geom, MultiPoint):
                for element in geom:
                    processed_to_union.append(element)
            else:
                processed_to_union.append(geom)
        unioned = cascaded_union(processed_to_union)

        # Pull unioned geometries and union again for the final unioned geometry.
        if vm.size > 1:
            unioned_gathered = vm.gather(unioned)
            if vm.rank == root:
                unioned = cascaded_union(unioned_gathered)

        # Fill the return geometry variable value with the unioned geometry.
        to_fill = ret[dst_slc].get_value()
        to_fill[0] = unioned

    # Spatial average shared dimensions.
    if spatial_average is not None:
        # Get source data to weight.
        for var_to_weight in filter(lambda ii: ii.name in variable_names_to_weight,
                                    list(self.parent.values())):
            # Holds sizes of dimensions to iterate. These dimensions are not squeezed by the weighted averaging.
            range_to_itr = []
            # Holds the names of dimensions to squeeze.
            names_to_itr = []
            # Dimension names that are squeezed. Also the dimensions for the weight matrix.
            names_to_slice_all = []
            for dn in var_to_weight.dimensions:
                if dn.name in self.dimension_names:
                    names_to_slice_all.append(dn.name)
                else:
                    range_to_itr.append(len(dn))
                    names_to_itr.append(dn.name)

            # Reference the weights on the source geometry variable.
            weights = self[{nsa: slice(None) for nsa in names_to_slice_all}].weights

            # Path if there are iteration dimensions. Checks for axes ordering in addition.
            if len(range_to_itr) > 0:
                # New dimensions for the spatially averaged variable. Unioned dimension is always last. Remove
                # the dimensions aggregated by the weighted average.
                new_dimensions = [dim for dim in var_to_weight.dimensions if dim.name not in dimension_names]
                new_dimensions.append(union_dimension)

                # Prepare the spatially averaged variable.
                target = ret.parent[var_to_weight.name]
                target.set_mask(None)
                target.set_value(None)
                target.set_dimensions(new_dimensions)
                target.allocate_value()

                # Swap weight axes to make sure they align with the target variable.
                swap_chain = get_swap_chain(dimension_names, names_to_slice_all)
                if len(swap_chain) > 0:
                    weights = weights.copy()
                for sc in swap_chain:
                    weights = weights.swapaxes(*sc)

                # The main weighting loop. Can get quite intensive with many, large iteration dimensions.
                len_names_to_itr = len(names_to_itr)
                slice_none = slice(None)
                squeeze_out = [ii for ii, dim in enumerate(var_to_weight.dimensions)
                               if dim.name in names_to_itr]
                should_squeeze = True if len(squeeze_out) > 0 else False
                np_squeeze = np.squeeze
                np_atleast_1d = np.atleast_1d
                np_ma_average = np.ma.average
                for nonweighted_indices in product(*[list(range(ri)) for ri in range_to_itr]):
                    w_slc = {names_to_itr[ii]: nonweighted_indices[ii] for ii in range(len_names_to_itr)}
                    for nsa in names_to_slice_all:
                        w_slc[nsa] = slice_none
                    data_to_weight = var_to_weight[w_slc].get_masked_value()
                    if should_squeeze:
                        data_to_weight = np_squeeze(data_to_weight, axis=tuple(squeeze_out))
                    weighted_value = np_atleast_1d(np_ma_average(data_to_weight, weights=weights))
                    target[w_slc].get_value()[:] = weighted_value
            else:
                target_to_weight = var_to_weight.get_masked_value()

                # Sort to minimize floating point sum errors.
                target_to_weight = target_to_weight.flatten()
                weights = weights.flatten()
                sindices = np.argsort(target_to_weight)
                target_to_weight = target_to_weight[sindices]
                weights = weights[sindices]

                weighted_value = np.atleast_1d(np.ma.average(target_to_weight, weights=weights))
                target = ret.parent[var_to_weight.name]
                target.set_mask(None)
                target.set_value(None)
                target.set_dimensions(new_dimensions)
                target.set_value(weighted_value)

        # Collect areas of live ranks and convert to weights.
        if vm.size > 1:
            # If there is no area information (points for example), we need to use counts.
            if ret.area.data[0].max() == 0:
                weight_or_proxy = float(self.size)
            else:
                weight_or_proxy = ret.area.data[0]

            if vm.rank != root:
                vm.comm.send(weight_or_proxy, dest=root)
            else:
                live_rank_areas = [weight_or_proxy]
                for tner in vm.ranks:
                    if tner != vm.rank:
                        recv_area = vm.comm.recv(source=tner)
                        live_rank_areas.append(recv_area)
                live_rank_areas = np.array(live_rank_areas)

                rank_weights = live_rank_areas / np.max(live_rank_areas)

            for var_to_weight in filter(lambda ii: ii.name in variable_names_to_weight,
                                        list(ret.parent.values())):
                dimensions_to_itr = [dim.name for dim in var_to_weight.dimensions
                                     if dim.name != union_dimension.name]
                slc = {union_dimension.name: 0}
                for idx_slc in var_to_weight.iter_dict_slices(dimensions=dimensions_to_itr):
                    idx_slc.update(slc)
                    to_weight = var_to_weight[idx_slc].get_value().flatten()[0]
                    if vm.rank == root:
                        collected_to_weight = [to_weight]
                    if not vm.rank == root:
                        vm.comm.send(to_weight, dest=root)
                    else:
                        for tner in vm.ranks:
                            if not tner == root:
                                recv_to_weight = vm.comm.recv(source=tner)
                                collected_to_weight.append(recv_to_weight)

                        # Sort to minimize floating point sum errors.
                        collected_to_weight = np.array(collected_to_weight)
                        sindices = np.argsort(collected_to_weight)
                        collected_to_weight = collected_to_weight[sindices]
                        rank_weights = rank_weights[sindices]

                        weighted = np.atleast_1d(np.ma.average(collected_to_weight, weights=rank_weights))
                        var_to_weight[idx_slc].get_value()[:] = weighted

    if vm.rank == root:
        return ret
    else:
        return
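# Hedged usage sketch for get_unioned: union two unmasked polygons and
# area-weight a data variable sharing the geometry dimension. Names and values
# are hypothetical, not from the original source. Run serially (vm.size == 1),
# the root rank receives the result.
from shapely.geometry import box
from ocgis import GeometryVariable, Variable

gv = GeometryVariable(name='geoms', value=[box(0, 0, 1, 1), box(1, 0, 2, 1)], dimensions='ngeom')
gv.parent.add_variable(Variable(name='data', value=[10.0, 20.0], dimensions='ngeom'))
unioned = gv.get_unioned(spatial_average='data')
# Both boxes have equal area, so the spatial average is 15.0 and the result
# has a single element along the union dimension.
assert unioned.parent['data'].get_value()[0] == 15.0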
def test_get(self):
    mrd = self.get_multirequestdataset()
    mfield = mrd.get()
    actual = get_variable_names(mfield.data_variables)
    self.assertEqual(actual, self.f_variable_names)
def __init__(self, variable, followers=None, value=None, mask=None, allow_masked=True, primary_mask=None,
             slice_remap=None, shape=None, melted=None, repeaters=None, formatter=None, clobber_masked=True):
    if melted is not None and followers is None:
        raise ValueError('"melted" must be None if there are no "followers".')

    self.variable = variable
    self.formatter = formatter
    self.allow_masked = allow_masked
    self.slice_remap = slice_remap
    self.clobber_masked = clobber_masked

    if variable.repeat_record is not None:
        if repeaters is None:
            repeaters = variable.repeat_record
        else:
            repeaters += variable.repeat_record
    self.repeaters = repeaters

    self._is_lead = True

    if melted is None:
        melted_repeaters = None
    else:
        melted_repeaters = {}
        for m in melted:
            try:
                if m.repeat_record is not None:
                    melted_repeaters[m.name] = m.repeat_record
            except AttributeError:
                # The melted target has no repeat record; use its repeaters instead.
                if m.repeaters is not None:
                    melted_repeaters[m.name] = m.repeaters
    self.melted_repeaters = melted_repeaters

    if melted is not None:
        melted = get_variable_names(melted)
    self.melted = melted

    if shape is None:
        shape = self.variable.shape
    self.shape = shape

    if primary_mask is None:
        primary_mask = variable.name
    else:
        primary_mask = get_variable_names(primary_mask)[0]
    self.primary_mask = primary_mask

    if value is None:
        self.value = variable.get_value()
    else:
        self.value = value

    if mask is None:
        self.mask = variable.get_mask()
    else:
        self.mask = mask

    if followers is not None:
        dimensions = get_dimension_names(self.variable.dimensions)
        followers = get_followers(followers)

        for fidx, follower in enumerate(followers):
            if isinstance(follower, self.__class__):
                iterator = follower
                follower = follower.variable
            else:
                iterator = Iterator(follower, allow_masked=allow_masked, primary_mask=primary_mask)

            follower_dimensions = get_dimension_names(follower.dimensions)
            set_follower_dimensions = set(follower_dimensions)
            set_dimensions = set(dimensions)
            if not set_follower_dimensions.issubset(set_dimensions):
                msg = 'Follower variable "{}" dimensions are not a subset of the lead variable.'.format(
                    follower.name)
                raise ValueError(msg)

            if follower_dimensions != dimensions:
                follower_slice_remap = []
                for d in follower_dimensions:
                    if d in set_dimensions:
                        follower_slice_remap.append(dimensions.index(d))
                iterator.slice_remap = follower_slice_remap
                iterator.shape = self.shape

            iterator._is_lead = False
            followers[fidx] = iterator
        self.iterators = [self] + followers
        self.followers = followers
    else:
        self.iterators = [self]
        self.followers = None

    self._is_recursing = False
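# Hedged usage sketch for Iterator: a follower's dimensions must be a subset of
# the lead variable's dimensions. Variable names are hypothetical, and the
# record format is assumed to be a name -> value mapping per element.
import numpy as np
from ocgis import Variable
from ocgis.variable.iterator import Iterator

lead = Variable(name='data', value=np.arange(6).reshape(2, 3), dimensions=['y', 'x'])
follower = Variable(name='ycoord', value=[10, 20], dimensions=['y'])
for record in Iterator(lead, followers=[follower]):
    print(record['data'], record['ycoord'])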
def __init__(self, **kwargs):
    kwargs = kwargs.copy()
    dimension_map = kwargs.pop('dimension_map', None)

    # Flag updated by driver to indicate if the coordinate system is assigned or implied.
    self._has_assigned_coordinate_system = False

    # Flag to indicate if this is a regrid destination.
    self.regrid_destination = kwargs.pop('regrid_destination', False)
    # Flag to indicate if this is a regrid source.
    self.regrid_source = kwargs.pop('regrid_source', True)

    # Other incoming data objects may have a coordinate system which should be used.
    crs = kwargs.pop(KeywordArgument.CRS, 'auto')

    # Add grid variable metadata to dimension map.
    grid = kwargs.pop(KeywordArgument.GRID, 'auto')

    # Configure the driver.
    driver = kwargs.pop(KeywordArgument.DRIVER, 'auto')

    # Extract standard coordinate variables from the field keyword arguments.
    k = (DimensionMapKey.GEOM, DimensionMapKey.REALIZATION, DimensionMapKey.TIME, DimensionMapKey.LEVEL)
    s = OrderedDict()
    for ii in k:
        s[ii] = kwargs.pop(ii, None)

    grid_abstraction = kwargs.pop(KeywordArgument.GRID_ABSTRACTION, 'auto')
    if grid_abstraction is None:
        raise ValueError("'{}' may not be None.".format(KeywordArgument.GRID_ABSTRACTION))

    grid_is_isomorphic = kwargs.pop('grid_is_isomorphic', 'auto')
    if grid_is_isomorphic is None:
        raise ValueError("'{}' may not be None.".format('grid_is_isomorphic'))

    # TODO: This should maybe be part of the dimension map? Time variables are not dependent on fields.
    self.format_time = kwargs.pop(KeywordArgument.FORMAT_TIME, True)

    # Use tags to set data variables.
    is_data = kwargs.pop(KeywordArgument.IS_DATA, [])

    VariableCollection.__init__(self, **kwargs)

    dimension_map = deepcopy(dimension_map)
    if dimension_map is None:
        dimension_map = DimensionMap()
    elif isinstance(dimension_map, dict):
        dimension_map = DimensionMap.from_dict(dimension_map)
    self.dimension_map = dimension_map

    self.set_grid(grid, crs=crs)
    if driver != 'auto':
        self.dimension_map.set_driver(driver)
    if grid_abstraction != 'auto':
        self.dimension_map.set_grid_abstraction(grid_abstraction)
    if grid_is_isomorphic != 'auto':
        self.dimension_map.set_property(DMK.IS_ISOMORPHIC, grid_is_isomorphic)

    # Append the data variable tagged variable names.
    is_data = list(get_iter(is_data, dtype=Variable))
    is_data_variable_names = get_variable_names(is_data)
    for idvn in is_data_variable_names:
        self.append_to_tags(TagName.DATA_VARIABLES, idvn, create=True)
    for idx, dvn in enumerate(is_data_variable_names):
        if dvn not in self:
            if isinstance(is_data[idx], Variable):
                self.add_variable(is_data[idx])

    # Configure the field updating the dimension map in the process.
    cvar = s[DimensionMapKey.REALIZATION]
    if cvar is not None:
        self.set_realization(cvar)
    cvar = s[DimensionMapKey.TIME]
    if cvar is not None:
        self.set_time(cvar)
    cvar = s[DimensionMapKey.LEVEL]
    if cvar is not None:
        self.set_level(cvar)
    cvar = s[DimensionMapKey.GEOM]
    if cvar is not None:
        self.set_geom(cvar, crs=crs)

    if crs != 'auto':
        self.set_crs(crs)
def create_field(self, *args, **kwargs):
    """
    Create a field object. In general, this should not be overloaded by subclasses.

    :keyword bool format_time: ``(=True)`` If ``False``, do not convert numeric times to Python date objects.
    :keyword str grid_abstraction: ``(='auto')`` If provided, use this grid abstraction.
    :keyword raw_field: ``(=None)`` If provided, modify this field instead.
    :type raw_field: None | :class:`~ocgis.Field`
    :param kwargs: Additional keyword arguments to :meth:`~ocgis.driver.base.AbstractDriver.create_raw_field`.
    :return: :class:`ocgis.Field`
    """
    kwargs = kwargs.copy()
    raw_field = kwargs.pop('raw_field', None)
    format_time = kwargs.pop(KeywordArgument.FORMAT_TIME, True)
    grid_abstraction = kwargs.pop(KeywordArgument.GRID_ABSTRACTION, self.rd.grid_abstraction)
    grid_is_isomorphic = kwargs.pop('grid_is_isomorphic', self.rd.grid_is_isomorphic)

    if raw_field is None:
        # Get the raw variable collection from source.
        new_kwargs = kwargs.copy()
        new_kwargs['source_name'] = None
        raw_field = self.create_raw_field(*args, **new_kwargs)

    # Get the appropriate metadata for the collection.
    group_metadata = self.get_group_metadata(raw_field.group, self.metadata_source)
    # Always pull the dimension map from the request dataset. This allows it to be overloaded.
    dimension_map = self.get_group_metadata(raw_field.group, self.rd.dimension_map)

    # Modify the coordinate system variable. If it is overloaded on the request dataset, then the variable
    # collection needs to be updated to hold the variable and any alternative coordinate systems need to be
    # removed.
    to_remove = None
    to_add = None
    crs = self.get_crs(group_metadata)
    if self.rd._has_assigned_coordinate_system:
        to_add = self.rd._crs
        if crs is not None:
            to_remove = crs.name
    else:
        if self.rd._crs is not None and self.rd._crs != 'auto':
            to_add = self.rd._crs
            if crs is not None:
                to_remove = crs.name
        elif crs is not None:
            to_add = crs
    if to_remove is not None:
        raw_field.pop(to_remove, None)
    if to_add is not None:
        raw_field.add_variable(to_add, force=True)
    # Overload the dimension map with the CRS.
    if to_add is not None:
        # dimension_map[DimensionMapKey.CRS][DimensionMapKey.VARIABLE] = to_add.name
        dimension_map.set_crs(to_add.name)

    # Remove the mask variable if present in the raw dimension map and the source dimension map is set to None.
    if self.rd.dimension_map.get_spatial_mask() is None and self.dimension_map_raw.get_spatial_mask() is not None:
        raw_field.pop(self.dimension_map_raw.get_spatial_mask())

    # Convert the raw variable collection to a field.
    # TODO: Identify a way to remove this code block; field should be appropriately initialized; format_time
    #  and grid_abstraction are part of a dimension map.
    kwargs[KeywordArgument.DIMENSION_MAP] = dimension_map
    kwargs[KeywordArgument.FORMAT_TIME] = format_time
    if grid_abstraction != 'auto':
        kwargs[KeywordArgument.GRID_ABSTRACTION] = grid_abstraction
    if grid_is_isomorphic != 'auto':
        kwargs['grid_is_isomorphic'] = grid_is_isomorphic
    field = Field.from_variable_collection(raw_field, *args, **kwargs)

    # If this is a source grid for regridding, ensure the flag is updated.
    field.regrid_source = self.rd.regrid_source
    # Update the assigned coordinate system flag.
    field._has_assigned_coordinate_system = self.rd._has_assigned_coordinate_system

    # Apply any requested subsets.
    if self.rd.time_range is not None:
        field = field.time.get_between(*self.rd.time_range).parent
    if self.rd.time_region is not None:
        field = field.time.get_time_region(self.rd.time_region).parent
    if self.rd.time_subset_func is not None:
        field = field.time.get_subset_by_function(self.rd.time_subset_func).parent
    if self.rd.level_range is not None:
        field = field.level.get_between(*self.rd.level_range).parent

    # These variables have all the dimensions needed for a data classification. Use overloaded values from the
    # request dataset if they are provided.
    try:
        data_variable_names = list(get_variable_names(self.rd.rename_variable))
    except NoDataVariablesFound:
        # It is okay to have no data variables in a field.
        data_variable_names = []
    for dvn in data_variable_names:
        field.append_to_tags(TagName.DATA_VARIABLES, dvn, create=True)

    # Load child fields.
    for child in list(field.children.values()):
        kwargs['raw_field'] = child
        field.children[child.name] = self.create_field(*args, **kwargs)

    return field
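# Hedged note: create_field is normally reached indirectly. RequestDataset.get()
# delegates to the driver, which builds the Field shown above. The file path
# below is hypothetical.
from ocgis import Field, RequestDataset

rd = RequestDataset('/path/to/data.nc')
field = rd.get()
assert isinstance(field, Field)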
def get_field(self, *args, **kwargs):
    vc = kwargs.pop('vc', None)
    format_time = kwargs.pop(KeywordArgument.FORMAT_TIME, True)
    if KeywordArgument.GRID_ABSTRACTION in kwargs:
        grid_abstraction = kwargs.pop(KeywordArgument.GRID_ABSTRACTION)
    else:
        grid_abstraction = constants.UNINITIALIZED

    if vc is None:
        # Get the raw variable collection from source.
        new_kwargs = kwargs.copy()
        new_kwargs['source_name'] = None
        vc = self.get_variable_collection(*args, **new_kwargs)

    # Get the appropriate metadata for the collection.
    group_metadata = self.get_group_metadata(vc.group, self.metadata_source)
    # Always pull the dimension map from the request dataset. This allows it to be overloaded.
    dimension_map = self.get_group_metadata(vc.group, self.rd.dimension_map)

    # Modify the coordinate system variable. If it is overloaded on the request dataset, then the variable
    # collection needs to be updated to hold the variable and any alternative coordinate systems need to be
    # removed.
    to_remove = None
    to_add = None
    crs = self.get_crs(group_metadata)
    if self.rd._has_assigned_coordinate_system:
        to_add = self.rd._crs
        if crs is not None:
            to_remove = crs.name
    else:
        if self.rd._crs is not None and self.rd._crs != 'auto':
            to_add = self.rd._crs
            if crs is not None:
                to_remove = crs.name
        elif crs is not None:
            to_add = crs
    if to_remove is not None:
        vc.pop(to_remove, None)
    if to_add is not None:
        vc.add_variable(to_add, force=True)
    # Overload the dimension map with the CRS.
    if to_add is not None:
        # dimension_map[DimensionMapKey.CRS][DimensionMapKey.VARIABLE] = to_add.name
        dimension_map.set_crs(to_add.name)

    # Remove the mask variable if present in the raw dimension map and the source dimension map is set to None.
    if self.rd.dimension_map.get_spatial_mask() is None and self.dimension_map_raw.get_spatial_mask() is not None:
        vc.pop(self.dimension_map_raw.get_spatial_mask())

    # Convert the raw variable collection to a field.
    kwargs['dimension_map'] = dimension_map
    kwargs[KeywordArgument.FORMAT_TIME] = format_time
    if grid_abstraction != constants.UNINITIALIZED:
        kwargs[KeywordArgument.GRID_ABSTRACTION] = grid_abstraction
    field = Field.from_variable_collection(vc, *args, **kwargs)

    # If this is a source grid for regridding, ensure the flag is updated.
    field.regrid_source = self.rd.regrid_source
    # Update the assigned coordinate system flag.
    field._has_assigned_coordinate_system = self.rd._has_assigned_coordinate_system

    # Apply any requested subsets.
    if self.rd.time_range is not None:
        field = field.time.get_between(*self.rd.time_range).parent
    if self.rd.time_region is not None:
        field = field.time.get_time_region(self.rd.time_region).parent
    if self.rd.time_subset_func is not None:
        field = field.time.get_subset_by_function(self.rd.time_subset_func).parent
    if self.rd.level_range is not None:
        field = field.level.get_between(*self.rd.level_range).parent

    # These variables have all the dimensions needed for a data classification. Use overloaded values from the
    # request dataset if they are provided.
    try:
        data_variable_names = list(get_variable_names(self.rd.rename_variable))
    except NoDataVariablesFound:
        # It is okay to have no data variables in a field.
        data_variable_names = []
    for dvn in data_variable_names:
        field.append_to_tags(TagName.DATA_VARIABLES, dvn, create=True)

    # Load child fields.
    for child in list(field.children.values()):
        kwargs['vc'] = child
        field.children[child.name] = self.get_field(*args, **kwargs)

    return field
def set_variable(self, entry_key, variable, dimension=None, bounds=None, attrs=None, pos=None,
                 dimensionless=False, section=None):
    """
    Set coordinate variable information for ``entry_key``.

    :param str entry_key: See :class:`ocgis.constants.DimensionMapKey` for valid entry keys.
    :param variable: The variable to set. Use a variable object to auto-fill additional fields if they are
     ``None``.
    :type variable: :class:`str` | :class:`~ocgis.Variable`
    :param dimension: A sequence of dimension names. If ``None``, they will be pulled from ``variable`` if it
     is a variable object.
    :param bounds: See :meth:`~ocgis.DimensionMap.set_bounds`.
    :param dict attrs: Default attributes for the coordinate variables. If ``None``, they will be pulled from
     ``variable`` if it is a variable object.
    :param int pos: The representative dimension position in ``variable`` if ``variable`` has more than one
     dimension. For example, a latitude variable may have two dimensions ``(lon, lat)``. The mapper must
     determine which dimension position is representative for the latitude variable when slicing.
    :param section: A slice-like tuple used to extract the data out of its source variable into a single
     variable format.
    :type section: tuple

    >>> section = (None, 0)
    >>> # This will be converted to a slice.
    >>> [slice(None), slice(0, 1)]

    :param bool dimensionless: If ``True``, this variable has no canonical dimension.
    :raises: DimensionMapError
    """
    if entry_key in self._special_entry_keys:
        raise DimensionMapError(entry_key, "The entry '{}' has a special set method.".format(entry_key))
    if section is not None and (pos is None and dimension is None):
        raise DimensionMapError(entry_key, "If a section is provided, position or dimension must be defined.")

    entry = self._get_entry_(entry_key)

    if variable is None:
        self._storage.pop(entry_key)
        return

    try:
        if bounds is None:
            bounds = variable.bounds
        if dimension is None:
            if variable.ndim > 1:
                if pos is None and not dimensionless:
                    msg = "A position (pos) is required if no dimension is provided and target variable has " \
                          "greater than one dimension."
                    raise DimensionMapError(entry_key, msg)
            elif variable.ndim == 1:
                pos = 0
            else:
                pos = None
            # We can have scalar dimensions.
            if pos is not None and not dimensionless:
                dimension = variable.dimensions[pos]
    except AttributeError:
        # Assume string type.
        pass

    value = get_variable_names(variable)[0]
    if bounds is not None:
        bounds = get_variable_names(bounds)[0]
    if dimension is None:
        dimension = []
    else:
        dimension = list(get_dimension_names(dimension))

    if attrs is None:
        try:
            attrs = self._storage.__class__(deepcopy(DIMENSION_MAP_TEMPLATE[entry_key][DMK.ATTRS]))
        except KeyError:
            # Default attributes are empty.
            attrs = self._storage.__class__()
    # Allow for any variable attributes.
    if hasattr(variable, 'attrs'):
        attrs.update(variable.attrs)
    # Dimension map attributes always take precedence. Dimension map attrs > variable attributes > default
    # attributes.
    current_attrs = self.get_attrs(entry_key)
    if current_attrs is None:
        current_attrs = self._storage.__class__()
    attrs.update(current_attrs)

    entry[DMK.VARIABLE] = value
    entry[DMK.BOUNDS] = bounds
    entry[DMK.DIMENSION] = dimension
    entry[DMK.ATTRS] = attrs
    if section is not None:
        entry[DMK.SECTION] = section
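# Hedged sketch for set_variable with a multidimensional coordinate: ``pos``
# selects the representative dimension. The names here are hypothetical, and
# the get_dimension accessor is assumed to return the stored dimension names.
import numpy as np
from ocgis import DimensionMap, Variable
from ocgis.constants import DimensionMapKey as DMK

lat = Variable(name='lat', value=np.zeros((3, 4)), dimensions=['y', 'x'])
dmap = DimensionMap()
dmap.set_variable(DMK.Y, lat, pos=0)
assert dmap.get_variable(DMK.Y) == 'lat'
assert dmap.get_dimension(DMK.Y) == ['y']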