def test_create_unique_global_array(self):
    """Test computing a unique value set across ranks for a distributed variable.

    For each test value array, rank 0 computes the serial ``np.unique`` result
    as the expected answer, the array is scattered across ranks, and the
    gathered output of ``create_unique_global_array`` is compared against it.

    NOTE(review): this method appears to be defined twice in this file; the
    second definition will shadow this one at class-creation time.
    """
    dist = OcgDist()
    # Distributed dimension sized to match the nine-element test arrays below.
    dist.create_dimension('dim', 9, dist=True)
    dist.update_dimension_bounds()

    values = [
        [4, 2, 1, 2, 1, 4, 1, 4, 2],
        [44, 25, 16, 27, 18, 49, 10, 41, 22],
        [44, 25, 16, 27, 44, 49, 10, 41, 44],
        [1, 1, 1, 1, 1, 1, 1, 1, 1]
    ]
    for v in values:
        if vm.rank == 0:
            # Only rank 0 holds the full variable before the scatter; it also
            # computes the serial unique result used as the expected answer.
            index = Variable(name='cindex', value=v, dimensions='dim')
            desired = np.unique(index.get_value())
            desired_length = len(desired)
        else:
            index = None
        index = variable_scatter(index, dist)
        with vm.scoped_by_emptyable('not empty', index):
            if not vm.is_null:
                uvar = create_unique_global_array(index.get_value())
                # Gather each rank's local unique values back to rank 0.
                uvar_gathered = vm.gather(uvar)
                if vm.rank == 0:
                    uvar_gathered = hgather(uvar_gathered)
                    self.assertEqual(len(uvar_gathered), desired_length)
                    self.assertEqual(set(uvar_gathered), set(desired))
def test_create_unique_global_array(self):
    """Check the globally unique value set for a scattered variable.

    The expected result is computed serially on rank 0 with ``np.unique``;
    the parallel result is gathered and compared against it.
    """
    dist = OcgDist()
    dist.create_dimension('dim', 9, dist=True)
    dist.update_dimension_bounds()

    test_arrays = [[4, 2, 1, 2, 1, 4, 1, 4, 2],
                   [44, 25, 16, 27, 18, 49, 10, 41, 22],
                   [44, 25, 16, 27, 44, 49, 10, 41, 44],
                   [1, 1, 1, 1, 1, 1, 1, 1, 1]]
    for arr in test_arrays:
        # Rank 0 builds the source variable and the serial reference answer.
        if vm.rank == 0:
            src = Variable(name='cindex', value=arr, dimensions='dim')
            expected = np.unique(src.get_value())
            expected_count = len(expected)
        else:
            src = None
        src = variable_scatter(src, dist)

        with vm.scoped_by_emptyable('not empty', src):
            if not vm.is_null:
                local_unique = create_unique_global_array(src.get_value())
                gathered = vm.gather(local_unique)
                # Assertions run on the root rank only.
                if vm.rank == 0:
                    gathered = hgather(gathered)
                    self.assertEqual(len(gathered), expected_count)
                    self.assertEqual(set(gathered), set(expected))
def test_init_dimension_map(self):
    """Test initializing with a dimension map only."""
    xvar = Variable(value=[1, 2, 3], dimensions='elements', name='x')
    yvar = Variable(value=[4, 5, 6], dimensions='elements', name='y')

    # Register the coordinate variables on a point topology of the map.
    dmap = DimensionMap()
    topology = dmap.get_topology(Topology.POINT, create=True)
    topology.set_variable(DMK.X, xvar)
    topology.set_variable(DMK.Y, yvar)

    field = Field(variables=[xvar, yvar], dimension_map=dmap)
    pgc = PointGC(parent=field)

    # The geometry coordinates should resolve through the dimension map.
    self.assertNumpyAll(xvar.get_value(), pgc.x.get_value())
    self.assertNumpyAll(yvar.get_value(), pgc.y.get_value())
def test_create_index_variable_global(self):
    """Test creating a global index variable mapping source into destination.

    Currently skipped; the body below is retained as the intended test.
    """
    raise SkipTest('not implemented')

    n_src = 5
    n_dst = 7

    dim_src = Dimension('dsrc', n_src, dist=True)
    src_dist = OcgDist()
    src_dist.add_dimension(dim_src)
    src_dist.update_dimension_bounds()

    dim_dst = Dimension('ddst', n_dst, dist=True)
    dst_dist = OcgDist()
    dst_dist.add_dimension(dim_dst)
    dst_dist.update_dimension_bounds()

    if vm.rank == 0:
        # Seed so source values are a reproducible subset of the destination.
        np.random.seed(1)
        dst_values = np.random.rand(n_dst)
        src_values = np.random.choice(dst_values, size=n_src, replace=False)
        src = Variable(name='src', value=src_values, dimensions=dim_src.name)
        # TODO: move create_ugid_global to create_global_index on a standard variable object
        dst = GeometryVariable(name='dst', value=dst_values, dimensions=dim_dst.name)
    else:
        src, dst = None, None
    src = variable_scatter(src, src_dist)
    dst = variable_scatter(dst, dst_dist)

    actual = create_index_variable_global('index_array', src, dst)

    # Indexing the destination with the computed index recovers the source.
    self.assertNumpyAll(dst.get_value()[actual.get_value()], src.get_value())
def test_broadcast_variable(self):
    """Test broadcasting a variable to a new dimension order and back.

    A round trip through ``broadcast_variable`` should restore the original
    value and mask, and the value buffer may share memory with the input.
    """
    data = np.random.rand(3, 4, 5)
    data_copy = deepcopy(data)
    data_mask = data_copy > 0.5
    mask_copy = deepcopy(data_mask)

    src_dims = ['time', 'lat', 'lon']
    var = Variable(name='src', value=data, mask=data_mask, dimensions=src_dims)

    # Broadcast to the reversed dimension ordering.
    broadcast_variable(var, ['lon', 'lat', 'time'])
    self.assertEqual(var.shape, (5, 4, 3))
    self.assertEqual(var.get_value().shape, (5, 4, 3))
    # Transposing should not change the element sum.
    self.assertEqual(data_copy.sum(), var.get_value().sum())

    # Broadcast back and confirm the round trip restores value and mask.
    broadcast_variable(var, src_dims)
    self.assertNumpyAll(data_copy, var.get_value())
    self.assertNumpyMayShareMemory(data, var.get_value())
    self.assertNumpyAll(mask_copy, var.get_mask())
def fixture_driver_scrip_netcdf_field(self):
    """Create a SCRIP-flavored unstructured point field fixture.

    Builds flattened center-longitude/latitude variables from the product of
    two coordinate vectors and attaches a ``grid_dims`` variable holding the
    per-axis sizes (y count first, then x count).
    """
    lon_values = np.arange(10., 35., step=5)
    lat_values = np.arange(45., 85., step=10)
    n_elements = lon_values.shape[0] * lat_values.shape[0]
    element_dim = Dimension(name='grid_size', size=n_elements)

    lon = Variable(name='grid_center_lon', dimensions=element_dim)
    lat = Variable(name='grid_center_lat', dimensions=element_dim)
    # Fill the flattened centers; product iterates latitude fastest.
    pairs = itertools.product(lon_values, lat_values)
    for element_idx, (lon_value, lat_value) in enumerate(pairs):
        lon.get_value()[element_idx] = lon_value
        lat.get_value()[element_idx] = lat_value

    geom = PointGC(x=lon, y=lat, crs=Spherical(), driver=DriverNetcdfSCRIP)
    field = Field(grid=GridUnstruct(geoms=[geom]), driver=DriverNetcdfSCRIP)

    rank_sizes = Variable(name='grid_dims', value=[lat_values.shape[0], lon_values.shape[0]],
                          dimensions='grid_rank')
    field.add_variable(rank_sizes)
    return field
def write_subsets(self, src_template, dst_template, wgt_template, index_path):
    """
    Write grid subsets to netCDF files using the provided filename templates. The template must contain the full
    file path with a single curly-bracer pair to insert the combination counter. ``wgt_template`` should not be a
    full path. This name is used when generating weight files.

    >>> template_example = '/path/to/data_{}.nc'

    :param str src_template: The template for the source subset file.
    :param str dst_template: The template for the destination subset file.
    :param str wgt_template: The template for the weight filename.

    >>> wgt_template = 'esmf_weights_{}.nc'

    :param index_path: Path to the output indexing netCDF.
    """
    # Accumulators for the index file written at the end: one entry per subset.
    src_filenames = []
    dst_filenames = []
    wgt_filenames = []
    dst_slices = []
    # nzeros = len(str(reduce(lambda x, y: x * y, self.nsplits_dst)))

    # Iterate source/destination subset pairs; ctr is the 1-based combination
    # counter inserted into each filename template.
    for ctr, (sub_src, sub_dst, dst_slc) in enumerate(self.iter_src_grid_subsets(yield_dst=True), start=1):
        # padded = create_zero_padded_integer(ctr, nzeros)

        src_path = src_template.format(ctr)
        dst_path = dst_template.format(ctr)
        wgt_filename = wgt_template.format(ctr)

        # Only basenames are recorded for the source/destination files; the
        # weight entry is already a bare filename.
        src_filenames.append(os.path.split(src_path)[1])
        dst_filenames.append(os.path.split(dst_path)[1])
        wgt_filenames.append(wgt_filename)
        dst_slices.append(dst_slc)

        # Write the source and destination subsets to their respective paths.
        for target, path in zip([sub_src, sub_dst], [src_path, dst_path]):
            if target.is_empty:
                is_empty = True
                target = None
            else:
                is_empty = False
            field = Field(grid=target, is_empty=is_empty)
            ocgis_lh(msg='writing: {}'.format(path), level=logging.DEBUG)
            # Scope the write to ranks holding data for this (possibly empty) field.
            with vm.scoped_by_emptyable('field.write', field):
                if not vm.is_null:
                    field.write(path)
            ocgis_lh(msg='finished writing: {}'.format(path), level=logging.DEBUG)

    # The index netCDF is written by rank 0 only.
    with vm.scoped('index write', [0]):
        if not vm.is_null:
            dim = Dimension('nfiles', len(src_filenames))
            vname = ['source_filename', 'destination_filename', 'weights_filename']
            values = [src_filenames, dst_filenames, wgt_filenames]
            grid_splitter_destination = GridSplitterConstants.IndexFile.NAME_DESTINATION_VARIABLE
            attrs = [{'esmf_role': 'grid_splitter_source'},
                     {'esmf_role': grid_splitter_destination},
                     {'esmf_role': 'grid_splitter_weights'}]

            vc = VariableCollection()

            # The index variable carries attributes naming the filename and
            # bounds variables so readers can locate them by role.
            grid_splitter_index = GridSplitterConstants.IndexFile.NAME_INDEX_VARIABLE
            vidx = Variable(name=grid_splitter_index)
            vidx.attrs['esmf_role'] = grid_splitter_index
            vidx.attrs['grid_splitter_source'] = 'source_filename'
            vidx.attrs[GridSplitterConstants.IndexFile.NAME_DESTINATION_VARIABLE] = 'destination_filename'
            vidx.attrs['grid_splitter_weights'] = 'weights_filename'
            x_bounds = GridSplitterConstants.IndexFile.NAME_X_BOUNDS_VARIABLE
            vidx.attrs[x_bounds] = x_bounds
            y_bounds = GridSplitterConstants.IndexFile.NAME_Y_BOUNDS_VARIABLE
            vidx.attrs[y_bounds] = y_bounds
            vc.add_variable(vidx)

            # One string variable per filename category, each sized by nfiles.
            for idx in range(len(vname)):
                v = Variable(name=vname[idx], dimensions=dim, dtype=str, value=values[idx], attrs=attrs[idx])
                vc.add_variable(v)

            # Record each destination subset's slice bounds (start/stop) along
            # the destination grid's x and y dimensions.
            bounds_dimension = Dimension(name='bounds', size=2)
            xb = Variable(name=x_bounds, dimensions=[dim, bounds_dimension], attrs={'esmf_role': 'x_split_bounds'},
                          dtype=int)
            yb = Variable(name=y_bounds, dimensions=[dim, bounds_dimension], attrs={'esmf_role': 'y_split_bounds'},
                          dtype=int)
            x_name = self.dst_grid.x.dimensions[0].name
            y_name = self.dst_grid.y.dimensions[0].name
            for idx, slc in enumerate(dst_slices):
                xb.get_value()[idx, :] = slc[x_name].start, slc[x_name].stop
                yb.get_value()[idx, :] = slc[y_name].start, slc[y_name].stop
            vc.add_variable(xb)
            vc.add_variable(yb)

            vc.write(index_path)

    # Synchronize all ranks before returning.
    vm.barrier()
def _write_coll_(self, f, coll, add_geom_uid=True):
    """Write a collection as CSV plus a companion UGID-GID shapefile.

    Delegates the CSV write to the superclass, then (unless the operation is
    an aggregation) writes a shapefile relating the selection geometry
    identifier (UGID) to each record's geometry identifier (GID).

    :param f: Mapping of write state; ``f[KeywordArgument.WRITE_MODE]`` is
        forwarded to the vector write. (assumes dict-like — TODO confirm)
    :param coll: The collection whose fields/containers are written.
    :param bool add_geom_uid: Forwarded to the superclass CSV write.
    """
    ocgis_lh(msg='entering _write_coll_ in {}'.format(self.__class__), logger='csv-shp.converter',
             level=logging.DEBUG)

    # Load the geometries. The geometry identifier is needed for the data write.
    for field, container in coll.iter_fields(yield_container=True):
        field.set_abstraction_geom(create_ugid=True)

    # Write the output CSV file.
    ocgis_lh(msg='before CsvShapefileConverter super call in {}'.format(self.__class__),
             logger='csv-shp.converter', level=logging.DEBUG)
    super(CsvShapefileConverter, self)._write_coll_(f, coll, add_geom_uid=add_geom_uid)
    ocgis_lh(msg='after CsvShapefileConverter super call in {}'.format(self.__class__),
             logger='csv-shp.converter', level=logging.DEBUG)

    # The output geometry identifier shapefile path. Rank 0 resolves (and may
    # create) the folder; the path is then broadcast to all ranks.
    if vm.rank == 0:
        fiona_path = os.path.join(self._get_or_create_shp_folder_(), self.prefix + '_gid.shp')
    else:
        fiona_path = None
    fiona_path = vm.bcast(fiona_path)

    if self.ops.aggregate:
        # Aggregated output keeps one geometry per selection geometry, so the
        # UGID-GID relation adds nothing; warn instead of writing.
        ocgis_lh('creating a UGID-GID shapefile is not necessary for aggregated data. use UGID shapefile.',
                 'conv.csv-shp', logging.WARN)
    else:
        # Write the geometries for each container/field combination.
        for field, container in coll.iter_fields(yield_container=True):
            # The container may be empty. Only add the unique geometry identifier if the container has an
            # associated geometry.
            if container.geom is not None:
                ugid_var = Variable(name=container.geom.ugid.name, dimensions=field.geom.dimensions,
                                    dtype=constants.DEFAULT_NP_INT)
                # Fill every record with the container's single UGID value.
                ugid_var.get_value()[:] = container.geom.ugid.get_value()[0]

            # Extract the variable components of the geometry file.
            geom = field.geom.copy()
            geom = geom.extract()
            if field.crs is not None:
                crs = field.crs.copy()
                crs = crs.extract()
            else:
                crs = None

            # If the dataset geometry identifier is not present, create it.
            gid = field[HeaderName.ID_GEOMETRY].copy()
            gid = gid.extract()

            # Construct the field to write.
            field_to_write = Field(geom=geom, crs=crs, uid=field.uid)
            if container.geom is not None:
                field_to_write.add_variable(ugid_var, is_data=True)
            field_to_write.add_variable(gid, is_data=True)

            # Maintain the field/dataset unique identifier if there is one.
            if field.uid is not None:
                if gid.repeat_record is None:
                    rr = []
                else:
                    rr = list(gid.repeat_record)
                rr.append((HeaderName.DATASET_IDENTIFER, field.uid))
                gid.repeat_record = rr

            # Write the field.
            field_to_write.write(fiona_path, write_mode=f[KeywordArgument.WRITE_MODE], driver=DriverKey.VECTOR)