示例#1
0
文件: base.py 项目: NCPP/ocgis
def get_periodicity_parameters(grid):
    """
    Classify a spherical grid as periodic or non-periodic along its x/longitude dimension.

    There are two classifications:
     1. Periodic: the x-coordinates reach both global edges (0 and 360 degrees), i.e. global coverage.
     2. Non-periodic: regional coverage.

    Call is collective across the current VM.

    :param grid: :class:`~ocgis.Grid`
    :return: A dictionary with the keys ``'num_peri_dims'``, ``'pole_dim'`` and ``'periodic_dim'``. Every value is
     ``None`` when the grid is not periodic.
    :rtype: dict
    """
    x_values = grid.x.get_value()
    tolerance = grid.resolution_x
    lower, upper = x_values.min(), x_values.max()

    # Work only with unwrapped coordinates: shift the negative side by 360 and take the lower edge from the
    # non-negative side.
    if lower < 0:
        negative = x_values < 0
        if negative.any():
            upper = np.max(x_values[negative]) + 360.
        non_negative = x_values >= 0
        if non_negative.any():
            lower = np.min(x_values[non_negative])

    # An edge is considered reached when the local extreme lies within one grid resolution of the global edge.
    touches_min = bool((0. - tolerance) <= lower <= (0. + tolerance))
    touches_max = bool((360. - tolerance) <= upper <= (360. + tolerance))

    # Determine global periodicity. The root rank combines the flags from all ranks and shares the verdict.
    all_touch_min = vm.gather(touches_min)
    all_touch_max = vm.gather(touches_max)
    is_periodic = False
    if vm.rank == 0:
        is_periodic = any(all_touch_min) and any(all_touch_max)
    is_periodic = vm.bcast(is_periodic)

    # Only a periodic grid receives concrete dimension parameters.
    if is_periodic:
        return {'num_peri_dims': 1, 'pole_dim': 1, 'periodic_dim': 0}
    return {'num_peri_dims': None, 'pole_dim': None, 'periodic_dim': None}
示例#2
0
    def test_reduce_reindex_coordinate_variables(self):
        """
        Scatter a coordinate index, reduce/reindex it, and check the gathered result still selects the same
        coordinates as the original global index.
        """
        self.add_barrier = False

        dist = OcgDist()
        dist.create_dimension('dim', 12, dist=True)
        dist.update_dimension_bounds()

        global_cindex_arr = np.array([4, 2, 1, 2, 1, 4, 1, 4, 2, 5, 6, 7])

        # Only the root rank holds the full index variable before it is scattered.
        root_cindex = Variable('cindex', value=global_cindex_arr, dimensions='dim') if vm.rank == 0 else None
        var_cindex = variable_scatter(root_cindex, dist)

        # Ranks that are null in the new sub-communicator do not take part in the remaining checks.
        vm.create_subcomm_by_emptyable('test', var_cindex, is_current=True)
        if vm.is_null:
            return

        raise_if_empty(var_cindex)

        coord_values = np.array([0, 11, 22, 33, 44, 55, 66, 77, 88, 99, 100, 110, 120, 130, 140, 150])
        coords = Variable(name='coords', value=coord_values, dimensions='coord_dim')

        new_cindex, u_indices = reduce_reindex_coordinate_variables(var_cindex)

        desired = coords[global_cindex_arr].get_value()

        local_coords = coords[u_indices].get_value() if len(u_indices) > 0 else np.array([])
        all_coords = vm.gather(local_coords)
        all_cindex = vm.gather(new_cindex)
        if vm.rank != 0:
            return

        # The root rank stitches the pieces together and validates them.
        all_coords = hgather(all_coords)
        all_cindex = hgather(all_cindex)

        actual = all_coords[all_cindex]

        self.assertAsSetEqual(all_cindex.tolist(), [2, 1, 0, 3, 4, 5])
        desired_new_coords = [11, 22, 44, 55, 66, 77]
        self.assertAsSetEqual(all_coords.tolist(), desired_new_coords)
        self.assertEqual(len(all_coords), len(desired_new_coords))

        self.assertNumpyAll(actual, desired)
示例#3
0
    def test_create_unique_global_array(self):
        """
        Scatter several index arrays and check that the globally unique values match ``np.unique`` on the full array.
        """
        dist = OcgDist()
        dist.create_dimension('dim', 9, dist=True)
        dist.update_dimension_bounds()

        cases = [
            [4, 2, 1, 2, 1, 4, 1, 4, 2],
            [44, 25, 16, 27, 18, 49, 10, 41, 22],
            [44, 25, 16, 27, 44, 49, 10, 41, 44],
            [1, 1, 1, 1, 1, 1, 1, 1, 1],
        ]

        for case in cases:
            # The expected result is computed on the root rank, which is the only rank holding the full array.
            if vm.rank == 0:
                index = Variable(name='cindex', value=case, dimensions='dim')
                desired = np.unique(index.get_value())
                desired_length = len(desired)
            else:
                index = None
            index = variable_scatter(index, dist)

            with vm.scoped_by_emptyable('not empty', index):
                if vm.is_null:
                    continue

                local_unique = create_unique_global_array(index.get_value())
                gathered_unique = vm.gather(local_unique)

                if vm.rank == 0:
                    gathered_unique = hgather(gathered_unique)
                    self.assertEqual(len(gathered_unique), desired_length)
                    self.assertEqual(set(gathered_unique), set(desired))
示例#4
0
    def _gc_iter_dst_grid_slices_(grid_chunker):
        """
        Yield boolean selections that split the destination grid into chunks along unique center y-values.

        The globally unique center values are sorted and split on the root rank, then each split is broadcast. Call is
        collective across the current VM: every rank must consume the generator in lockstep because each iteration
        performs a broadcast.

        :param grid_chunker: Object providing ``dst_grid`` and ``nchunks_dst``.
        :return: Generator of boolean arrays shaped like the local center y-values. ``True`` marks the local cells
         whose center value belongs to the current chunk.
        """
        # TODO: This method uses some global gathers which is not ideal.
        # Destination splitting works off center coordinates only.
        pgc = grid_chunker.dst_grid.abstractions_available['point']

        # Use the unique center values to break the grid into pieces. This ensures that nearby grid cells are close
        # spatially. If we just break the grid into pieces w/out using unique values, the points may be scattered which
        # does not optimize the spatial coverage of the source grid.
        center_lat = pgc.y.get_value()
        nchunks = grid_chunker.nchunks_dst[0]

        ucenter_lat = create_unique_global_array(center_lat)
        ucenter_lat = vm.gather(ucenter_lat)
        if vm.rank == 0:
            ucenter_lat = hgather(ucenter_lat)
            ucenter_lat.sort()
            ucenter_splits = np.array_split(ucenter_lat, nchunks)
        else:
            # Placeholders keep the loop length identical on every rank so the broadcasts below line up.
            ucenter_splits = [None] * nchunks

        for ucenter_split in ucenter_splits:
            ucenter_split = vm.bcast(ucenter_split)
            # One vectorized membership test replaces an element-by-element logical_or accumulation.
            # NOTE(review): assumes center_lat is a plain (unmasked) ndarray -- confirm against get_value().
            yield np.isin(center_lat, ucenter_split)
示例#5
0
    def test_create_unique_global_array(self):
        """
        Verify the distributed unique operation against ``np.unique`` for several scattered index arrays.
        """
        dist = OcgDist()
        dist.create_dimension('dim', 9, dist=True)
        dist.update_dimension_bounds()

        index_arrays = [
            [4, 2, 1, 2, 1, 4, 1, 4, 2],
            [44, 25, 16, 27, 18, 49, 10, 41, 22],
            [44, 25, 16, 27, 44, 49, 10, 41, 44],
            [1, 1, 1, 1, 1, 1, 1, 1, 1],
        ]

        for index_array in index_arrays:
            # Expected values exist on the root rank only; they are also only checked there.
            if vm.rank == 0:
                index = Variable(name='cindex', value=index_array, dimensions='dim')
                desired = np.unique(index.get_value())
                desired_length = len(desired)
            else:
                index = None
            index = variable_scatter(index, dist)

            with vm.scoped_by_emptyable('not empty', index):
                if vm.is_null:
                    continue

                unique_here = create_unique_global_array(index.get_value())
                unique_everywhere = vm.gather(unique_here)

                if vm.rank == 0:
                    unique_everywhere = hgather(unique_everywhere)
                    self.assertEqual(len(unique_everywhere), desired_length)
                    self.assertEqual(set(unique_everywhere), set(desired))
示例#6
0
文件: nc.py 项目: wk1984/ocgis
    def _get_field_write_target_(cls, field):
        """
        Prepare a field for writing. Collective!

        :param field: The field about to be written.
        :return: The field to write. This is a copy of the input when units are stripped from the bounds or when the
         coordinate system is replaced by ``env.COORDSYS_ACTUAL``.
        """
        if field.crs is not None:
            field.crs.format_spatial_object(field)

        grid = field.grid
        if grid is not None:
            # If any grid pieces are masked, ensure the mask is created across all grids. The root rank decides and
            # shares the decision.
            mask_flags = vm.gather(grid.has_mask)
            create_mask = any(mask_flags) if vm.rank == 0 else None
            create_mask = vm.bcast(create_mask)
            if create_mask and not grid.has_mask:
                grid.get_mask(create=True)

            # Putting units on bounds for netCDF-CF can confuse some parsers.
            if grid.has_bounds:
                field = field.copy()
                for axis in ('x', 'y'):
                    getattr(field, axis).bounds.attrs.pop('units', None)

        # Remove the current coordinate system if this is a dummy coordinate system.
        if env.COORDSYS_ACTUAL is not None:
            field = field.copy()
            field.set_crs(env.COORDSYS_ACTUAL, should_add=True)

        return field
示例#7
0
文件: nc.py 项目: NCPP/ocgis
    def _get_field_write_target_(cls, field):
        """
        Prepare a field for writing. Collective!

        :param field: The field about to be written.
        :return: The field to write. This is a copy of the input when units are stripped from the bounds or when the
         coordinate system is replaced by ``env.COORDSYS_ACTUAL``.
        """
        ocgis_lh(level=10, logger="driver.nc", msg="entering _get_field_write_target_")

        if field.crs is not None:
            field.crs.format_spatial_object(field)

        grid = field.grid
        if grid is not None:
            # If any grid pieces are masked, ensure the mask is created across all grids. The root rank decides and
            # shares the decision.
            mask_flags = vm.gather(grid.has_mask)
            create_mask = any(mask_flags) if vm.rank == 0 else None
            create_mask = vm.bcast(create_mask)
            if create_mask and not grid.has_mask:
                grid.get_mask(create=True)

            # Putting units on bounds for netCDF-CF can confuse some parsers.
            if grid.has_bounds:
                field = field.copy()
                for axis in ('x', 'y'):
                    getattr(field, axis).bounds.attrs.pop('units', None)

        # Remove the current coordinate system if this is a dummy coordinate system.
        if env.COORDSYS_ACTUAL is not None:
            field = field.copy()
            field.set_crs(env.COORDSYS_ACTUAL, should_add=True)

        return field
示例#8
0
    def _get_field_write_target_(cls, field):
        """
        Prepare a field for writing. Collective!

        :param field: The field about to be written.
        :return: The field to write. This is a copy of the input when units are stripped from the grid bounds.
        """
        # These changes to the field can be maintained following a write.
        if field.crs is not None:
            field.crs.format_field(field)

        grid = field.grid
        if grid is None:
            return field

        # If any grid pieces are masked, ensure the mask is created across all grids. The root rank decides and shares
        # the decision.
        mask_flags = vm.gather(grid.has_mask)
        create_mask = any(mask_flags) if vm.rank == 0 else None
        create_mask = vm.bcast(create_mask)
        if create_mask and not grid.has_mask:
            grid.get_mask(create=True)

        # Putting units on bounds for netCDF-CF can confuse some parsers.
        if grid.has_bounds:
            field = field.copy()
            for axis in ('x', 'y'):
                getattr(field, axis).bounds.attrs.pop('units', None)

        return field
示例#9
0
文件: crs.py 项目: NCPP/ocgis
    def get_wrapped_state(self, target):
        """
        Return the wrapped state of ``target``, agreed on across the current VM.

        For a field, the grid is evaluated when present and the geometry otherwise. Only grid x-coordinate values and
        geometries are inspected. The call gathers one state per rank and broadcasts a single answer, preferring
        wrapped over unwrapped over anything else.

        :param target: The object to evaluate.
        :type target: :class:`~ocgis.Field`
        :raises: WrappedStateEvalTargetMissing
        """
        # TODO: Wrapped state should operate on the x-coordinate variable vectors or geometries only.
        # TODO: This should be a method on grids and geometry variables.
        from ocgis.collection.field import Field
        from ocgis.spatial.base import AbstractXYZSpatialContainer
        from ocgis import vm

        raise_if_empty(self)

        # If this is not a wrappable coordinate system, wrapped state is undefined.
        local_state = None
        if self.is_wrappable:
            if isinstance(target, Field):
                grid = target.grid
                target = grid if grid is not None else target.geom

            if target is None:
                raise WrappedStateEvalTargetMissing
            if target.is_empty:
                local_state = None
            elif isinstance(target, AbstractXYZSpatialContainer):
                local_state = self._get_wrapped_state_from_array_(target.x.get_value())
            else:
                # Scan unmasked geometries until one yields a definitive answer.
                definitive = (WrappedState.WRAPPED, WrappedState.UNWRAPPED)
                local_state = WrappedState.UNKNOWN
                for geom in target.get_masked_value().flat:
                    if np.ma.is_masked(geom):
                        continue
                    flag = self._get_wrapped_state_from_geometry_(geom)
                    if flag in definitive:
                        local_state = flag
                        break

        # The root rank reduces the per-rank states to one value and shares it with everyone.
        gathered = vm.gather(local_state)
        ret = None
        if vm.rank == 0:
            distinct = set(gathered)
            if WrappedState.WRAPPED in distinct:
                ret = WrappedState.WRAPPED
            elif WrappedState.UNWRAPPED in distinct:
                ret = WrappedState.UNWRAPPED
            else:
                ret = next(iter(distinct))
        return vm.bcast(ret)
示例#10
0
    def get_wrapped_state(self, target):
        """
        Return the wrapped state of ``target``, agreed on across the current VM.

        For a field, the grid is evaluated when present and the geometry otherwise. Only grid x-coordinate values and
        geometries are inspected. The call gathers one state per rank and broadcasts a single answer, preferring
        wrapped over unwrapped over anything else.

        :param target: The object to evaluate.
        :type target: :class:`~ocgis.Field`
        :raises: WrappedStateEvalTargetMissing
        """
        # TODO: Wrapped state should operate on the x-coordinate variable vectors or geometries only.
        # TODO: This should be a method on grids and geometry variables.
        from ocgis.collection.field import Field
        from ocgis.spatial.base import AbstractXYZSpatialContainer
        from ocgis import vm

        raise_if_empty(self)

        # If this is not a wrappable coordinate system, wrapped state is undefined.
        state_here = None
        if self.is_wrappable:
            if isinstance(target, Field):
                grid = target.grid
                target = grid if grid is not None else target.geom

            if target is None:
                raise WrappedStateEvalTargetMissing
            if target.is_empty:
                state_here = None
            elif isinstance(target, AbstractXYZSpatialContainer):
                state_here = self._get_wrapped_state_from_array_(target.x.get_value())
            else:
                # Scan unmasked geometries until one yields a definitive answer.
                conclusive = (WrappedState.WRAPPED, WrappedState.UNWRAPPED)
                state_here = WrappedState.UNKNOWN
                for geom in target.get_masked_value().flat:
                    if np.ma.is_masked(geom):
                        continue
                    candidate = self._get_wrapped_state_from_geometry_(geom)
                    if candidate in conclusive:
                        state_here = candidate
                        break

        # The root rank reduces the per-rank states to one value and shares it with everyone.
        states = vm.gather(state_here)
        ret = None
        if vm.rank == 0:
            unique_states = set(states)
            if WrappedState.WRAPPED in unique_states:
                ret = WrappedState.WRAPPED
            elif WrappedState.UNWRAPPED in unique_states:
                ret = WrappedState.UNWRAPPED
            else:
                ret = next(iter(unique_states))
        return vm.bcast(ret)
示例#11
0
文件: test_geomc.py 项目: NCPP/ocgis
    def test_reduce_reindex_coordinate_index(self):
        """
        Scatter a coordinate index, reduce/reindex it, and check the gathered result still selects the same
        coordinates as the original global index.
        """
        dist = OcgDist()
        dist.create_dimension('dim', 12, dist=True)
        dist.update_dimension_bounds()

        global_cindex_arr = np.array([4, 2, 1, 2, 1, 4, 1, 4, 2, 5, 6, 7])

        # Only the root rank holds the full index variable before it is scattered.
        root_cindex = Variable('cindex', value=global_cindex_arr, dimensions='dim') if vm.rank == 0 else None
        var_cindex = variable_scatter(root_cindex, dist)

        # Ranks that are null in the new sub-communicator do not take part in the remaining checks.
        vm.create_subcomm_by_emptyable('test', var_cindex, is_current=True)
        if vm.is_null:
            return

        raise_if_empty(var_cindex)

        coord_values = np.array([0, 11, 22, 33, 44, 55, 66, 77, 88, 99, 100, 110, 120, 130, 140, 150])
        coords = Variable(name='coords', value=coord_values, dimensions='coord_dim')

        new_cindex, u_indices = reduce_reindex_coordinate_index(var_cindex)

        desired = coords[global_cindex_arr].get_value()

        local_coords = coords[u_indices].get_value() if len(u_indices) > 0 else np.array([])
        all_coords = vm.gather(local_coords)
        all_cindex = vm.gather(new_cindex)
        if vm.rank != 0:
            return

        # The root rank stitches the pieces together and validates them.
        all_coords = hgather(all_coords)
        all_cindex = hgather(all_cindex)

        actual = all_coords[all_cindex]

        self.assertAsSetEqual(all_cindex.tolist(), [2, 1, 0, 3, 4, 5])
        desired_new_coords = [11, 22, 44, 55, 66, 77]
        self.assertAsSetEqual(all_coords.tolist(), desired_new_coords)
        self.assertEqual(len(all_coords), len(desired_new_coords))

        self.assertNumpyAll(actual, desired)
示例#12
0
    def test_arange_from_dimension(self):
        """Check that the per-rank ranges of a distributed dimension concatenate to one contiguous global range."""
        dist = OcgDist()
        dim = dist.create_dimension('dim', size=7, dist=True)
        dist.update_dimension_bounds()

        local_range = arange_from_dimension(dim, start=2, dtype=np.int64)
        pieces = vm.gather(local_range)
        if vm.rank != 0:
            return

        # Seven elements starting at 2 must cover 2 through 8.
        actual = hgather(pieces)
        desired = np.arange(2, 9, dtype=np.int64)
        self.assertNumpyAll(actual, desired)
示例#13
0
    def test_arange_from_dimension(self):
        """Check that the per-rank ranges of a distributed dimension concatenate to one contiguous global range."""
        dist = OcgDist()
        dim = dist.create_dimension('dim', size=7, dist=True)
        dist.update_dimension_bounds()

        rank_values = arange_from_dimension(dim, start=2, dtype=np.int64)
        gathered_values = vm.gather(rank_values)
        if vm.rank != 0:
            return

        # Seven elements starting at 2 must cover 2 through 8.
        actual = hgather(gathered_values)
        desired = np.arange(2, 9, dtype=np.int64)
        self.assertNumpyAll(actual, desired)
示例#14
0
文件: base.py 项目: nmizukami/ocgis
    def has_mask_global(self):
        """
        Report whether any rank's piece of the spatial object has a mask. Collective across the current VM.

        :rtype: bool
        :raises: :class:`~ocgis.exc.EmptyObjectError`
        """
        raise_if_empty(self)
        # The root rank reduces the per-rank flags; the answer is then shared with every rank.
        rank_flags = vm.gather(self.has_mask)
        answer = np.any(rank_flags) if vm.rank == 0 else None
        return vm.bcast(answer)
示例#15
0
文件: base.py 项目: nmizukami/ocgis
    def has_masked_values_global(self):
        """
        Report whether any rank's piece of the spatial object contains masked values. Collective across the current
        VM.

        :rtype: bool
        :raises: :class:`~ocgis.exc.EmptyObjectError`
        """
        raise_if_empty(self)
        # The root rank reduces the per-rank flags; the answer is then shared with every rank.
        rank_flags = vm.gather(self.has_masked_values)
        answer = np.any(rank_flags) if vm.rank == 0 else None
        return vm.bcast(answer)
示例#16
0
文件: geom.py 项目: Ouranosinc/ocgis
 def geom_type_global(self):
     """
     Return one geometry type for the whole object, collective across the current :class:`~ocgis.OcgVM`.

     The first gathered type starting with ``'Multi'`` wins. If there is none, the last gathered type is used.

     :rtype: str
     :raises: :class:`~ocgis.exc.EmptyObjectError`
     """
     raise_if_empty(self)
     gathered_types = vm.gather(self.geom_type)
     chosen = None
     if vm.rank == 0:
         chosen = next((t for t in gathered_types if t.startswith('Multi')), gathered_types[-1])
     return vm.bcast(chosen)
示例#17
0
    def test_system_raise_exception_subcommunicator(self):
        """Check that an exception caught inside a sub-communicator can be re-raised on every rank."""
        if vm.size != 4:
            raise SkipTest('vm.size != 4')

        raiser = Mock(side_effect=IndexError('oops'))

        with self.assertRaises(IndexError):
            captured = None
            with vm.scoped('the sub which will raise', [2]):
                if not vm.is_null:
                    try:
                        raiser()
                    except IndexError as exc:
                        captured = exc
            # Share every rank's captured exception so all ranks raise together.
            all_captured = vm.bcast(vm.gather(captured))
            for item in all_captured:
                if item is not None:
                    raise item
示例#18
0
文件: base.py 项目: nmizukami/ocgis
    def shape_global(self):
        """
        Get the global shape across the current :class:`~ocgis.OcgVM`.

        :rtype: :class:`tuple` of :class:`int`
        :raises: :class:`~ocgis.exc.EmptyObjectError`
        """
        raise_if_empty(self)

        # Each rank reports the upper global bound of every dimension; the root takes the per-dimension maximum.
        upper_bounds = [max(dim.bounds_global) for dim in self.dimensions]
        gathered_bounds = vm.gather(upper_bounds)
        result = tuple(np.max(gathered_bounds, axis=0)) if vm.rank == 0 else None
        return vm.bcast(result)
示例#19
0
文件: test_core.py 项目: NCPP/ocgis
    def test_system_raise_exception_subcommunicator(self):
        """Check that an exception caught inside a sub-communicator can be re-raised on every rank."""
        if vm.size != 4:
            raise SkipTest('vm.size != 4')

        raiser = Mock(side_effect=IndexError('oops'))

        with self.assertRaises(IndexError):
            caught = None
            with vm.scoped('the sub which will raise', [2]):
                if not vm.is_null:
                    try:
                        raiser()
                    except IndexError as exc:
                        caught = exc
            # Share every rank's caught exception so all ranks raise together.
            everything_caught = vm.bcast(vm.gather(caught))
            for candidate in everything_caught:
                if candidate is not None:
                    raise candidate
示例#20
0
文件: base.py 项目: nmizukami/ocgis
def get_extent_global(container):
    """
    Compute the extent enclosing the extents of every rank. Collective across the current VM.

    :param container: Object exposing an ``extent`` sequence whose first two entries are minima and whose next two
     entries are maxima.
    :return: A four-element tuple holding the global minima followed by the global maxima.
    :rtype: tuple
    """
    raise_if_empty(container)

    gathered = vm.gather(container.extent)

    ret = None
    if vm.rank == 0:
        # Ranks without an extent contribute ``None`` and are ignored.
        stacked = np.array([e for e in gathered if e is not None])
        ret = (
            np.min(stacked[:, 0]),
            np.min(stacked[:, 1]),
            np.max(stacked[:, 2]),
            np.max(stacked[:, 3]),
        )
    return vm.bcast(ret)
示例#21
0
    def test_gather(self):
        """Check that gathering on a sub-communicator returns the values on its root and ``None`` elsewhere."""
        if MPI_SIZE != 8:
            raise SkipTest('MPI_SIZE != 8')

        vm = OcgVM()
        live_ranks = [1, 3, 7]
        vm.create_subcomm('tester', live_ranks, is_current=True)

        # Only live ranks take part in the gather; rank 1 is expected to receive the gathered list.
        if MPI_RANK in live_ranks:
            gathered_value = vm.gather(MPI_RANK)
            if MPI_RANK == 1:
                self.assertEqual(gathered_value, [1, 3, 7])
            else:
                self.assertIsNone(gathered_value)

        vm.finalize()
示例#22
0
文件: test_core.py 项目: NCPP/ocgis
    def test_gather(self):
        """Check that gathering on a sub-communicator returns the values on its root and ``None`` elsewhere."""
        if MPI_SIZE != 8:
            raise SkipTest('MPI_SIZE != 8')

        vm = OcgVM()
        live_ranks = [1, 3, 7]
        vm.create_subcomm('tester', live_ranks, is_current=True)

        # Only live ranks take part in the gather; rank 1 is expected to receive the gathered list.
        is_live = MPI_RANK in live_ranks
        if is_live:
            gathered_value = vm.gather(MPI_RANK)

        if is_live and MPI_RANK == 1:
            self.assertEqual(gathered_value, [1, 3, 7])
        elif is_live:
            self.assertIsNone(gathered_value)

        vm.finalize()
示例#23
0
文件: geom.py 项目: Ouranosinc/ocgis
    def create_ugid_global(self, name, start=1):
        """
        Same as :meth:`~ocgis.GeometryVariable.create_ugid` but collective across the current :class:`~ocgis.OcgVM`.

        The root rank gathers every rank's size and hands each rank its own start value, so that the start values
        advance by the sizes of the preceding ranks.

        :raises: :class:`~ocgis.exc.EmptyObjectError`
        """
        raise_if_empty(self)

        sizes = vm.gather(self.size)
        if vm.rank == 0:
            offset = start
            for rank, size in zip(vm.ranks, sizes):
                if rank == vm.rank:
                    rank_start = offset
                else:
                    vm.comm.send(offset, dest=rank)
                offset += size
        else:
            rank_start = vm.comm.recv(source=0)

        return self.create_ugid(name, start=rank_start)
示例#24
0
文件: base.py 项目: nmizukami/ocgis
def raise_if_empty(target, check_current=False):
    """
    Raise an ``EmptyObjectError`` and abort the VM when an empty object is encountered.

    :param target: Object exposing an ``is_empty`` attribute.
    :param bool check_current: If ``True``, gather ``is_empty`` from every rank of the current VM and fail on the
     root rank when any rank is empty (collective). If ``False``, only the local object is checked.
    :raises: EmptyObjectError
    """

    def _raise_and_abort(message):
        # Raise the error and make sure the VM is aborted with it while it propagates.
        exc = EmptyObjectError(message)
        try:
            raise exc
        finally:
            from ocgis import vm
            vm.abort(exc=exc)

    if check_current:
        from ocgis import vm
        gathered = vm.gather(target.is_empty)
        if vm.rank == 0 and any(gathered):
            _raise_and_abort('No empty {} objects allowed across the current VM.'.format(target.__class__))
    elif target.is_empty:
        _raise_and_abort('No empty {} objects allowed.'.format(target.__class__))
示例#25
0
文件: base.py 项目: huard/ocgis
def get_extent_global(container):
    """
    Compute the extent enclosing the extents of every rank. Collective across the current VM.

    :param container: Object exposing an ``extent`` sequence whose first two entries are minima and whose next two
     entries are maxima.
    :return: A four-element tuple holding the global minima followed by the global maxima.
    :rtype: tuple
    """
    raise_if_empty(container)

    all_extents = vm.gather(container.extent)

    result = None
    if vm.rank == 0:
        # Ranks without an extent contribute ``None`` and are ignored.
        table = np.array([ext for ext in all_extents if ext is not None])
        result = (
            np.min(table[:, 0]),
            np.min(table[:, 1]),
            np.max(table[:, 2]),
            np.max(table[:, 3]),
        )
    return vm.bcast(result)
示例#26
0
文件: helpers.py 项目: NCPP/ocgis
def create_unique_global_array(arr):
    """
    Create a distributed NumPy array containing unique elements. If the rank has no unique items, an array with zero
    elements will be returned. This call is collective across the current VM.

    Each rank computes its local unique values and their min/max bounds. For every pair of ranks with overlapping
    bounds, the lower rank sends the values that fall inside the higher rank's bounds, and the higher rank removes
    those values from its own local unique values.

    :param arr: Input array for unique operation.
    :type arr: :class:`numpy.ndarray`
    :rtype: :class:`numpy.ndarray`
    :raises: ValueError
    """

    from ocgis import vm

    if arr is None:
        raise ValueError('Input must be a NumPy array.')

    unique_local = np.unique(arr)
    vm.barrier()

    # NOTE(review): the builtin min()/max() raise ValueError on an empty sequence, so a zero-size input fails here.
    local_bounds = min(unique_local), max(unique_local)
    # Gather followed by broadcast: every rank ends up with the gathered bounds of all ranks.
    lb_global = vm.gather(local_bounds)
    lb_global = vm.bcast(lb_global)

    # Find the vm ranks the local rank cares about. It cares if unique values have overlapping unique bounds.
    overlaps = []
    for rank, lb in enumerate(lb_global):
        if rank == vm.rank:
            continue
        # True for each local bound that lies inside the other rank's bounds.
        contains = []
        for lb2 in local_bounds:
            if lb[0] <= lb2 <= lb[1]:
                to_app = True
            else:
                to_app = False
            contains.append(to_app)
        # The second condition covers the case where the local bounds fully enclose the other rank's bounds.
        if any(contains) or (local_bounds[0] <= lb[0] and local_bounds[1] >= lb[1]):
            overlaps.append(rank)

    # Send out the overlapping sources.
    tag_overlap = MPITag.OVERLAP_CHECK
    tag_select_send_size = MPITag.SELECT_SEND_SIZE
    vm.barrier()

    # NumPy and MPI types.
    np_type = unique_local.dtype
    mpi_type = vm.get_mpi_type(np_type)

    # Lower ranks send to higher overlapping ranks: first the number of values, then the values themselves.
    for o in overlaps:
        if vm.rank != o and vm.rank < o:
            dest_rank_bounds = lb_global[o]
            select_send = np.logical_and(unique_local >= dest_rank_bounds[0], unique_local <= dest_rank_bounds[1])
            u_src = unique_local[select_send]
            select_send_size = u_src.size
            # NOTE(review): the size is transmitted using the data dtype, and the requests returned by Isend are
            # discarded without a wait -- confirm the send buffers remain valid until the transfers complete.
            _ = vm.comm.Isend([np.array([select_send_size], dtype=np_type), mpi_type], dest=o, tag=tag_select_send_size)
            _ = vm.comm.Isend([u_src, mpi_type], dest=o, tag=tag_overlap)

    # Receive and process conflicts to reduce the unique local values.
    if vm.rank != 0:
        for o in overlaps:
            if vm.rank != o and vm.rank > o:
                # Receive the element count first so the receive buffer can be sized.
                select_send_size = np.array([0], dtype=np_type)
                req_select_send_size = vm.comm.Irecv([select_send_size, mpi_type], source=o, tag=tag_select_send_size)
                req_select_send_size.wait()
                select_send_size = select_send_size[0]

                u_src = np.zeros(select_send_size.astype(int), dtype=np_type)
                req = vm.comm.Irecv([u_src, mpi_type], source=o, tag=tag_overlap)
                req.wait()

                # Drop every local value that the lower rank also holds.
                utokeep = np.ones_like(unique_local, dtype=bool)
                for uidx, u in enumerate(unique_local.flat):
                    if u in u_src:
                        utokeep[uidx] = False
                unique_local = unique_local[utokeep]

    vm.barrier()
    return unique_local
示例#27
0
def get_periodicity_parameters(grid):
    """
    Classify a spherical grid as periodic or non-periodic along its x/longitude dimension.

    There are two classifications:
     1. Periodic: the x-coordinates reach both global edges (0 and 360 degrees), i.e. global coverage.
     2. Non-periodic: regional coverage.

    Call is collective across the current VM.

    :param grid: :class:`~ocgis.Grid`
    :return: A dictionary with the keys ``'num_peri_dims'``, ``'pole_dim'`` and ``'periodic_dim'``. Every value is
     ``None`` when the grid is not periodic.
    :rtype: dict
    """
    columns = grid.x.get_value()
    res = grid.resolution_x
    west, east = columns.min(), columns.max()

    # Work only with unwrapped coordinates: shift the negative side by 360 and take the lower edge from the
    # non-negative side.
    if west < 0:
        is_negative = columns < 0
        if is_negative.any():
            east = np.max(columns[is_negative]) + 360.
        is_non_negative = columns >= 0
        if is_non_negative.any():
            west = np.min(columns[is_non_negative])

    # An edge is considered reached when the local extreme lies within one grid resolution of the global edge.
    west_at_edge = bool((0. - res) <= west <= (0. + res))
    east_at_edge = bool((360. - res) <= east <= (360. + res))

    # Determine global periodicity. The root rank combines the flags from all ranks and shares the verdict.
    west_flags = vm.gather(west_at_edge)
    east_flags = vm.gather(east_at_edge)
    is_periodic = False
    if vm.rank == 0:
        is_periodic = any(west_flags) and any(east_flags)
    is_periodic = vm.bcast(is_periodic)

    # Only a periodic grid receives concrete dimension parameters.
    if is_periodic:
        num_peri_dims, pole_dim, periodic_dim = 1, 1, 0
    else:
        num_peri_dims = pole_dim = periodic_dim = None

    return {
        'num_peri_dims': num_peri_dims,
        'pole_dim': pole_dim,
        'periodic_dim': periodic_dim,
    }
示例#28
0
def create_unique_global_array(arr):
    """
    Create a distributed NumPy array containing unique elements. If the rank has no unique items, an array with zero
    elements will be returned. This call is collective across the current VM.

    Each rank computes its local unique values and the ``(min, max)`` bounds of those values. Ranks whose bounds
    intersect exchange candidate duplicates: the lower rank sends the values that fall inside the higher rank's bounds
    and the higher rank drops every value it receives. A value present on several ranks is therefore kept only on the
    lowest rank holding it.

    :param arr: Input array for unique operation.
    :type arr: :class:`numpy.ndarray`
    :rtype: :class:`numpy.ndarray`
    :raises: ValueError
    """

    from ocgis import vm

    if arr is None:
        raise ValueError('Input must be a NumPy array.')

    unique_local = np.unique(arr)
    vm.barrier()

    # A zero-size array raises ValueError here, as it did with the builtin min/max.
    local_bounds = unique_local.min(), unique_local.max()
    lb_global = vm.gather(local_bounds)
    lb_global = vm.bcast(lb_global)

    # Find the vm ranks the local rank cares about. Two ranks need to talk when their closed [min, max] intervals
    # intersect (an endpoint of one lies inside the other, or one interval contains the other).
    lower, upper = local_bounds
    overlaps = []
    for rank, (other_lower, other_upper) in enumerate(lb_global):
        if rank == vm.rank:
            continue
        if lower <= other_upper and other_lower <= upper:
            overlaps.append(rank)

    # Send out the overlapping sources.
    tag_overlap = MPITag.OVERLAP_CHECK
    tag_select_send_size = MPITag.SELECT_SEND_SIZE
    vm.barrier()

    # NumPy and MPI types.
    np_type = unique_local.dtype
    mpi_type = vm.get_mpi_type(np_type)

    # Keep every non-blocking send request together with its buffer so the buffer outlives the transfer and the
    # request can be completed before returning.
    pending_sends = []
    for o in overlaps:
        if vm.rank < o:
            dest_lower, dest_upper = lb_global[o]
            select_send = np.logical_and(unique_local >= dest_lower, unique_local <= dest_upper)
            u_src = unique_local[select_send]
            # The element count travels with the same dtype as the payload so one MPI type serves both messages.
            size_buffer = np.array([u_src.size], dtype=np_type)
            req_size = vm.comm.Isend([size_buffer, mpi_type], dest=o, tag=tag_select_send_size)
            req_data = vm.comm.Isend([u_src, mpi_type], dest=o, tag=tag_overlap)
            pending_sends.append((req_size, size_buffer))
            pending_sends.append((req_data, u_src))

    # Receive and process conflicts to reduce the unique local values. Only lower ranks send, so rank 0 never
    # receives.
    for o in overlaps:
        if vm.rank > o:
            size_buffer = np.array([0], dtype=np_type)
            vm.comm.Irecv([size_buffer, mpi_type], source=o, tag=tag_select_send_size).wait()

            u_src = np.zeros(int(size_buffer[0]), dtype=np_type)
            vm.comm.Irecv([u_src, mpi_type], source=o, tag=tag_overlap).wait()

            # ``unique_local`` is already sorted and unique, so the set difference only removes the received values.
            unique_local = np.setdiff1d(unique_local, u_src)

    # Complete the outstanding sends. Every send has a matching receive because the overlap test is symmetric.
    for request, _ in pending_sends:
        request.wait()

    vm.barrier()
    return unique_local
示例#29
0
    def test_system_converting_state_boundaries_shapefile(self):
        # Convert the state boundaries shapefile to geometry coordinates for every combination of target coordinate
        # system and geometry-iterator usage, write the result with the ESMF unstructured driver, and compare the
        # data read back against the source geometries.
        ocgis.env.USE_NETCDF4_MPI = False  # tdk:FIX: this hangs in the STATE_FIPS write for asynch might be nc4 bug...
        keywords = {'transform_to_crs': [None, Spherical],
                    'use_geometry_iterator': [False, True]}
        # Coordinate sums collected from each successful conversion; compared across combinations at the end.
        actual_xsums = []
        actual_ysums = []
        for k in self.iter_product_keywords(keywords):
            # A target CRS is only handed to ``convert_to`` when the geometry iterator is used.
            if k.use_geometry_iterator and k.transform_to_crs is not None:
                to_crs = k.transform_to_crs()
            else:
                to_crs = None
            # The CRS expected on the output: WGS84 unless a transform was requested.
            if k.transform_to_crs is None:
                desired_crs = WGS84()
            else:
                desired_crs = k.transform_to_crs()

            rd = RequestDataset(uri=self.path_state_boundaries, variable=['UGID', 'ID'])
            rd.metadata['schema']['geometry'] = 'MultiPolygon'
            field = rd.get()
            self.assertEqual(len(field.data_variables), 2)

            # Test there is no mask present.
            field.geom.load()
            self.assertFalse(field.geom.has_mask)
            self.assertNotIn(VariableName.SPATIAL_MASK, field)
            self.assertIsNone(field.dimension_map.get_spatial_mask())

            self.assertEqual(field.crs, WGS84())
            if k.transform_to_crs is not None:
                field.update_crs(desired_crs)
            self.assertEqual(len(field.data_variables), 2)
            self.assertEqual(len(field.geom.parent.data_variables), 2)
            try:
                gc = field.geom.convert_to(pack=False, use_geometry_iterator=k.use_geometry_iterator, to_crs=to_crs)
            except ValueError as e:
                # A ValueError is tolerated (and the combination skipped) only when both assertions below hold;
                # otherwise the original error is re-raised.
                # NOTE(review): ``to_crs`` is only non-None when ``use_geometry_iterator`` is true (see above), so as
                # written these two assertions cannot both pass - confirm the intended condition.
                try:
                    self.assertFalse(k.use_geometry_iterator)
                    self.assertIsNotNone(to_crs)
                except AssertionError:
                    raise e
                else:
                    continue

            actual_xsums.append(gc.x.get_value().sum())
            actual_ysums.append(gc.y.get_value().sum())
            self.assertEqual(gc.crs, desired_crs)

            # Test there is no mask present after conversion to geometry coordinates.
            self.assertFalse(gc.has_mask)
            self.assertNotIn(VariableName.SPATIAL_MASK, gc.parent)
            self.assertIsNone(gc.dimension_map.get_spatial_mask())

            path = self.get_temporary_file_path('esmf_state_boundaries.nc')
            self.assertEqual(gc.parent.crs, desired_crs)
            gc.parent.write(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)

            # Collect the source geometries from every rank so rank 0 can compare them with the written file.
            gathered_geoms = vm.gather(field.geom.get_value())
            if vm.rank == 0:
                actual_geoms = []
                for g in gathered_geoms:
                    actual_geoms.extend(g)

                rd = RequestDataset(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)
                infield = rd.get()
                self.assertEqual(create_crs(infield.crs.value), desired_crs)
                for dv in field.data_variables:
                    self.assertIn(dv.name, infield)
                ingrid = infield.grid
                self.assertIsInstance(ingrid, GridUnstruct)

                # ``g`` is indexed as (index, geometry); the index selects the matching source geometry.
                for g in ingrid.archetype.iter_geometries():
                    self.assertPolygonSimilar(g[1], actual_geoms[g[0]], check_type=False)

        vm.barrier()

        # Test coordinates have actually changed.
        # NOTE(review): ``k`` here is whatever the final loop iteration left behind - confirm this is intended.
        if not k.use_geometry_iterator:
            for ctr, to_test in enumerate([actual_xsums, actual_ysums]):
                for lhs, rhs in itertools.combinations(to_test, 2):
                    # The x sums (ctr == 0) are expected to agree between runs; the y sums are expected to differ.
                    if ctr == 0:
                        self.assertAlmostEqual(lhs, rhs)
                    else:
                        self.assertNotAlmostEqual(lhs, rhs)
        live_ranks = vm.get_live_ranks_from_object(grid_sub)
        bbox = vm.bcast(extent_global, root=live_ranks[0])

        vm.barrier()
        if vm.rank == 0:
            print 'starting bbox subset:', bbox
        vm.barrier()

        has_subset = get_subset(bbox, subset_filename, 1)

        vm.barrier()
        if vm.rank == 0:
            print 'finished bbox subset:', bbox
        vm.barrier()

        has_subset = vm.gather(has_subset)
        if vm.rank == 0:
            if any(has_subset):
                has_subset = True
                ctr += 1
            else:
                has_subset = False
        ctr = vm.bcast(ctr)
        has_subset = vm.bcast(has_subset)

        if has_subset:
            with vm.scoped_by_emptyable('dst subset write', grid_sub):
                if not vm.is_null:
                    grid_sub.parent.write(dst_subset_filename)
示例#31
0
文件: test_geomc.py 项目: NCPP/ocgis
    def test_get_intersects(self):
        # Intersect a polygon coordinate variable scattered across ranks with a subset geometry, then confirm that a
        # global reduction of the subset still yields the same geometries.
        expected_centroids = [[20.0, -49.5], [10.0, -44.5], [10.0, -39.5]]

        def assert_gathered_centroids(target):
            # Collect the local centroids, gather them, and compare the flattened list on rank 0.
            local_centroids = [[g[1].centroid.x, g[1].centroid.y]
                               for g in target.iter_geometries()
                               if g[1] is not None]
            gathered_centroids = vm.gather(local_centroids)
            if vm.rank == 0:
                flattened = [centroid for part in gathered_centroids for centroid in part]
                self.assertEqual(flattened, expected_centroids)

        subset_geom = self.fixture_subset_geom()
        poly = self.fixture()

        # Scatter the polygon geometry coordinates for the parallel case ===============================================

        dist = OcgDist()
        for source_dim in poly.parent.dimensions.values():
            dim = source_dim.copy()
            # Only the polygon's first dimension is distributed.
            if dim.name == poly.dimensions[0].name:
                dim.dist = True
            dist.add_dimension(dim)
        dist.update_dimension_bounds()

        poly.parent = variable_collection_scatter(poly.parent, dist)

        # Ranks left empty by the scatter drop out of the test.
        vm.create_subcomm_by_emptyable('scatter', poly, is_current=True)
        if vm.is_null:
            return

        poly.parent._validate_()

        # Every variable must point at the same (scattered) parent collection.
        for member in poly.parent.values():
            self.assertEqual(id(member.parent), id(poly.parent))
            self.assertEqual(len(member.parent), len(poly.parent))

        # ==============================================================================================================

        # p = os.path.join('/tmp/subset_geom.shp')
        # s = GeometryVariable.from_shapely(subset_geom)
        # s.write_vector(p)
        # p = os.path.join('/tmp/poly.shp')
        # s = poly.convert_to()
        # s.write_vector(p)

        sub = poly.get_intersects(subset_geom)
        # Ranks with nothing selected by the intersects operation drop out as well.
        vm.create_subcomm_by_emptyable('after intersects', sub, is_current=True)
        if vm.is_null:
            return

        assert_gathered_centroids(sub)

        self.assertEqual(len(sub.parent), len(poly.parent))

        sub.parent._validate_()
        sub2 = sub.reduce_global()
        sub2.parent._validate_()

        # p = os.path.join('/tmp/sub.shp')
        # s = sub.convert_to()
        # s.write_vector(p)
        # p = os.path.join('/tmp/sub2.shp')
        # s = sub2.convert_to()
        # s.write_vector(p)

        # Gather then broadcast coordinates so all coordinates are available on each process.
        replicated = []
        for coordinate in [sub2.x, sub2.y]:
            replicated.append(vm.bcast(variable_gather(coordinate.extract())))
        for coordinate in replicated:
            sub2.parent.add_variable(coordinate, force=True)

        # The original and the globally reduced subsets must both produce the expected centroids.
        for to_check in (sub, sub2):
            assert_gathered_centroids(to_check)
示例#32
0
    def get_distributed_slice(self, slc):
        """
        Slice the dimension in parallel and return a shallow copy holding the rank-local part of the slice. Ranks whose
        local bounds do not intersect the slice receive an empty dimension.

        :param slc: A :class:`slice`-like object or a fancy slice. A fancy slice is applied exactly as given: it must
         already be processor-local (integer indices are local indices) since it is neither translated nor
         redistributed before slicing.
        :rtype: :class:`~ocgis.Dimension`
        :raises: :class:`~ocgis.exc.EmptyObjectError`
        """

        raise_if_empty(self)

        slc = get_formatted_slice(slc, 1)[0]
        is_fancy = not isinstance(slc, slice)

        # A full slice is just a copy.
        if not is_fancy and slc == slice(None):
            return self.copy()
        # Non-distributed dimensions use standard slicing.
        if not self.dist:
            return self[slc]

        # Translate a regular slice from global to local indices. Fancy slices are used as-is.
        local_slc = slc
        if not is_fancy:
            local_slc = get_global_to_local_slice((slc.start, slc.stop),
                                                  self.bounds_local)
            if local_slc is not None:
                local_slc = slice(*local_slc)

        if local_slc is None:
            # The slice misses the local bounds: this rank ends up with an empty dimension of size 0.
            ret = self.copy()
            ret.convert_to_empty()
            dimension_size = 0
        else:
            # The slice overlaps the local bounds: slice with the local indices.
            ret = self[local_slc]
            dimension_size = len(ret)
        assert dimension_size >= 0

        # The new global bounds span the total of all local sizes; rank 0 computes them and shares the result.
        gathered_sizes = vm.gather(dimension_size)
        bounds_global = None
        if vm.rank == 0:
            total_size = 0
            for size in gathered_sizes:
                try:
                    total_size += size
                except TypeError:
                    pass
            bounds_global = (0, total_size)
        bounds_global = vm.bcast(bounds_global)
        if not ret.is_empty:
            ret.bounds_global = bounds_global

        # Normalize the local bounds on live ranks. Within the scoped communicator each rank after the first shifts
        # its local bounds by the running offset, adds its own length, and passes the offset on.
        inner_live_ranks = get_nonempty_ranks(ret, vm)
        with vm.scoped('bounds normalization', inner_live_ranks):
            if not vm.is_null:
                adjust = len(ret) if vm.rank == 0 else None
                adjust = vm.bcast(adjust)
                for current_rank in vm.ranks:
                    if vm.rank == current_rank and vm.rank != 0:
                        ret.bounds_local = [
                            bound + adjust for bound in ret.bounds_local
                        ]
                        adjust += len(ret)
                    vm.barrier()
                    adjust = vm.bcast(adjust, root=current_rank)
        return ret
示例#33
0
文件: mpi.py 项目: NCPP/ocgis
def redistribute_by_src_idx(variable, dimname, dimension):
    """
    Redistribute ``variable`` across ranks using the source indices held by ``dimension``. No values are moved in
    memory with MPI: the variable's loaded state is cleared so its data is reloaded from source with the new indices.

    This function is collective across the current `~ocgis.OcgVM`.

    * Uses fancy indexing only.
    * Gathers all source indices to a single processor.

    :param variable: The variable to redistribute.
    :type variable: :class:`~ocgis.Variable`
    :param str dimname: The name of the dimension holding the source indices.
    :param dimension: The dimension object.
    :type dimension: :class:`~ocgis.Dimension`
    """
    from ocgis import SourcedVariable, Variable, vm
    from ocgis.variable.dimension import create_src_idx

    assert isinstance(variable, SourcedVariable)
    assert dimname is not None

    # Nothing to redistribute in serial; the single rank already owns all of its source information.
    if vm.size == 1:
        return

    # At least one rank must hold data.
    live_ranks = vm.get_live_ranks_from_object(variable)
    if len(live_ranks) == 0:
        raise ValueError('There must be at least one rank to redistribute by source index.')

    def _clear_loaded_state(target):
        # Drop cached emptiness, mask and value so they are re-established on next access.
        target._is_empty = None
        target._mask = None
        target._value = None
        target._has_initialized_value = False

    # Collect the sliced dimensions carrying the source indices to redistribute.
    gathered_dims = vm.gather(dimension)

    global_src_idx = None
    global_src_idx_size = None
    if vm.rank == 0:
        # Currently-empty ranks contribute ``None``; ignore those.
        populated_dims = [d for d in gathered_dims if d is not None]
        # Convert any bounds-type source indices to fancy type.
        # TODO: Support bounds-type source indices.
        for d in populated_dims:
            if d._src_idx_type == SourceIndexType.BOUNDS:
                d._src_idx = create_src_idx(*d._src_idx, si_type=SourceIndexType.FANCY)
        # Concatenate the source indices into the variable that will be scattered.
        stacked_src_idx = hgather([d._src_idx for d in populated_dims])
        global_src_idx = Variable(name='global_src_idx', value=stacked_src_idx, dimensions=dimname)
        # The total size is needed to lay out a regular distribution for the scatter.
        global_src_idx_size = global_src_idx.size

    # Build the destination distribution from the total number of source indices.
    global_src_idx_size = vm.bcast(global_src_idx_size)
    dest_dist = OcgDist()
    new_dim = dest_dist.create_dimension(dimname, global_src_idx_size, dist=True)
    dest_dist.update_dimension_bounds()

    # Each rank receives its share of the new source indices.
    new_rank_src_idx = variable_scatter(global_src_idx, dest_dist)

    if not new_rank_src_idx.is_empty:
        # Clear the variable so everything is loaded from source again.
        _clear_loaded_state(variable)
        # Attach the new source indices to the destination dimension and register it on the collection.
        new_dim._src_idx = new_rank_src_idx.get_value()
        variable.parent.dimensions[dimname] = new_dim
    else:
        # This rank received nothing from the scatter and becomes empty.
        variable.convert_to_empty()

    # Emptiness is re-derived from the dimensions from here on.
    variable.parent._is_empty = None
    for member in variable.parent.values():
        member._is_empty = None

    # Variables sharing the redistributed dimension need the same treatment as the target variable.
    for member in variable.parent.values():
        if dimname not in member.dimension_names:
            continue
        if new_rank_src_idx.is_empty:
            member.convert_to_empty()
        else:
            _clear_loaded_state(member)
示例#34
0
文件: geomc.py 项目: wk1984/ocgis
def reduce_reindex_coordinate_index(cindex, start_index=0):
    """
    Reindex a subset of global coordinate indices contained in the ``cindex`` variable.

    The re-indexing starts counting at ``start_index`` (``0`` or ``1``).

    Masks are not respected.

    A two-element tuple is returned:

     * First element --> A :class:`numpy.ndarray` shaped like ``cindex`` holding the new indexing.
     * Second element --> A :class:`numpy.ndarray` with the unique indices, usable to reduce an external coordinate
       storage variable or array.

    :param cindex: A variable containing coordinate index integer values. This variable may be distributed. This may
     also be a NumPy array.
    :type cindex: :class:`~ocgis.Variable` | :class:`~numpy.ndarray`
    :param int start_index: The first index to use for the re-indexing of ``cindex``. This may be ``0`` or ``1``.
    :rtype: tuple
    """

    # Pull the raw values out of a variable; anything without ``get_value`` is taken to be a NumPy array already.
    try:
        cindex = cindex.get_value()
    except AttributeError:
        pass

    # Work on a flat copy and remember the incoming shape so the result can be restored to it.
    cindex = np.atleast_1d(cindex)
    original_shape = cindex.shape
    cindex = cindex.flatten()

    # Unique coordinate indices owned by this rank.
    u = np.array(create_unique_global_array(cindex))

    # All ranks use the data type found on rank 0 for the new coordinate index.
    lrank = vm.rank
    dtype = vm.bcast(u.dtype if lrank == 0 else None)

    # Whether this rank owns any unique values.
    has_u = len(u) > 0

    # The new index is a distributed range with one entry per unique value.
    new_u_dimension = create_distributed_dimension(len(u), name='__new_u_dimension__')
    new_u = arange_from_dimension(new_u_dimension, start=start_index, dtype=dtype)

    # Lookup from old index value to new index value, used to remap the old coordinate index.
    uidx = dict(zip(u, new_u)) if has_u else None

    vm.barrier()

    # The (min, max) of the rank's unique values acts as a cheap index when ranks look for index overlaps. Every
    # rank gets a copy of all bounds.
    local_bounds = (min(u), max(u)) if has_u else None
    lb_global = vm.bcast(vm.gather(local_bounds))

    # A remote rank matters to this rank when any local coordinate index lies within that rank's bounds.
    overlaps = []
    for rank, bounds in enumerate(lb_global):
        if rank == lrank or bounds is None:
            continue
        within_bounds = np.logical_and(bounds[1] >= cindex, bounds[0] <= cindex)
        if np.any(within_bounds):
            overlaps.append(rank)

    # Ranks must also know which ranks will be asking them for data.
    global_overlaps = vm.bcast(vm.gather(overlaps))
    destinations = [requester for requester, wanted in enumerate(global_overlaps) if vm.rank in wanted]

    # MPI communication tags used in the algorithm.
    tag_search = MPITag.REDUCE_REINDEX_SEARCH
    tag_success = MPITag.REDUCE_REINDEX_SUCCESS
    tag_child_finished = MPITag.REDUCE_REINDEX_CHILD_FINISHED
    tag_found = MPITag.REDUCE_REINDEX_FOUND

    # Output array, filled in place by the root / non-root drivers.
    new_cindex = np.empty_like(cindex)

    if lrank != 0:
        run_rr_nonroot(new_cindex, cindex, uidx, destinations, has_u, overlaps, tag_child_finished, tag_found,
                       tag_search, tag_success)
    else:
        run_rr_root(new_cindex, cindex, uidx, destinations, tag_child_finished, tag_found, tag_search, tag_success)

    # Restore the shape of the incoming coordinate index.
    new_cindex = new_cindex.reshape(original_shape)

    vm.barrier()

    return new_cindex, u
示例#35
0
文件: mpi.py 项目: NCPP/ocgis
def variable_gather(variable, root=0):
    """
    Gather a distributed variable onto a single rank.

    With more than one rank, the ``root`` rank builds a copy of ``variable`` whose distributed dimensions are replaced
    by non-distributed dimensions sized to the sum of the gathered parts, then fills it with each rank's values at
    that rank's local bounds. With a single rank, the input variable itself is returned after its dimensions are
    flagged as non-distributed.

    :param variable: The variable to gather. It may not be empty.
    :param int root: The rank that assembles and returns the gathered variable.
    :return: The gathered variable on ``root`` (or the input variable in serial); ``None`` on all other ranks.
    :raises: ValueError if ``variable`` is empty.
    """
    from ocgis import vm

    if variable.is_empty:
        raise ValueError('No empty variables allowed.')

    # Serial: nothing to gather. Note the input's dimensions are modified in place.
    if vm.size == 1:
        for dim in variable.dimensions:
            dim.dist = False
        return variable

    is_root = vm.rank == root

    if is_root:
        # Start from a copy stripped of mask, value and dimensions; these are rebuilt from the gathered parts.
        new_variable = variable.copy()
        new_variable.dtype = variable.dtype
        new_variable._mask = None
        new_variable._value = None
        new_variable._dimensions = None
        assert not new_variable.has_allocated_value
        new_dimensions = [None] * variable.ndim

    for idx, dim in enumerate(variable.dimensions):
        # NOTE(review): ``root`` is not forwarded to ``vm.gather``. If ``vm.gather`` collects on rank 0 by default,
        # a non-zero ``root`` would not receive the parts - confirm against the ``vm.gather`` signature.
        if dim.dist:
            parts = vm.gather(dim)
        if not is_root:
            continue

        new_dim = dim.copy()
        if dim.dist:
            new_size = sum(len(part) for part in parts)
            # Only rebuild a source index when every gathered part carries one.
            has_src_idx = len(parts) > 0 and all(part._src_idx is not None for part in parts)
            if has_src_idx:
                if parts[-1]._src_idx_type == SourceIndexType.FANCY:
                    # Fancy indices: place each part's indices at that part's local bounds.
                    new_src_idx = np.zeros(new_size, dtype=DataType.DIMENSION_SRC_INDEX)
                    for part in parts:
                        new_src_idx[part.bounds_local[0]: part.bounds_local[1]] = part._src_idx
                else:
                    # Bounds-type indices: the gathered index spans the extremes of all parts.
                    part_bounds = np.array([part._src_idx for part in parts])
                    new_src_idx = (part_bounds.min(), part_bounds.max())
            else:
                new_src_idx = None
            new_dim.set_size(new_size, src_idx=new_src_idx)
            new_dim.dist = False
        new_dimensions[idx] = new_dim

    if is_root:
        new_variable.set_dimensions(new_dimensions, force=True)

    gathered_variables = vm.gather(variable)

    if not is_root:
        return None

    # Write each rank's values into the gathered variable at the rank's local bounds.
    for gv in gathered_variables:
        destination_slice = [slice(*dim.bounds_local) for dim in gv.dimensions]
        new_variable.__setitem__(destination_slice, gv)
    return new_variable
示例#36
0
    def test_get_intersects(self):
        # Intersect a scattered polygon coordinate variable with a subset geometry and verify the selected centroids,
        # both before and after a global reduction of the subset.
        self.add_barrier = False
        desired = [[20.0, -49.5], [10.0, -44.5], [10.0, -39.5]]

        def check_centroids(target):
            # Build the local centroid list, gather it, then flatten and compare on rank 0.
            local = []
            for record in target.iter_geometries():
                if record[1] is None:
                    continue
                local.append([record[1].centroid.x, record[1].centroid.y])
            collected = vm.gather(local)
            if vm.rank == 0:
                merged = []
                for chunk in collected:
                    merged.extend(chunk)
                self.assertEqual(merged, desired)

        subset_geom = self.fixture_subset_geom()
        poly = self.fixture()

        # Distribute the polygon's first dimension and scatter the parent collection accordingly.
        dist = OcgDist()
        for template in poly.parent.dimensions.values():
            candidate = template.copy()
            if candidate.name == poly.dimensions[0].name:
                candidate.dist = True
            dist.add_dimension(candidate)
        dist.update_dimension_bounds()

        poly.parent = variable_collection_scatter(poly.parent, dist)

        # Ranks left empty after the scatter leave the test.
        vm.create_subcomm_by_emptyable('scatter', poly, is_current=True)
        if vm.is_null:
            return

        poly.parent._validate_()

        # All variables must share the scattered parent collection.
        for child in poly.parent.values():
            self.assertEqual(id(child.parent), id(poly.parent))
            self.assertEqual(len(child.parent), len(poly.parent))

        sub = poly.get_intersects(subset_geom)
        # Ranks left empty after the intersects operation leave the test.
        vm.create_subcomm_by_emptyable('after intersects', sub, is_current=True)
        if vm.is_null:
            return

        check_centroids(sub)

        self.assertEqual(len(sub.parent), len(poly.parent))

        sub.parent._validate_()
        sub2 = sub.reduce_global()
        sub2.parent._validate_()

        # Gather then broadcast coordinates so all coordinates are available on each process.
        to_add = []
        for gather_target in [sub2.x, sub2.y]:
            on_root = variable_gather(gather_target.extract())
            to_add.append(vm.bcast(on_root))
        for coordinate in to_add:
            sub2.parent.add_variable(coordinate, force=True)

        # Both the original subset and the reduced subset must give the desired centroids.
        for to_check in [sub, sub2]:
            check_centroids(to_check)
示例#37
0
文件: dimension.py 项目: NCPP/ocgis
    def get_distributed_slice(self, slc):
        """
        Collectively slice the dimension. The result is a shallow copy containing this rank's portion of the slice and
        may be empty when the slice does not touch the rank's local bounds.

        :param slc: A :class:`slice`-like object or a fancy slice. Fancy slices must be processor-local (integer
         indices are local indices); they are used unchanged, without manipulation or redistribution.
        :rtype: :class:`~ocgis.Dimension`
        :raises: :class:`~ocgis.exc.EmptyObjectError`
        """

        raise_if_empty(self)

        slc = get_formatted_slice(slc, 1)[0]
        is_fancy = not isinstance(slc, slice)

        # Full slice: a plain copy is enough.
        if not is_fancy and slc == slice(None):
            return self.copy()
        # Use standard slicing for non-distributed dimensions.
        if not self.dist:
            return self[slc]

        # Regular slices are converted from global to local indices; fancy slices are already local.
        if is_fancy:
            local_slc = slc
        else:
            local_range = get_global_to_local_slice((slc.start, slc.stop), self.bounds_local)
            local_slc = None if local_range is None else slice(*local_range)

        if local_slc is not None:
            # The slice overlaps the local bounds.
            ret = self[local_slc]
            dimension_size = len(ret)
        else:
            # No overlap with the local bounds: the dimension becomes empty with size 0.
            ret = self.copy()
            ret.convert_to_empty()
            dimension_size = 0
        assert dimension_size >= 0

        # Rank 0 adds up the local sizes to form the new global bounds, which are then broadcast.
        dimension_sizes = vm.gather(dimension_size)
        if vm.rank != 0:
            bounds_global = None
        else:
            running_total = 0
            for local_size in dimension_sizes:
                try:
                    running_total += local_size
                except TypeError:
                    pass
            bounds_global = (0, running_total)
        bounds_global = vm.bcast(bounds_global)
        if not ret.is_empty:
            ret.bounds_global = bounds_global

        # Normalize the local bounds on live ranks: in rank order, every rank but the first offsets its local bounds
        # by the accumulated length of the ranks before it.
        inner_live_ranks = get_nonempty_ranks(ret, vm)
        with vm.scoped('bounds normalization', inner_live_ranks):
            if not vm.is_null:
                adjust = None
                if vm.rank == 0:
                    adjust = len(ret)
                adjust = vm.bcast(adjust)
                for current_rank in vm.ranks:
                    is_my_turn = vm.rank == current_rank
                    if is_my_turn and vm.rank != 0:
                        ret.bounds_local = [edge + adjust for edge in ret.bounds_local]
                        adjust += len(ret)
                    vm.barrier()
                    adjust = vm.bcast(adjust, root=current_rank)
        return ret
示例#38
0
def reduce_reindex_coordinate_variables(cindex, start_index=0):
    """
    Reindex a subset of global coordinate indices contained in the ``cindex`` variable.

    The values returned by ``create_unique_global_array`` for this rank are assigned new, sequential index values.
    The numbering continues across ranks: each rank sends its ending index to the next rank in ``vm.ranks`` which then
    uses it as its own starting index. Index values used by ``cindex`` on this rank but not mapped by this rank are
    resolved by querying the caches held by the other ranks.

    The starting index value (``0`` or ``1``) is set by ``start_index`` for the re-indexing procedure.

    Function will not respect masks.

    Call is collective across the current VM.

    The function returns a two-element tuple:

     * First element --> A :class:`numpy.ndarray` with the same dimension as ``cindex`` containing the new indexing.
     * Second element --> A :class:`numpy.ndarray` containing the unique indices that may be used to reduce an external
       coordinate storage variable or array.

    :param cindex: A variable containing coordinate index integer values. This variable may be distributed. This may
     also be a NumPy array.
    :type cindex: :class:`~ocgis.Variable` || :class:`~numpy.ndarray`
    :param int start_index: The first index to use for the re-indexing of ``cindex``. This may be ``0`` or ``1``.
    :rtype: tuple
    """

    # Get the coordinate index values as a NumPy array.
    try:
        cindex = cindex.get_value()
    except AttributeError:
        # Assume this is already a NumPy array.
        pass

    # Create the unique coordinate index array.
    u = np.array(create_unique_global_array(cindex))

    # Holds re-indexed values.
    new_cindex = np.empty_like(cindex)
    # Caches the local re-indexing for the process. Each previously unseen value receives the next sequential local
    # index which is always the current size of the cache.
    cache = {}
    for to_reindex in u.flat:
        if to_reindex not in cache:
            cache[to_reindex] = len(cache)

    # MPI communication tags.
    tag_cache_create = MPITag.REINDEX_CACHE_CREATE
    tag_cache_get_recv = MPITag.REINDEX_CACHE_GET_RECV
    tag_cache_get_send = MPITag.REINDEX_CACHE_GET_SEND

    # This is the local offset to move sequentially across processes. Local indices run from zero to the cache size
    # minus one. If the local cache is empty, there is no offsetting to move between tasks.
    offset = len(cache)

    # Synchronize the processes with the appropriate local offset.
    for idx, rank in enumerate(vm.ranks):
        try:
            dest_rank = vm.ranks[idx + 1]
        except IndexError:
            # The last rank has no successor to notify.
            break
        else:
            if vm.rank == rank:
                vm.comm.send(start_index + offset,
                             dest=dest_rank,
                             tag=tag_cache_create)
            elif vm.rank == dest_rank:
                offset_previous = vm.comm.recv(source=rank,
                                               tag=tag_cache_create)
                start_index = offset_previous
    vm.barrier()

    # Find any missing local coordinate indices that are not mapped by the local cache. Positions are flat positions
    # so the arrays must be addressed through ``.flat``. Plain ``[]`` indexing only addresses the first axis and is
    # wrong for arrays with more than one dimension.
    is_missing = False
    is_missing_indices = []
    for idx, to_reindex in enumerate(cindex.flat):
        try:
            local_new_cindex = cache[to_reindex]
        except KeyError:
            is_missing = True
            is_missing_indices.append(idx)
        else:
            new_cindex.flat[idx] = local_new_cindex + start_index

    # Check if there are any processors missing their new index values.
    is_missing_global = vm.gather(is_missing)
    if vm.rank == 0:
        is_missing_global = any(is_missing_global)
    is_missing_global = vm.bcast(is_missing_global)

    # Execute a search across the process caches for any missing coordinate index values.
    if is_missing_global:
        for rank in vm.ranks:
            is_missing_rank = vm.bcast(is_missing, root=rank)
            if is_missing_rank:
                # Every other rank must answer exactly one request per missing index on the asking rank.
                n_missing = vm.bcast(len(is_missing_indices), root=rank)
                if vm.rank == rank:
                    for imi in is_missing_indices:
                        for subrank in vm.ranks:
                            if vm.rank != subrank:
                                vm.comm.send(cindex.flat[imi],
                                             dest=subrank,
                                             tag=tag_cache_get_recv)
                                new_cindex_element = vm.comm.recv(
                                    source=subrank, tag=tag_cache_get_send)
                                # ``None`` means the queried rank does not map the value.
                                if new_cindex_element is not None:
                                    new_cindex.flat[imi] = new_cindex_element
                else:
                    for _ in range(n_missing):
                        curr_missing = vm.comm.recv(source=rank,
                                                    tag=tag_cache_get_recv)
                        new_cindex_element = cache.get(curr_missing)
                        if new_cindex_element is not None:
                            new_cindex_element += start_index
                        vm.comm.send(new_cindex_element,
                                     dest=rank,
                                     tag=tag_cache_get_send)

    return new_cindex, u
示例#39
0
文件: geom.py 项目: Ouranosinc/ocgis
    def get_unioned(self,
                    dimensions=None,
                    union_dimension=None,
                    spatial_average=None,
                    root=0):
        """
        Unions _unmasked_ geometry objects. Collective across the current :class:`~ocgis.OcgVM`.

        :param dimensions: The dimensions to union across. If ``None``, all dimensions of the geometry variable are
         unioned.
        :param union_dimension: The dimension holding the unioned geometry. It is always the last dimension of the
         returned variable. If ``None``, a dimension of size one named ``constants.DimensionName.UNIONED_GEOMETRY``
         is created.
        :param spatial_average: Passed to ``get_variable_names`` to select the variables on the parent that are
         averaged using the geometry weights. If ``None``, no variables are averaged and the returned geometry
         variable is extracted from its parent.
        :param int root: The rank that unions the geometries gathered from all ranks and computes the rank-weighted
         averages. Only this rank returns the unioned variable.
        :return: The unioned geometry variable on ``root`` and ``None`` on all other ranks.
        """
        # TODO: optimize!

        # Get dimension names and lengths for the dimensions to union.
        if dimensions is None:
            dimensions = self.dimensions
        dimension_names = get_dimension_names(dimensions)
        dimension_lengths = [
            len(self.parent.dimensions[dn]) for dn in dimension_names
        ]

        # Get the variables to spatial average.
        if spatial_average is not None:
            variable_names_to_weight = get_variable_names(spatial_average)
        else:
            variable_names_to_weight = []

        # Get the new dimensions for the geometry variable. The union dimension is always the last dimension.
        if union_dimension is None:
            from ocgis.variable.dimension import Dimension
            union_dimension = Dimension(
                constants.DimensionName.UNIONED_GEOMETRY, 1)
        new_dimensions = [
            dim for dim in self.dimensions if dim.name not in dimension_names
        ]
        new_dimensions.append(union_dimension)

        # Configure the return variable.
        ret = self.copy()
        if spatial_average is None:
            ret = ret.extract()
        ret.set_mask(None)
        ret.set_value(None)
        ret.set_dimensions(new_dimensions)
        ret.allocate_value()

        # Destination indices in the return variable are filled with non-masked, unioned geometries.
        for dst_indices in product(
                *
            [list(range(dl)) for dl in get_dimension_lengths(new_dimensions)]):
            dst_slc = {
                new_dimensions[ii].name: dst_indices[ii]
                for ii in range(len(new_dimensions))
            }

            # Select the geometries to union skipping any masked geometries.
            to_union = deque()
            for indices in product(
                    *[list(range(dl)) for dl in dimension_lengths]):
                dslc = {
                    dimension_names[ii]: indices[ii]
                    for ii in range(len(dimension_names))
                }
                sub = self[dslc]
                sub_mask = sub.get_mask()
                if sub_mask is None or not sub_mask.flatten()[0]:
                    to_union.append(sub.get_value().flatten()[0])

            # Execute the union operation. Multi-part geometries are split into their parts first. The parts are
            # accessed through ``geoms`` since direct iteration over multi-part geometries is not supported by newer
            # Shapely releases.
            processed_to_union = deque()
            for geom in to_union:
                if isinstance(geom, (MultiPolygon, MultiPoint)):
                    processed_to_union.extend(geom.geoms)
                else:
                    processed_to_union.append(geom)
            unioned = cascaded_union(processed_to_union)

            # Pull unioned geometries and union again for the final unioned geometry.
            if vm.size > 1:
                unioned_gathered = vm.gather(unioned)
                if vm.rank == root:
                    unioned = cascaded_union(unioned_gathered)

            # Fill the return geometry variable value with the unioned geometry.
            to_fill = ret[dst_slc].get_value()
            to_fill[0] = unioned

        # Spatial average shared dimensions.
        if spatial_average is not None:
            # Get source data to weight.
            for var_to_weight in filter(
                    lambda ii: ii.name in variable_names_to_weight,
                    list(self.parent.values())):
                # Holds sizes of dimensions to iterate. These dimension are not squeezed by the weighted averaging.
                range_to_itr = []
                # Holds the names of the dimensions to iterate.
                names_to_itr = []
                # Dimension names that are squeezed. Also the dimensions for the weight matrix.
                names_to_slice_all = []
                for dn in var_to_weight.dimensions:
                    if dn.name in self.dimension_names:
                        names_to_slice_all.append(dn.name)
                    else:
                        range_to_itr.append(len(dn))
                        names_to_itr.append(dn.name)

                # Reference the weights on the source geometry variable.
                weights = self[{
                    nsa: slice(None)
                    for nsa in names_to_slice_all
                }].weights

                # Path if there are iteration dimensions. Checks for axes ordering in addition.
                if len(range_to_itr) > 0:
                    # New dimensions for the spatially averaged variable. Unioned dimension is always last. Remove the
                    # dimensions aggregated by the weighted average.
                    new_dimensions = [
                        dim for dim in var_to_weight.dimensions
                        if dim.name not in dimension_names
                    ]
                    new_dimensions.append(union_dimension)

                    # Prepare the spatially averaged variable.
                    target = ret.parent[var_to_weight.name]
                    target.set_mask(None)
                    target.set_value(None)
                    target.set_dimensions(new_dimensions)
                    target.allocate_value()

                    # Swap weight axes to make sure they align with the target variable.
                    swap_chain = get_swap_chain(dimension_names,
                                                names_to_slice_all)
                    if len(swap_chain) > 0:
                        weights = weights.copy()
                    for sc in swap_chain:
                        weights = weights.swapaxes(*sc)

                    # The main weighting loop. Can get quite intensive with many, large iteration dimensions.
                    len_names_to_itr = len(names_to_itr)
                    slice_none = slice(None)
                    squeeze_out = [
                        ii for ii, dim in enumerate(var_to_weight.dimensions)
                        if dim.name in names_to_itr
                    ]
                    should_squeeze = len(squeeze_out) > 0
                    # Bind the NumPy functions to local names to avoid attribute lookups inside the loop.
                    np_squeeze = np.squeeze
                    np_atleast_1d = np.atleast_1d
                    np_ma_average = np.ma.average
                    for nonweighted_indices in product(
                            *[list(range(ri)) for ri in range_to_itr]):
                        w_slc = {
                            names_to_itr[ii]: nonweighted_indices[ii]
                            for ii in range(len_names_to_itr)
                        }
                        for nsa in names_to_slice_all:
                            w_slc[nsa] = slice_none
                        data_to_weight = var_to_weight[w_slc].get_masked_value(
                        )
                        if should_squeeze:
                            data_to_weight = np_squeeze(
                                data_to_weight, axis=tuple(squeeze_out))
                        weighted_value = np_atleast_1d(
                            np_ma_average(data_to_weight, weights=weights))
                        target[w_slc].get_value()[:] = weighted_value
                else:
                    target_to_weight = var_to_weight.get_masked_value()
                    # Sort to minimize floating point sum errors.
                    target_to_weight = target_to_weight.flatten()
                    weights = weights.flatten()
                    sindices = np.argsort(target_to_weight)
                    target_to_weight = target_to_weight[sindices]
                    weights = weights[sindices]

                    weighted_value = np.atleast_1d(
                        np.ma.average(target_to_weight, weights=weights))
                    target = ret.parent[var_to_weight.name]
                    target.set_mask(None)
                    target.set_value(None)
                    # NOTE(review): "new_dimensions" is whatever was bound last -- the geometry variable's new
                    # dimensions or those of a previously averaged variable. Confirm this is intended.
                    target.set_dimensions(new_dimensions)
                    target.set_value(weighted_value)

            # Collect areas of live ranks and convert to weights.
            if vm.size > 1:
                # If there is no area information (points for example, we need to use counts).
                if ret.area.data[0].max() == 0:
                    weight_or_proxy = float(self.size)
                else:
                    weight_or_proxy = ret.area.data[0]

                if vm.rank != root:
                    vm.comm.send(weight_or_proxy, dest=root)
                else:
                    # The root's own value is first followed by the other ranks in the order of "vm.ranks".
                    live_rank_areas = [weight_or_proxy]
                    for tner in vm.ranks:
                        if tner != vm.rank:
                            recv_area = vm.comm.recv(source=tner)
                            live_rank_areas.append(recv_area)
                    live_rank_areas = np.array(live_rank_areas)

                    rank_weights = live_rank_areas / np.max(live_rank_areas)

                for var_to_weight in filter(
                        lambda ii: ii.name in variable_names_to_weight,
                        list(ret.parent.values())):
                    dimensions_to_itr = [
                        dim.name for dim in var_to_weight.dimensions
                        if dim.name != union_dimension.name
                    ]
                    slc = {union_dimension.name: 0}
                    for idx_slc in var_to_weight.iter_dict_slices(
                            dimensions=dimensions_to_itr):
                        idx_slc.update(slc)
                        to_weight = var_to_weight[idx_slc].get_value().flatten(
                        )[0]
                        if vm.rank != root:
                            vm.comm.send(to_weight, dest=root)
                        else:
                            # Collected in the same rank order as the rank weights.
                            collected_to_weight = [to_weight]
                            for tner in vm.ranks:
                                if tner != root:
                                    recv_to_weight = vm.comm.recv(source=tner)
                                    collected_to_weight.append(recv_to_weight)

                            # Sort to minimize floating point sum errors. The sorted weights are kept in a local
                            # name so "rank_weights" stays in rank order for the next iteration. Reassigning it
                            # would misalign weights and values from the second iteration onward.
                            collected_to_weight = np.array(collected_to_weight)
                            sindices = np.argsort(collected_to_weight)
                            collected_to_weight = collected_to_weight[sindices]
                            sorted_rank_weights = rank_weights[sindices]

                            weighted = np.atleast_1d(
                                np.ma.average(collected_to_weight,
                                              weights=sorted_rank_weights))
                            var_to_weight[idx_slc].get_value()[:] = weighted
        if vm.rank == root:
            return ret
        else:
            return
示例#40
0
    def test_system_spatial_averaging_through_operations_state_boundaries(self):
        """
        Run operations against an exact field using the state boundaries geometries for every combination of spatial
        operation, aggregation, wrapping and output format. Requires exactly eight MPI processes.

        Checks the overview shapefiles are written for shapefile outputs and, for OCGIS outputs, that all geometries
        are present and the aggregated values and areas agree with the exact field and the source geometry areas.
        """
        if MPI_SIZE != 8:
            raise SkipTest('MPI_SIZE != 8')

        ntime = 3
        # Get the exact field value for the state's representative center.
        with vm.scoped([0]):
            if MPI_RANK == 0:
                states = RequestDataset(self.path_state_boundaries, driver='vector').get()
                states.update_crs(env.DEFAULT_COORDSYS)
                fill = np.zeros((states.geom.shape[0], 2))
                for idx, geom in enumerate(states.geom.get_value().flat):
                    centroid = geom.centroid
                    fill[idx, :] = centroid.x, centroid.y
                exact_states = create_exact_field_value(fill[:, 0], fill[:, 1])
                state_ugid = states['UGID'].get_value()
                area = states.geom.area

        keywords = {
            'spatial_operation': [
                'clip',
                'intersects'
            ],
            'aggregate': [
                True,
                False
            ],
            'wrapped': [True, False],
            'output_format': [
                OutputFormatName.OCGIS,
                'csv',
                'csv-shp',
                'shp'
            ],
        }

        # total_iterations = len(list(self.iter_product_keywords(keywords)))

        for ctr, k in enumerate(self.iter_product_keywords(keywords)):
            # barrier_print(k)
            # if ctr % 1 == 0:
            #     if vm.is_root:
            #         print('Iteration {} of {}...'.format(ctr + 1, total_iterations))

            # Only the root writes the source data. The path is then shared with all ranks.
            with vm.scoped([0]):
                if vm.is_root:
                    grid = create_gridxy_global(resolution=1.0, dist=False, wrapped=k.wrapped)
                    field = create_exact_field(grid, 'foo', ntime=ntime)
                    path = self.get_temporary_file_path('foo.nc')
                    field.write(path)
                else:
                    path = None
            path = MPI_COMM.bcast(path)

            rd = RequestDataset(path)

            ops = OcgOperations(dataset=rd, geom='state_boundaries', spatial_operation=k.spatial_operation,
                                aggregate=k.aggregate, output_format=k.output_format, prefix=str(ctr),
                                # geom_select_uid=[8]
                                )
            ret = ops.execute()

            # Test area is preserved for a problem element during union. The union's geometry was not fully represented
            # in the output.
            if k.output_format == 'shp' and k.aggregate and k.spatial_operation == 'clip':
                with vm.scoped([0]):
                    if vm.is_root:
                        inn = RequestDataset(ret).get()
                        inn_ugid_idx = np.where(inn['UGID'].get_value() == 8)[0][0]
                        ugid_idx = np.where(state_ugid == 8)[0][0]
                        self.assertAlmostEqual(inn.geom.get_value()[inn_ugid_idx].area, area[ugid_idx], places=2)

            # Test the overview geometry shapefile is written.
            if k.output_format == 'shp':
                directory = os.path.split(ret)[0]
                contents = os.listdir(directory)
                actual = ['_ugid.shp' in c for c in contents]
                self.assertTrue(any(actual))
            elif k.output_format == 'csv-shp':
                directory = os.path.split(ret)[0]
                directory = os.path.join(directory, 'shp')
                contents = os.listdir(directory)
                actual = ['_ugid.shp' in c for c in contents]
                self.assertTrue(any(actual))
                if not k.aggregate:
                    actual = ['_gid.shp' in c for c in contents]
                    self.assertTrue(any(actual))

            if k.output_format == OutputFormatName.OCGIS:
                # Materialize the keys before creating the array. On Python 3 a keys view passed directly to
                # "np.array" becomes a zero-dimensional object array instead of an array of the keys.
                geom_keys = list(ret.children.keys())
                all_geom_keys = vm.gather(np.array(geom_keys))
                if vm.is_root:
                    all_geom_keys = hgather(all_geom_keys)
                    self.assertEqual(len(np.unique(all_geom_keys)), 51)

                if k.aggregate:
                    # Collect the aggregated value and area for each geometry held by this rank.
                    actual = Dict()
                    for field, container in ret.iter_fields(yield_container=True):
                        if not field.is_empty:
                            ugid = container.geom.ugid.get_value()[0]
                            actual[ugid]['actual'] = field.data_variables[0].get_value()
                            actual[ugid]['area'] = container.geom.area[0]

                    actual = vm.gather(actual)

                    if vm.is_root:
                        actual = dgather(actual)

                        # Absolute errors between the exact value at the state's centroid and the aggregated value.
                        ares = []
                        actual_areas = []
                        for ugid_key, v in actual.items():
                            ugid_idx = np.where(state_ugid == ugid_key)[0][0]
                            desired = exact_states[ugid_idx]
                            actual_areas.append(v['area'])
                            for tidx in range(ntime):
                                are = np.abs((desired + ((tidx + 1) * 10)) - v['actual'][tidx, 0])
                                ares.append(are)

                        if k.spatial_operation == 'clip':
                            diff = np.abs(np.array(area) - np.array(actual_areas))
                            self.assertLess(np.max(diff), 1e-6)
                            self.assertLess(np.mean(diff), 1e-6)

                        # Test relative errors.
                        self.assertLess(np.max(ares), 0.031)
                        self.assertLess(np.mean(ares), 0.009)
示例#41
0
文件: geom.py 项目: Ouranosinc/ocgis
def get_masking_slice(intersects_mask_value, target, apply_slice=True):
    """
    Collective!

    Compute the global slice bounding the unmasked elements of a mask across all ranks and optionally apply it to
    the target and the mask.

    :param intersects_mask_value: The mask to use for creating the slice indices. If ``None`` or if all elements are
     ``True``, the rank does not contribute to the global slice. If no elements are ``True``, the rank contributes
     its full local extent.
    :type intersects_mask_value: :class:`numpy.ndarray`, dtype=bool
    :param target: The target slicable object to slice.
    :param bool apply_slice: If ``True``, apply the slice.
    :return: A three-element tuple containing the (optionally sliced) target, the (optionally sliced) mask variable,
     and the global slice as a tuple of :class:`slice` objects.
    :rtype: tuple
    :raises EmptySubsetError: If no rank contributes a local slice.
    """
    raise_if_empty(target)

    # Local slice as (start, stop) pairs with one pair per mask dimension.
    if intersects_mask_value is None or intersects_mask_value.all():
        local_slice = None
    elif not intersects_mask_value.any():
        # Nothing is masked so the full local extent is used for every dimension.
        local_slice = [(0, size) for size in intersects_mask_value.shape]
    else:
        _, local_slice = get_trimmed_array_by_mask(intersects_mask_value,
                                                   return_adjustments=True)
        local_slice = [(l.start, l.stop) for l in local_slice]

    # Shift the local slice by the lower local bound of each target dimension.
    if local_slice is None:
        offset_local_slice = None
    else:
        offset_local_slice = []
        for idx, (start, stop) in enumerate(local_slice):
            offset = target.dimensions[idx].bounds_local[0]
            offset_local_slice.append((start + offset, stop + offset))

    gathered_offset_local_slices = vm.gather(offset_local_slice)
    if vm.rank == 0:
        gathered_offset_local_slices = [
            g for g in gathered_offset_local_slices if g is not None
        ]
        if len(gathered_offset_local_slices) == 0:
            raise_empty_subset = True
        else:
            raise_empty_subset = False
            # Array axes are (rank, dimension, start/stop). The global slice for a dimension spans the minimum and
            # maximum values found across the ranks.
            offset_array = np.array(gathered_offset_local_slices)
            global_slice = [
                (np.min(offset_array[:, idx, :]), np.max(offset_array[:, idx, :]))
                for idx in range(offset_array.shape[1])
            ]
    else:
        global_slice = None
        raise_empty_subset = None
    # The flag is broadcast first so all ranks raise together before the global slice is exchanged.
    raise_empty_subset = vm.bcast(raise_empty_subset)
    if raise_empty_subset:
        raise EmptySubsetError
    global_slice = vm.bcast(global_slice)
    global_slice = tuple([slice(g[0], g[1]) for g in global_slice])

    intersects_mask = Variable(name='mask_gather',
                               value=intersects_mask_value,
                               dimensions=target.dimensions,
                               dtype=bool)

    if apply_slice:
        if vm.size_global > 1:
            ret = target.get_distributed_slice(global_slice)
            ret_mask = intersects_mask.get_distributed_slice(global_slice)
        else:
            ret = target.__getitem__(global_slice)
            ret_mask = intersects_mask.__getitem__(global_slice)
    else:
        ret = target
        ret_mask = intersects_mask

    return ret, ret_mask, global_slice
示例#42
0
    def test_system_converting_state_boundaries_shapefile(self):
        """
        Convert the state boundaries geometries to geometry coordinates, write them with the ESMF unstructured NetCDF
        driver and compare the geometries read back from the file with the source geometries. Runs for each
        combination of coordinate system transformation and geometry iterator usage.
        """
        # NOTE(review): this changes a global environment setting and the visible code does not restore it -- confirm
        # the test fixture resets the environment.
        ocgis.env.USE_NETCDF4_MPI = False  # tdk:FIX: this hangs in the STATE_FIPS write for asynch might be nc4 bug...
        keywords = {'transform_to_crs': [None, Spherical],
                    'use_geometry_iterator': [False, True]}
        # Coordinate sums collected for each successful conversion. Compared after the loop.
        actual_xsums = []
        actual_ysums = []
        for k in self.iter_product_keywords(keywords):
            # The target coordinate system is only passed to the conversion when the geometry iterator is used.
            if k.use_geometry_iterator and k.transform_to_crs is not None:
                to_crs = k.transform_to_crs()
            else:
                to_crs = None
            if k.transform_to_crs is None:
                desired_crs = WGS84()
            else:
                desired_crs = k.transform_to_crs()

            rd = RequestDataset(uri=self.path_state_boundaries)
            rd.metadata['schema']['geometry'] = 'MultiPolygon'
            field = rd.get()

            # Test there is no mask present.
            field.geom.load()
            self.assertFalse(field.geom.has_mask)
            self.assertNotIn(VariableName.SPATIAL_MASK, field)
            self.assertIsNone(field.dimension_map.get_spatial_mask())

            self.assertEqual(field.crs, WGS84())
            if k.transform_to_crs is not None:
                field.update_crs(desired_crs)
            try:
                gc = field.geom.convert_to(pack=False, use_geometry_iterator=k.use_geometry_iterator, to_crs=to_crs)
            except ValueError as e:
                # A ValueError is only tolerated when the geometry iterator is not used and a target coordinate
                # system is set. Any other ValueError is re-raised.
                try:
                    self.assertFalse(k.use_geometry_iterator)
                    self.assertIsNotNone(to_crs)
                except AssertionError:
                    raise e
                else:
                    continue

            actual_xsums.append(gc.x.get_value().sum())
            actual_ysums.append(gc.y.get_value().sum())
            self.assertEqual(gc.crs, desired_crs)

            # Test there is no mask present after conversion to geometry coordinates.
            self.assertFalse(gc.has_mask)
            self.assertNotIn(VariableName.SPATIAL_MASK, gc.parent)
            self.assertIsNone(gc.dimension_map.get_spatial_mask())

            # Carry the non-geometry variables over to the converted coordinates' parent before writing.
            for v in list(field.values()):
                if v.name != field.geom.name:
                    gc.parent.add_variable(v.extract(), force=True)

            path = self.get_temporary_file_path('esmf_state_boundaries.nc')
            self.assertEqual(gc.parent.crs, desired_crs)
            gc.parent.write(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)

            # Gather the source geometries from all ranks. Rank zero compares them with the geometries read back from
            # the written file.
            gathered_geoms = vm.gather(field.geom.get_value())
            if vm.rank == 0:
                actual_geoms = []
                for g in gathered_geoms:
                    actual_geoms.extend(g)

                rd = RequestDataset(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)
                infield = rd.get()
                self.assertEqual(create_crs(infield.crs.value), desired_crs)
                for dv in field.data_variables:
                    self.assertIn(dv.name, infield)
                ingrid = infield.grid
                self.assertIsInstance(ingrid, GridUnstruct)

                # presumably g[0] is the geometry's index and g[1] the geometry itself -- verify against
                # "iter_geometries".
                for g in ingrid.archetype.iter_geometries():
                    self.assertPolygonSimilar(g[1], actual_geoms[g[0]], check_type=False)

        vm.barrier()

        # Test coordinates have actually changed.
        # NOTE(review): "k" is the value left over from the final loop iteration -- confirm the check is meant to
        # depend on the last keyword combination only.
        if not k.use_geometry_iterator:
            for ctr, to_test in enumerate([actual_xsums, actual_ysums]):
                for lhs, rhs in itertools.combinations(to_test, 2):
                    # The x-coordinate sums (ctr == 0) are asserted to match. The y-coordinate sums are asserted to
                    # differ.
                    if ctr == 0:
                        self.assertAlmostEqual(lhs, rhs)
                    else:
                        self.assertNotAlmostEqual(lhs, rhs)