Пример #1
0
def main():
    """
    Subset the dataset at ``IN_PATH`` by ``BBOX`` and report its coordinate index before and after a global reduce.

    The dataset is requested with the UGRID netCDF driver and a point grid abstraction. The field shapes are printed,
    the grid is intersected with the bounding box, and the parent of the subset grid is used to scope the VM through
    ``vm.scoped_by_emptyable``. Ranks for which ``vm.is_null`` is true inside that scope do nothing. The remaining ranks
    switch the subset to a polygon abstraction and pass the coordinate index (before and after ``reduce_global``) and
    the reduced grid extent to ``rank_print``. The function finishes with a VM barrier.
    """
    rd = RequestDataset(IN_PATH, driver=DriverNetcdfUGRID, grid_abstraction=GridAbstraction.POINT)
    field = rd.get()
    # The call form works on both Python 2 and Python 3. The bare ``print`` statement is a syntax error on Python 3.
    print(field.shapes)
    sub = field.grid.get_intersects(box(*BBOX), optimized_bbox_subset=True).parent
    with vm.scoped_by_emptyable('reduce global', sub):
        if not vm.is_null:
            sub.grid_abstraction = GridAbstraction.POLYGON
            rank_print('sub', sub.grid.cindex.get_value())
            subr = sub.grid.reduce_global().parent
            rank_print('sub', subr.grid.cindex.get_value())
            rank_print(subr.grid.extent)
    vm.barrier()
Пример #2
0
def arange_from_dimension(dim, start=0, dtype=int, dist=True):
    """
    Build a run of consecutive values, ``numpy.arange``-style, whose length is taken from a dimension.

    :param dim: The dimension whose length sets the number of values produced.
    :type dim: :class:`~ocgis.Dimension`
    :param int start: The first value of the range. With ``dist=True``, a rank that receives an offset from the rank
     before it uses that offset in place of ``start``.
    :param dtype: The data type for the output array.
    :param bool dist: If ``True``, each rank forwards the end of its range to the next rank through the ``ocgis`` VM
     communicator and the call ends with a VM barrier. If ``False``, the array is created locally.
    :rtype: :class:`numpy.ndarray`
    """

    count = len(dim)

    if dist:
        from ocgis import vm
        for sender in vm.ranks:
            receiver = sender + 1
            # There is no rank after the last one to forward an offset to.
            if receiver == vm.size:
                break
            if vm.rank == sender:
                # Pass the end of this rank's range to the next rank.
                vm.comm.send(start + count, dest=receiver)
            elif vm.rank == receiver:
                # Continue where the previous rank stopped.
                start = vm.comm.recv(source=sender)
        vm.barrier()

    return np.arange(start, start + count, dtype=dtype)
Пример #3
0
def main():
    """
    Subset the dataset at ``IN_PATH`` by ``BBOX`` and report its coordinate index before and after a global reduce.

    The dataset is requested with the UGRID netCDF driver and a point grid abstraction. After the field shapes are
    printed, the grid is intersected with the bounding box and the parent of the resulting grid scopes the VM via
    ``vm.scoped_by_emptyable``. Inside the scope, ranks where ``vm.is_null`` is true are skipped. All other ranks set
    the polygon abstraction on the subset and hand the coordinate index values (prior to and following
    ``reduce_global``) plus the reduced extent to ``rank_print``. A VM barrier ends the function.
    """
    rd = RequestDataset(IN_PATH,
                        driver=DriverNetcdfUGRID,
                        grid_abstraction=GridAbstraction.POINT)
    field = rd.get()
    # Use the function-call form: the Python 2 ``print`` statement does not parse on Python 3, while this form
    # behaves the same on both for a single argument.
    print(field.shapes)
    sub = field.grid.get_intersects(box(*BBOX),
                                    optimized_bbox_subset=True).parent
    with vm.scoped_by_emptyable('reduce global', sub):
        if not vm.is_null:
            sub.grid_abstraction = GridAbstraction.POLYGON
            rank_print('sub', sub.grid.cindex.get_value())
            subr = sub.grid.reduce_global().parent
            rank_print('sub', subr.grid.cindex.get_value())
            rank_print(subr.grid.extent)
    vm.barrier()
0
    def test_system_grid_chunking(self):
        """
        Chunk an ESMF unstructured dataset against a copy of itself and check the written chunk and weight files.

        The test is skipped unless the VM has exactly four ranks.
        """
        if vm.size != 4:
            raise SkipTest('vm.size != 4')

        from ocgis.spatial.grid_chunker import GridChunker

        def assert_unit_weights(weight_path):
            # The minimum and maximum of the file's 'S' variable must both be (almost) one.
            factors = Field.read(weight_path)['S'].v()
            self.assertAlmostEqual(factors.min(), 1.0)
            self.assertAlmostEqual(factors.max(), 1.0)

        rd_dst = RequestDataset(uri=self.path_esmf_unstruct, driver=DriverESMFUnstruct, crs=Spherical(),
                                grid_abstraction='point', grid_is_isomorphic=True)
        rd_src = deepcopy(rd_dst)

        # Only the first rank creates the working directory; the others wait for it.
        chunk_wd = os.path.join(self.current_dir_output, 'chunks')
        if vm.rank == 0:
            os.mkdir(chunk_wd)
        vm.barrier()

        resolution = 0.28125
        gc = GridChunker(rd_src, rd_dst, nchunks_dst=[8], src_grid_resolution=resolution,
                         dst_grid_resolution=resolution, optimized_bbox_subset=True, paths={'wd': chunk_wd},
                         genweights=True)
        gc.write_chunks()

        # Distribute the eight chunk counters across ranks so each rank checks its own share of the files.
        dist = OcgDist()
        local_ctr = Dimension(name='ctr', size=8, dist=True)
        dist.add_dimension(local_ctr)
        dist.update_dimension_bounds()

        # Chunk files are numbered from one, hence the shift of the local bounds.
        for chunk_id in range(local_ctr.bounds_local[0] + 1, local_ctr.bounds_local[1] + 1):
            src_chunk_path = os.path.join(chunk_wd, 'split_src_{}.nc'.format(chunk_id))
            dst_chunk_path = os.path.join(chunk_wd, 'split_dst_{}.nc'.format(chunk_id))
            src_chunk = Field.read(src_chunk_path, driver=DriverESMFUnstruct)
            dst_chunk = Field.read(dst_chunk_path, driver=DriverESMFUnstruct)
            self.assertGreater(src_chunk.grid.shape[0], dst_chunk.grid.shape[0])

            assert_unit_weights(os.path.join(chunk_wd, 'esmf_weights_{}.nc'.format(chunk_id)))

        with vm.scoped('merge weights', [0]):
            if not vm.is_null:
                merged_weights = self.get_temporary_file_path('merged_weights.nc')
                gc.create_merged_weight_file(merged_weights, strict=False)
                assert_unit_weights(merged_weights)
Пример #5
0
    def test(self):
        """
        Write chunks with the fixture grid chunker and check the slices, the chunk files, the data sums and the index.
        """
        gs = self.fixture_grid_chunker()

        # Gather the per-rank sums of the destination data; the total is only computed on the first rank.
        desired_dst_grid_sum = MPI_COMM.gather(gs.dst_grid.parent['data'].get_value().sum())
        if vm.rank == 0:
            desired_sum = np.sum(desired_dst_grid_sum)

        # Expected destination slices: two bands along y, each split into three pieces along x.
        y_bands = [slice(0, 180, None), slice(180, 360, None)]
        x_bands = [slice(0, 240, None), slice(240, 480, None), slice(480, 720, None)]
        desired = [{'y': y_band, 'x': x_band} for y_band in y_bands for x_band in x_bands]
        self.assertEqual(list(gs.iter_dst_grid_slices()), desired)

        gs.write_chunks()

        if vm.rank == 0:
            rank_sums = []

        nchunks_total = gs.nchunks_dst[0] * gs.nchunks_dst[1]
        for ctr in range(1, nchunks_total + 1):
            src_path = gs.create_full_path_from_template('src_template', index=ctr)
            dst_path = gs.create_full_path_from_template('dst_template', index=ctr)

            src_field = RequestDataset(src_path).get()
            dst_field = RequestDataset(dst_path).get()

            # The global source envelope must contain the global destination envelope.
            src_envelope_global = box(*src_field.grid.extent_global)
            dst_envelope_global = box(*dst_field.grid.extent_global)
            self.assertTrue(does_contain(src_envelope_global, dst_envelope_global))

            for chunk_field in (src_field, dst_field):
                self.assertIn('data', get_variable_names(chunk_field.data_variables))

            gathered = MPI_COMM.gather(dst_field['data'].get_value().sum())
            if MPI_RANK == 0:
                rank_sums.append(np.sum(gathered))

        if vm.rank == 0:
            self.assertAlmostEqual(desired_sum, np.sum(rank_sums))
            index_path = gs.create_full_path_from_template('index_file')
            self.assertTrue(os.path.exists(index_path))

        vm.barrier()

        index_path = gs.create_full_path_from_template('index_file')
        index_field = RequestDataset(index_path).get()
        self.assertTrue(len(list(index_field.keys())) > 2)
Пример #6
0
    def _write_variable_collection_main_(cls, field, opened_or_path, write_mode, **kwargs):
        """
        Write a field's records to a vector GIS target through Fiona, one rank at a time.

        :param field: The field to write. It must be an ``ocgis`` ``Field`` with a geometry variable.
        :param opened_or_path: An already-opened output object or the target handed to ``driver_scope``. When it is
         already opened, its ``schema``, ``crs`` and ``driver`` attributes replace any values given in ``kwargs``.
        :param write_mode: The MPI write mode. No template file is written for ``MPIWriteMode.FILL`` and no records are
         written for ``MPIWriteMode.TEMPLATE``.
        :param kwargs: Optional ``crs``, ``fiona_schema``, ``fiona_driver`` (defaults to ``'ESRI Shapefile'``) and
         ``iter_kwargs``. The ``iter_kwargs`` dictionary is removed from ``kwargs`` and receives a driver entry.
        :raises: ValueError
        """

        from ocgis.collection.field import Field

        if not isinstance(field, Field):
            raise ValueError('Only fields may be written to vector GIS formats.')

        fiona_crs = kwargs.get('crs')
        fiona_schema = kwargs.get('fiona_schema')
        fiona_driver = kwargs.get('fiona_driver', 'ESRI Shapefile')
        iter_kwargs = kwargs.pop('iter_kwargs', {})
        iter_kwargs[KeywordArgument.DRIVER] = cls

        # The geometry variable supplies the general geometry type, which the record iterator may not reveal.
        geom_variable = field.geom
        if geom_variable is None:
            raise ValueError('A geometry variable is required for writing to vector GIS formats.')

        if cls.inquire_opened_state(opened_or_path):
            # An opened target dictates its own schema, coordinate system and driver.
            fiona_schema = opened_or_path.schema
            fiona_crs = opened_or_path.crs
            fiona_driver = opened_or_path.driver
        else:
            # Fall back to the field's coordinate system and to a schema derived from the first record.
            if fiona_crs is None and field.crs is not None:
                fiona_crs = field.crs.value
            _, first_record = next(field.iter(**iter_kwargs))
            first_record = format_record_for_fiona(fiona_driver, first_record)
            if fiona_schema is None:
                fiona_schema = get_fiona_schema(geom_variable.geom_type, first_record)

        # The Fiona GeoJSON driver does not support update, so it is written in one pass without a template.
        is_geojson = fiona_driver == 'GeoJSON'
        mode = 'w' if is_geojson else 'a'

        # The first rank creates the empty template file that the other writes append to.
        if not is_geojson and vm.rank == 0 and write_mode != MPIWriteMode.FILL:
            with driver_scope(cls, opened_or_path=opened_or_path, mode='w', driver=fiona_driver, crs=fiona_crs,
                              schema=fiona_schema) as _:
                pass

        if write_mode == MPIWriteMode.TEMPLATE:
            return

        # Ranks take turns writing; the barrier keeps them in step.
        for rank_to_write in vm.ranks:
            if vm.rank == rank_to_write:
                with driver_scope(cls, opened_or_path=opened_or_path, mode=mode, driver=fiona_driver,
                                  crs=fiona_crs, schema=fiona_schema) as sink:
                    write_records_to_fiona(sink, field.iter(**iter_kwargs), fiona_driver)
            vm.barrier()
Пример #7
0
def create_unique_global_array(arr):
    """
    Reduce an array to the unique values this rank keeps after de-duplication against the other ranks.

    The local unique values are computed first. Each rank in ``vm.ranks`` then takes a turn as root and sends its
    remaining unique values to every other rank, which drop any value they also hold. A rank left with no values
    returns a zero-length array. This call is collective across the current VM.

    :param arr: Input array for unique operation.
    :type arr: :class:`numpy.ndarray`
    :rtype: :class:`numpy.ndarray`
    :raises: ValueError
    """

    from ocgis import vm

    if arr is None:
        raise ValueError('Input must be a NumPy array.')

    from ocgis.vmachine.mpi import rank_print
    rank_print('starting np.unique')
    unique_local = np.unique(arr)
    rank_print('finished np.unique')

    rank_print('waiting at barrier1')
    vm.barrier()

    tag_unique_count = MPITag.UNIQUE_GLOBAL_COUNT
    tag_unique_check = MPITag.UNIQUE_GLOBAL_CHECK

    for root_rank in vm.ranks:
        rank_print('root_rank=', root_rank)

        is_root = vm.rank == root_rank

        # Only the root knows whether it has anything to share; everyone else learns it from the broadcast.
        has_unique_local = vm.bcast(len(unique_local) != 0 if is_root else None, root=root_rank)

        if has_unique_local:
            if is_root:
                receivers = [rank for rank in vm.ranks if rank != vm.rank]
                # Announce how many values will follow, then send them one by one to each receiver.
                for receiver in receivers:
                    vm.comm.send(len(unique_local), dest=receiver, tag=tag_unique_count)
                for u in unique_local:
                    for receiver in receivers:
                        vm.comm.send(u, dest=receiver, tag=tag_unique_check)
            else:
                n_incoming = vm.comm.recv(source=root_rank, tag=tag_unique_count)
                for _ in range(n_incoming):
                    candidate = vm.comm.recv(source=root_rank, tag=tag_unique_check)
                    # The root keeps the value, so drop the local copy.
                    if candidate in unique_local:
                        keep = np.invert(unique_local == candidate)
                        unique_local = unique_local[keep]
        rank_print('waiting at barrier 2')
        vm.barrier()
    return unique_local
Пример #8
0
Файл: nc.py Проект: wk1984/ocgis
    def _write_variable_collection_main_(cls, vc, opened_or_path, write_mode,
                                         **kwargs):
        """
        Write a variable collection and its child collections to a netCDF target, one rank at a time.

        :param vc: The variable collection to write.
        :param opened_or_path: The target handed to ``driver_scope``.
        :param write_mode: The MPI write mode; it must not be ``None``. ``FILL`` opens the target in append mode and
         skips global attributes, ``ASYNCHRONOUS`` writes once without regard to rank, and ``TEMPLATE`` writes on the
         first rank only.
        :param kwargs: Optional ``dataset_kwargs`` (passed to ``driver_scope``) and ``variable_kwargs`` (passed to each
         variable write). The full set of keyword arguments is passed on to child writes.
        """
        assert write_mode is not None

        dataset_kwargs = kwargs.get('dataset_kwargs', {})
        variable_kwargs = kwargs.get('variable_kwargs', {})

        is_fill = write_mode == MPIWriteMode.FILL
        is_asynchronous = write_mode == MPIWriteMode.ASYNCHRONOUS
        is_template = write_mode == MPIWriteMode.TEMPLATE

        # When filling a dataset, we use append mode.
        mode = 'a' if is_fill else 'w'
        # For an asynchronous write, treat everything like a single rank.
        possible_ranks = [0] if is_asynchronous else vm.ranks

        for rank_to_write in possible_ranks:
            # The template write only occurs on the first rank.
            skipped_template_turn = is_template and rank_to_write != 0
            if not skipped_template_turn and (is_asynchronous or vm.rank == rank_to_write):
                with driver_scope(cls, opened_or_path=opened_or_path, mode=mode, **dataset_kwargs) as dataset:
                    # Global attributes are already present when filling.
                    if not is_fill:
                        vc.write_attributes_to_netcdf_object(dataset)
                    for variable in get_variables_to_write(vc):
                        # Load before orphaning: the variable needs its parent to know which group it is in.
                        variable.load()
                        # Orphaning is required as a variable will attempt to write its parent first.
                        with orphaned(variable, keep_dimensions=True):
                            variable.write(dataset, write_mode=write_mode, **variable_kwargs)
                    # Recurse the children, creating their groups unless they already exist from a prior write.
                    for child in list(vc.children.values()):
                        if is_fill:
                            group = dataset.groups[child.name]
                        else:
                            group = nc.Group(dataset, child.name)
                        child.write(group, write_mode=write_mode, **kwargs)
                    dataset.sync()
            # Every rank reaches the barrier on every turn, whether or not it wrote.
            vm.barrier()
Пример #9
0
    def test_system_grid_chunking(self):
        """
        Run the grid chunker on an ESMF unstructured dataset paired with its own copy and validate the outputs.

        Requires exactly four VM ranks; otherwise the test is skipped.
        """
        if vm.size != 4:
            raise SkipTest('vm.size != 4')

        from ocgis.spatial.grid_chunker import GridChunker

        resolution = 0.28125
        chunk_wd = os.path.join(self.current_dir_output, 'chunks')

        rd_dst = RequestDataset(uri=self.path_esmf_unstruct,
                                driver=DriverESMFUnstruct,
                                crs=Spherical(),
                                grid_abstraction='point',
                                grid_is_isomorphic=True)
        rd_src = deepcopy(rd_dst)

        # The working directory is created once, by the first rank, before anyone writes into it.
        if vm.rank == 0:
            os.mkdir(chunk_wd)
        vm.barrier()

        gc = GridChunker(rd_src,
                         rd_dst,
                         nchunks_dst=[8],
                         src_grid_resolution=resolution,
                         dst_grid_resolution=resolution,
                         optimized_bbox_subset=True,
                         paths={'wd': chunk_wd},
                         genweights=True)
        gc.write_chunks()

        # A distributed dimension of size eight assigns each rank its share of chunk counters.
        dist = OcgDist()
        local_ctr = Dimension(name='ctr', size=8, dist=True)
        dist.add_dimension(local_ctr)
        dist.update_dimension_bounds()

        # File names count from one while the local bounds count from zero.
        for file_number in range(local_ctr.bounds_local[0] + 1, local_ctr.bounds_local[1] + 1):
            src_file = os.path.join(chunk_wd, 'split_src_{}.nc'.format(file_number))
            dst_file = os.path.join(chunk_wd, 'split_dst_{}.nc'.format(file_number))
            src_size = Field.read(src_file, driver=DriverESMFUnstruct).grid.shape[0]
            dst_size = Field.read(dst_file, driver=DriverESMFUnstruct).grid.shape[0]
            # The source chunk is larger than the destination chunk, but by no more than 150 elements.
            self.assertLessEqual(src_size - dst_size, 150)
            self.assertGreater(src_size, dst_size)

            weights = Field.read(os.path.join(chunk_wd, 'esmf_weights_{}.nc'.format(file_number)))
            S = weights['S'].v()
            self.assertAlmostEqual(S.min(), 1.0)
            self.assertAlmostEqual(S.max(), 1.0)

        with vm.scoped('merge weights', [0]):
            if not vm.is_null:
                merged_weights = self.get_temporary_file_path('merged_weights.nc')
                gc.create_merged_weight_file(merged_weights, strict=False)
                S = Field.read(merged_weights)['S'].v()
                self.assertAlmostEqual(S.min(), 1.0)
                self.assertAlmostEqual(S.max(), 1.0)
Пример #10
0
Файл: nc.py Проект: NCPP/ocgis
    def _write_variable_collection_main_(cls, vc, opened_or_path, write_mode, **kwargs):
        """
        Serialize a variable collection, including its children, to a netCDF target with ranks writing in turn.

        :param vc: The variable collection to write.
        :param opened_or_path: The target handed to ``driver_scope``.
        :param write_mode: The MPI write mode; asserted to be not ``None``. Append mode is used for ``FILL``, a single
         rank-independent pass is used for ``ASYNCHRONOUS``, and only the first turn writes for ``TEMPLATE``.
        :param kwargs: May hold ``dataset_kwargs`` for ``driver_scope`` and ``variable_kwargs`` for the variable
         writes. Child collections are written with all keyword arguments unchanged.
        """
        assert write_mode is not None

        dataset_kwargs = kwargs.get('dataset_kwargs', {})
        variable_kwargs = kwargs.get('variable_kwargs', {})

        filling = write_mode == MPIWriteMode.FILL
        asynchronous = write_mode == MPIWriteMode.ASYNCHRONOUS

        def write_into(dataset):
            # Write global attributes if we are not filling data.
            if not filling:
                vc.write_attributes_to_netcdf_object(dataset)
            # This is the main variable write loop.
            for variable in get_variables_to_write(vc):
                # The data is loaded while the variable still has its parent, which identifies its group.
                variable.load()
                # A variable with a parent would try to write that parent first, so it is orphaned for the write.
                with orphaned(variable, keep_dimensions=True):
                    variable.write(dataset, write_mode=write_mode, **variable_kwargs)
            # Recurse the children. Groups are created on a fresh write and looked up when filling.
            for child in list(vc.children.values()):
                group = dataset.groups[child.name] if filling else nc.Group(dataset, child.name)
                child.write(group, write_mode=write_mode, **kwargs)
            dataset.sync()

        # When filling a dataset, we use append mode.
        mode = 'a' if filling else 'w'

        # For an asynchronous write, treat everything like a single rank.
        possible_ranks = [0] if asynchronous else vm.ranks

        for rank_to_write in possible_ranks:
            # The template write only occurs on the first rank.
            template_turn_to_skip = write_mode == MPIWriteMode.TEMPLATE and rank_to_write != 0
            my_turn = asynchronous or vm.rank == rank_to_write
            if not template_turn_to_skip and my_turn:
                with driver_scope(cls, opened_or_path=opened_or_path, mode=mode, **dataset_kwargs) as dataset:
                    write_into(dataset)
            vm.barrier()
Пример #11
0
    def test_barrier(self):
        """
        Check that a barrier on a two-rank subcommunicator works when the job runs with four MPI ranks.
        """
        if MPI_SIZE != 4:
            raise SkipTest('MPI_SIZE != 4')

        participating = [1, 3]
        vm = OcgVM()
        vm.create_subcomm('for barrier', participating, is_current=True)

        if vm.is_null:
            # A null VM must only occur on ranks left out of the subcommunicator.
            self.assertNotIn(MPI_RANK, participating)
        else:
            self.assertEqual(vm.size, 2)

        # Only members of the subcommunicator may call the barrier.
        if MPI_RANK in participating:
            vm.barrier()

        vm.finalize()
Пример #12
0
def arange_from_dimension(dim, start=0, dtype=None, dist=True):
    """
    Build a run of consecutive integers, ``numpy.arange``-style, whose length is taken from a dimension. The call is
    collective across the current :class:`~ocgis.OcgVM` when ``dist=True`` (the default).

    :param dim: The dimension whose length sets the number of values produced.
    :type dim: :class:`~ocgis.Dimension`
    :param int start: The first value of the range. With ``dist=True``, a rank that receives an offset from the rank
     before it uses that offset in place of ``start``.
    :param dtype: The data type for the output array. ``None`` selects ``ocgis.env.NP_INT``.
    :param bool dist: If ``True``, each rank forwards the end of its range to the next rank and the call ends with a VM
     barrier. If ``False``, the array is created locally.
    :rtype: :class:`numpy.ndarray`
    """

    if dtype is None:
        from ocgis import env
        dtype = env.NP_INT

    count = len(dim)

    if dist:
        from ocgis import vm
        for sender in vm.ranks:
            receiver = sender + 1
            # There is no rank after the last one to forward an offset to.
            if receiver == vm.size:
                break
            if vm.rank == sender:
                # Ship the end of this rank's range as a one-element typed buffer.
                outgoing = np.array([start + count], dtype=dtype)
                vm.comm.Send([outgoing, vm.get_mpi_type(dtype)],
                             dest=receiver,
                             tag=MPITag.ARANGE_FROM_DIMENSION)
            elif vm.rank == receiver:
                # Receive into a matching one-element buffer and continue from that offset.
                incoming = np.zeros(1, dtype=dtype)
                vm.comm.Recv([incoming, vm.get_mpi_type(dtype)],
                             source=sender,
                             tag=MPITag.ARANGE_FROM_DIMENSION)
                start = incoming[0]
        vm.barrier()

    return np.arange(start, start + count, dtype=dtype)
Пример #13
0
    def test_barrier(self):
        """
        Exercise ``OcgVM.barrier`` on a subcommunicator made of ranks one and three of a four-rank job.
        """
        if MPI_SIZE != 4:
            raise SkipTest('MPI_SIZE != 4')

        subcomm_ranks = [1, 3]
        vm = OcgVM()
        vm.create_subcomm('for barrier', subcomm_ranks, is_current=True)

        in_subcomm = MPI_RANK in subcomm_ranks

        if vm.is_null:
            # Ranks outside the subcommunicator see a null VM.
            self.assertNotIn(MPI_RANK, subcomm_ranks)
        else:
            self.assertEqual(vm.size, 2)

        # The barrier is collective over the subcommunicator only.
        if in_subcomm:
            vm.barrier()

        vm.finalize()
Пример #14
0
Файл: csv_.py Проект: NCPP/ocgis
    def _write_variable_collection_main_(cls, vc, opened_or_path, write_mode, **kwargs):
        """
        Write the records of a variable collection to a CSV target, with ranks appending their rows in turn.

        :param vc: The variable collection to write. ``raise_if_empty`` is called on it first.
        :param opened_or_path: The target handed to ``driver_scope``.
        :param write_mode: The MPI write mode. The header is not written for ``MPIWriteMode.FILL`` and no rows are
         written for ``MPIWriteMode.TEMPLATE``.
        :param kwargs: May hold iterator keyword arguments under ``KeywordArgument.ITER_KWARGS``; they are removed
         from ``kwargs`` and passed to ``vc.iter``.
        """
        raise_if_empty(vc)

        iter_kwargs = kwargs.pop(KeywordArgument.ITER_KWARGS, {})

        # The keys of the first record define the CSV columns.
        first_record = six.next(vc.iter(**iter_kwargs))[1]
        fieldnames = list(first_record.keys())

        # The first rank creates the file and writes the header row.
        if vm.rank == 0 and write_mode != MPIWriteMode.FILL:
            with driver_scope(cls, opened_or_path, mode='w') as opened:
                csv.DictWriter(opened, fieldnames).writeheader()

        if write_mode == MPIWriteMode.TEMPLATE:
            return

        # Ranks append one after another; the barrier holds the others back while one writes.
        for current_rank_write in vm.ranks:
            if vm.rank == current_rank_write:
                with driver_scope(cls, opened_or_path, mode='a') as opened:
                    writer = csv.DictWriter(opened, fieldnames)
                    for _, record in vc.iter(**iter_kwargs):
                        writer.writerow(record)
            vm.barrier()
Пример #15
0
def arange_from_dimension(dim, start=0, dtype=None, dist=True):
    """
    Return consecutive integers in the manner of ``numpy.arange``, one per element of a dimension. With ``dist=True``
    (the default) the call is collective across the current :class:`~ocgis.OcgVM`.

    :param dim: The dimension whose length gives the size of the range.
    :type dim: :class:`~ocgis.Dimension`
    :param int start: The starting value for the range. In distributed mode, a rank that is sent an offset by the rank
     before it starts from that offset instead.
    :param dtype: The data type for the output array. If ``None``, ``ocgis.env.NP_INT`` is used.
    :param bool dist: If ``True``, ranks chain their offsets through the VM communicator and then meet at a VM
     barrier. If ``False``, create the array locally.
    :rtype: :class:`numpy.ndarray`
    """

    if dtype is None:
        from ocgis import env
        dtype = env.NP_INT

    n_local = len(dim)

    if dist:
        from ocgis import vm
        for src_rank in vm.ranks:
            dst_rank = src_rank + 1
            # The final rank has no successor, so the chain ends here.
            if dst_rank == vm.size:
                break
            if vm.rank == src_rank:
                # Send the first value of the next rank's range in a one-element typed buffer.
                payload = np.array([start + n_local], dtype=dtype)
                vm.comm.Send([payload, vm.get_mpi_type(dtype)], dest=dst_rank, tag=MPITag.ARANGE_FROM_DIMENSION)
            elif vm.rank == dst_rank:
                # Receive the predecessor's end value and use it as the local start.
                payload = np.zeros(1, dtype=dtype)
                vm.comm.Recv([payload, vm.get_mpi_type(dtype)], source=src_rank, tag=MPITag.ARANGE_FROM_DIMENSION)
                start = payload[0]
        vm.barrier()

    return np.arange(start, start + n_local, dtype=dtype)
Пример #16
0
    def _write_variable_collection_main_(cls, vc, opened_or_path, write_mode,
                                         **kwargs):
        """
        Dump a variable collection's records to CSV: the first rank writes the header, then each rank appends rows.

        :param vc: The variable collection to write; it is checked with ``raise_if_empty`` before anything else.
        :param opened_or_path: The target handed to ``driver_scope``.
        :param write_mode: The MPI write mode. ``MPIWriteMode.FILL`` skips the header write and
         ``MPIWriteMode.TEMPLATE`` skips the row writes.
        :param kwargs: The entry stored under ``KeywordArgument.ITER_KWARGS`` (if any) is popped and forwarded to
         ``vc.iter``.
        """
        raise_if_empty(vc)

        iter_kwargs = kwargs.pop(KeywordArgument.ITER_KWARGS, {})

        # Column names come from the first record yielded by the collection.
        fieldnames = list(six.next(vc.iter(**iter_kwargs))[1].keys())

        writes_header = vm.rank == 0 and write_mode != MPIWriteMode.FILL
        if writes_header:
            with driver_scope(cls, opened_or_path, mode='w') as opened:
                header_writer = csv.DictWriter(opened, fieldnames)
                header_writer.writeheader()

        if write_mode != MPIWriteMode.TEMPLATE:
            # One rank appends per turn and all ranks synchronize after each turn.
            for current_rank_write in vm.ranks:
                if vm.rank == current_rank_write:
                    with driver_scope(cls, opened_or_path, mode='a') as opened:
                        row_writer = csv.DictWriter(opened, fieldnames)
                        for item in vc.iter(**iter_kwargs):
                            _, record = item
                            row_writer.writerow(record)
                vm.barrier()
Пример #17
0
    def write_subsets(self, src_template, dst_template, wgt_template, index_path):
        """
        Write grid subsets to netCDF files using the provided filename templates, then write an index file describing
        them. Each source/destination template must contain the full file path with a single curly-brace pair where
        the combination counter (starting at one) is inserted. ``wgt_template`` should not be a full path. This name
        is used when generating weight files.

        >>> template_example = '/path/to/data_{}.nc'

        :param str src_template: The template for the source subset file.
        :param str dst_template: The template for the destination subset file.
        :param str wgt_template: The template for the weight filename.

        >>> wgt_template = 'esmf_weights_{}.nc'

        :param index_path: Path to the output indexing netCDF. The index stores the source and destination file names
         (without their directories), the weight file names, and the start/stop of each destination slice along the
         destination grid's x and y dimensions.
        """

        # Accumulated per subset and written to the index file at the end.
        src_filenames = []
        dst_filenames = []
        wgt_filenames = []
        dst_slices = []

        # nzeros = len(str(reduce(lambda x, y: x * y, self.nsplits_dst)))

        for ctr, (sub_src, sub_dst, dst_slc) in enumerate(self.iter_src_grid_subsets(yield_dst=True), start=1):
            # padded = create_zero_padded_integer(ctr, nzeros)

            src_path = src_template.format(ctr)
            dst_path = dst_template.format(ctr)
            wgt_filename = wgt_template.format(ctr)

            # Only the file name portion of the source and destination paths goes into the index.
            src_filenames.append(os.path.split(src_path)[1])
            dst_filenames.append(os.path.split(dst_path)[1])
            wgt_filenames.append(wgt_filename)
            dst_slices.append(dst_slc)

            # Write the source subset, then the destination subset.
            for target, path in zip([sub_src, sub_dst], [src_path, dst_path]):
                # An empty subset is wrapped in an empty field that carries no grid.
                if target.is_empty:
                    is_empty = True
                    target = None
                else:
                    is_empty = False
                field = Field(grid=target, is_empty=is_empty)
                ocgis_lh(msg='writing: {}'.format(path), level=logging.DEBUG)
                # The write happens inside a VM scope derived from the field; ranks with a null VM skip it.
                with vm.scoped_by_emptyable('field.write', field):
                    if not vm.is_null:
                        field.write(path)
                ocgis_lh(msg='finished writing: {}'.format(path), level=logging.DEBUG)

        # The index file is assembled and written inside a VM scope limited to rank 0.
        with vm.scoped('index write', [0]):
            if not vm.is_null:
                dim = Dimension('nfiles', len(src_filenames))
                # Parallel lists: variable name, values and attributes for the three filename variables.
                vname = ['source_filename', 'destination_filename', 'weights_filename']
                values = [src_filenames, dst_filenames, wgt_filenames]
                grid_splitter_destination = GridSplitterConstants.IndexFile.NAME_DESTINATION_VARIABLE
                attrs = [{'esmf_role': 'grid_splitter_source'},
                         {'esmf_role': grid_splitter_destination},
                         {'esmf_role': 'grid_splitter_weights'}]

                vc = VariableCollection()

                # The index variable holds no data; its attributes name the other variables in the file.
                grid_splitter_index = GridSplitterConstants.IndexFile.NAME_INDEX_VARIABLE
                vidx = Variable(name=grid_splitter_index)
                vidx.attrs['esmf_role'] = grid_splitter_index
                vidx.attrs['grid_splitter_source'] = 'source_filename'
                vidx.attrs[GridSplitterConstants.IndexFile.NAME_DESTINATION_VARIABLE] = 'destination_filename'
                vidx.attrs['grid_splitter_weights'] = 'weights_filename'
                x_bounds = GridSplitterConstants.IndexFile.NAME_X_BOUNDS_VARIABLE
                vidx.attrs[x_bounds] = x_bounds
                y_bounds = GridSplitterConstants.IndexFile.NAME_Y_BOUNDS_VARIABLE
                vidx.attrs[y_bounds] = y_bounds
                vc.add_variable(vidx)

                # One string variable per filename list, all sharing the 'nfiles' dimension.
                for idx in range(len(vname)):
                    v = Variable(name=vname[idx], dimensions=dim, dtype=str, value=values[idx], attrs=attrs[idx])
                    vc.add_variable(v)

                # Integer bounds variables with one (start, stop) pair per written subset.
                bounds_dimension = Dimension(name='bounds', size=2)
                xb = Variable(name=x_bounds, dimensions=[dim, bounds_dimension], attrs={'esmf_role': 'x_split_bounds'},
                              dtype=int)
                yb = Variable(name=y_bounds, dimensions=[dim, bounds_dimension], attrs={'esmf_role': 'y_split_bounds'},
                              dtype=int)

                # Destination slices are keyed by the name of the first dimension of the grid's x and y variables.
                x_name = self.dst_grid.x.dimensions[0].name
                y_name = self.dst_grid.y.dimensions[0].name
                for idx, slc in enumerate(dst_slices):
                    xb.get_value()[idx, :] = slc[x_name].start, slc[x_name].stop
                    yb.get_value()[idx, :] = slc[y_name].start, slc[y_name].stop
                vc.add_variable(xb)
                vc.add_variable(yb)

                vc.write(index_path)

        # All ranks wait here until the index write scope has been left.
        vm.barrier()
Пример #18
0
def reduce_reindex_coordinate_index(cindex, start_index=0):
    """
    Re-index the subset of global coordinate indices stored in ``cindex``.

    New index values are counted from ``start_index`` (``0`` or ``1``). Masks are not respected.

    A two-element tuple is returned:

     * First element --> A :class:`numpy.ndarray` with the same shape as ``cindex`` holding the new indexing.
     * Second element --> A :class:`numpy.ndarray` with the unique indices; these may be used to reduce an external
       coordinate storage variable or array.

    :param cindex: Coordinate index integer values, either as a variable (which may be distributed) or as a NumPy
     array.
    :type cindex: :class:`~ocgis.Variable` | :class:`~numpy.ndarray`
    :param int start_index: First value used when re-indexing ``cindex``. This may be ``0`` or ``1``.
    :rtype: tuple
    """

    # Unwrap variable objects; anything without ``get_value`` is assumed to be a NumPy array already.
    try:
        cindex = cindex.get_value()
    except AttributeError:
        pass

    # Process a flattened array and remember the incoming shape so the result can be reshaped before returning.
    cindex = np.atleast_1d(cindex)
    original_shape = cindex.shape
    cindex = cindex.flatten()

    # Unique coordinate index values assigned to this rank.
    u = np.array(create_unique_global_array(cindex))

    # All ranks adopt the data type found on the root rank for the new coordinate index.
    lrank = vm.rank
    dtype = vm.bcast(u.dtype if lrank == 0 else None)

    # Indicates if the current rank holds any unique values.
    n_unique = len(u)
    has_u = n_unique > 0

    # New index values, one for each local unique value.
    new_u_dimension = create_distributed_dimension(n_unique, name='__new_u_dimension__')
    new_u = arange_from_dimension(new_u_dimension, start=start_index, dtype=dtype)

    # Mapping from old index value to new index value. This is used to remap the old coordinate index.
    uidx = dict(zip(u, new_u)) if has_u else None

    vm.barrier()

    # The extremes of the rank's unique values act as a cheap index when ranks look for overlaps. Every rank
    # receives a copy of every other rank's bounds.
    local_bounds = (min(u), max(u)) if has_u else None
    lb_global = vm.bcast(vm.gather(local_bounds))

    # Another rank is of interest when any local coordinate index falls inside that rank's unique bounds.
    overlaps = []
    for rank, lb in enumerate(lb_global):
        if rank == lrank or lb is None:
            continue
        in_bounds = np.logical_and(lb[1] >= cindex, lb[0] <= cindex)
        if np.any(in_bounds):
            overlaps.append(rank)

    # Share the overlaps so each rank can work out which ranks will be asking it for data.
    global_overlaps = vm.bcast(vm.gather(overlaps))
    destinations = []
    for other_rank, other_overlaps in enumerate(global_overlaps):
        if lrank in other_overlaps:
            destinations.append(other_rank)

    # MPI communication tags in the positional order expected by the run functions.
    tags = (MPITag.REDUCE_REINDEX_CHILD_FINISHED, MPITag.REDUCE_REINDEX_FOUND, MPITag.REDUCE_REINDEX_SEARCH,
            MPITag.REDUCE_REINDEX_SUCCESS)

    # Fill array for the new coordinate index. The root and non-root ranks run different procedures.
    new_cindex = np.empty_like(cindex)
    if lrank == 0:
        run_rr_root(new_cindex, cindex, uidx, destinations, *tags)
    else:
        run_rr_nonroot(new_cindex, cindex, uidx, destinations, has_u, overlaps, *tags)

    # Restore the shape of the incoming coordinate index.
    new_cindex = new_cindex.reshape(*original_shape)

    vm.barrier()

    return new_cindex, u
Пример #19
0
    def write_subsets(self):
        """
        Write grid subsets to netCDF files using the provided filename templates.

        Every subset pair yielded by the source grid subset iterator gets a source path, a destination path and a
        weights path. Source subsets are always written; destination subsets are written only when no destination
        iterator was provided. Afterwards the first rank writes an index file listing the file names, the global grid
        shapes and the slice bounds.
        """
        index_path = self.create_full_path_from_template('index_file')

        src_filenames, dst_filenames, wgt_filenames = [], [], []
        src_slices, dst_slices = [], []

        # File numbering starts at one and advances once for every yielded subset pair.
        subsets = self.iter_src_grid_subsets(yield_dst=True)
        for ctr, (sub_src, src_slc, sub_dst, dst_slc) in enumerate(subsets, 1):
            src_path = self.create_full_path_from_template('src_template', index=ctr)
            dst_path = self.create_full_path_from_template('dst_template', index=ctr)
            wgt_path = self.create_full_path_from_template('wgt_template', index=ctr)

            # Source and destination entries keep only the file name; the weights entry keeps the path as created.
            src_filenames.append(os.path.basename(src_path))
            dst_filenames.append(os.path.basename(dst_path))
            wgt_filenames.append(wgt_path)
            src_slices.append(src_slc)
            dst_slices.append(dst_slc)

            # Only write destinations if an iterator is not provided.
            to_write = [(sub_src, src_path)]
            if self.iter_dst is None:
                to_write.append((sub_dst, dst_path))

            for target, path in to_write:
                with vm.scoped_by_emptyable('field.write', target):
                    if vm.is_null:
                        continue
                    ocgis_lh(msg='writing: {}'.format(path), level=logging.DEBUG)
                    Field(grid=target).write(path)
                    ocgis_lh(msg='finished writing: {}'.format(path), level=logging.DEBUG)

        # Global shapes require a VM global scope to collect.
        src_global_shape = global_grid_shape(self.src_grid)
        dst_global_shape = global_grid_shape(self.dst_grid)

        # Gather and collapse source slices as some may be empty and we write on rank 0. For each subset, the first
        # non-null slice found across the ranks is used.
        slices_by_rank = vm.gather(src_slices)
        if vm.rank == 0:
            src_slices = [
                next((rank_slices[idx] for rank_slices in slices_by_rank if rank_slices[idx] is not None), None)
                for idx in range(len(src_slices))
            ]

        with vm.scoped('index write', [0]):
            if not vm.is_null:
                index_names = GridSplitterConstants.IndexFile
                destination_role = index_names.NAME_DESTINATION_VARIABLE
                dim = Dimension('nfiles', len(src_filenames))

                # Name, values and attributes for each of the filename variables.
                filename_variables = (
                    ('source_filename', src_filenames, {'esmf_role': 'grid_splitter_source'}),
                    ('destination_filename', dst_filenames, {'esmf_role': destination_role}),
                    ('weights_filename', wgt_filenames, {'esmf_role': 'grid_splitter_weights'}),
                )

                vc = VariableCollection()

                # The index variable carries no data; its attributes point at the other variables in the file.
                index_variable_name = index_names.NAME_INDEX_VARIABLE
                vidx = Variable(name=index_variable_name)
                vidx.attrs['esmf_role'] = index_variable_name
                vidx.attrs['grid_splitter_source'] = 'source_filename'
                vidx.attrs[destination_role] = 'destination_filename'
                vidx.attrs['grid_splitter_weights'] = 'weights_filename'
                vidx.attrs[index_names.NAME_SRC_GRID_SHAPE] = src_global_shape
                vidx.attrs[index_names.NAME_DST_GRID_SHAPE] = dst_global_shape
                vc.add_variable(vidx)

                for variable_name, variable_value, variable_attrs in filename_variables:
                    vc.add_variable(Variable(name=variable_name, dimensions=dim, dtype=str, value=variable_value,
                                             attrs=variable_attrs))

                bounds_dimension = Dimension(name='bounds', size=2)
                # TODO: This needs to work with four dimensions.
                # Source bounds first, then destination bounds.
                self.src_grid._gs_create_index_bounds_(RegriddingRole.SOURCE, vidx, vc, src_slices, dim,
                                                       bounds_dimension)
                self.dst_grid._gs_create_index_bounds_(RegriddingRole.DESTINATION, vidx, vc, dst_slices, dim,
                                                       bounds_dimension)

                vc.write(index_path)

        vm.barrier()
Пример #20
0
def create_unique_global_array(arr):
    """
    Create a distributed NumPy array containing unique elements. If the rank has no unique items, an array with zero
    elements will be returned. This call is collective across the current VM.

    Values are first made unique locally. Ranks whose value ranges intersect then exchange candidate values: each
    rank sends the values falling inside a higher rank's range to that rank, and each rank removes from its own array
    any value it receives from a lower rank. A value present on several ranks is therefore kept only on the lowest
    of those ranks.

    A rank with an empty input takes part in the collective calls but exchanges no data and returns an empty array.

    :param arr: Input array for unique operation.
    :type arr: :class:`numpy.ndarray`
    :rtype: :class:`numpy.ndarray`
    :raises: ValueError
    """

    from ocgis import vm

    if arr is None:
        raise ValueError('Input must be a NumPy array.')

    unique_local = np.unique(arr)
    vm.barrier()

    # An empty rank has no value range. It advertises ``None`` so the other ranks skip it instead of the rank
    # raising before the collective calls below.
    if unique_local.size > 0:
        local_bounds = unique_local.min(), unique_local.max()
    else:
        local_bounds = None
    lb_global = vm.gather(local_bounds)
    lb_global = vm.bcast(lb_global)

    # Find the vm ranks the local rank cares about. It cares if the closed value ranges intersect. The test is
    # symmetric, so a rank expecting data from another rank is always matched by that rank sending it.
    lrank = vm.rank
    overlaps = []
    if local_bounds is not None:
        for rank, lb in enumerate(lb_global):
            if rank == lrank or lb is None:
                continue
            if lb[0] <= local_bounds[1] and local_bounds[0] <= lb[1]:
                overlaps.append(rank)

    # Send out the overlapping sources.
    tag_overlap = MPITag.OVERLAP_CHECK
    tag_select_send_size = MPITag.SELECT_SEND_SIZE
    vm.barrier()

    # NumPy and MPI types.
    np_type = unique_local.dtype
    mpi_type = vm.get_mpi_type(np_type)

    # Non-blocking sends to higher ranks. The requests are stored together with their buffers: a send buffer must
    # stay alive and untouched until its request has completed.
    pending_sends = []
    for o in overlaps:
        if lrank < o:
            dest_rank_bounds = lb_global[o]
            select_send = np.logical_and(unique_local >= dest_rank_bounds[0], unique_local <= dest_rank_bounds[1])
            u_src = unique_local[select_send]
            # The element count goes first so the receiver can allocate a buffer of the right size.
            size_buffer = np.array([u_src.size], dtype=np_type)
            req_size = vm.comm.Isend([size_buffer, mpi_type], dest=o, tag=tag_select_send_size)
            req_data = vm.comm.Isend([u_src, mpi_type], dest=o, tag=tag_overlap)
            pending_sends.append((req_size, size_buffer))
            pending_sends.append((req_data, u_src))

    # Receive and process conflicts to reduce the unique local values. All local sends were posted above, so waiting
    # on a receive here cannot block another rank's progress.
    for o in overlaps:
        if lrank > o:
            select_send_size = np.array([0], dtype=np_type)
            vm.comm.Irecv([select_send_size, mpi_type], source=o, tag=tag_select_send_size).wait()

            u_src = np.zeros(select_send_size[0].astype(int), dtype=np_type)
            vm.comm.Irecv([u_src, mpi_type], source=o, tag=tag_overlap).wait()

            # Drop every local value that the lower rank also holds.
            unique_local = unique_local[~np.isin(unique_local, u_src)]

    # Complete the outstanding sends before their buffers go out of scope.
    for request, _ in pending_sends:
        request.wait()

    vm.barrier()
    return unique_local
Пример #21
0
def reduce_reindex_coordinate_variables(cindex, start_index=0):
    """
    Reindex a subset of global coordinate indices contained in the ``cindex`` variable.

    The starting index value (``0`` or ``1``) is set by ``start_index`` for the re-indexing procedure.

    Function will not respect masks.

    Each rank numbers its own unique index values, offset by the count of values numbered on the preceding ranks.
    Values in ``cindex`` that are not part of the local unique set are resolved by asking the other ranks.

    The function returns a two-element tuple:

     * First element --> A :class:`numpy.ndarray` with the same dimension as ``cindex`` containing the new indexing.
     * Second element --> A :class:`numpy.ndarray` containing the unique indices that may be used to reduce an external
       coordinate storage variable or array.

    :param cindex: A variable containing coordinate index integer values. This variable may be distributed. This may
     also be a NumPy array.
    :type cindex: :class:`~ocgis.Variable` | :class:`~numpy.ndarray`
    :param int start_index: The first index to use for the re-indexing of ``cindex``. This may be ``0`` or ``1``.
    :rtype: tuple
    """

    # Get the coordinate index values as a NumPy array.
    try:
        cindex = cindex.get_value()
    except AttributeError:
        # Assume this is already a NumPy array.
        pass

    # Create the unique coordinate index array.
    u = np.array(create_unique_global_array(cindex))

    # Holds re-indexed values.
    new_cindex = np.empty_like(cindex)
    # Caches the local re-indexing for the process. Each new unique value receives the next sequential position.
    cache = {}
    for to_reindex in u.flat:
        if to_reindex not in cache:
            cache[to_reindex] = len(cache)

    # MPI communication tags.
    tag_cache_create = MPITag.REINDEX_CACHE_CREATE
    tag_cache_get_recv = MPITag.REINDEX_CACHE_GET_RECV
    tag_cache_get_send = MPITag.REINDEX_CACHE_GET_SEND

    # This is the local offset to move sequentially across processes. If the local cache is empty, there is no
    # offsetting to move between tasks.
    offset = len(cache)

    # Synchronize the processes with the appropriate local offset. Each rank passes its own start plus its offset on
    # to the next rank, which adopts it as its start.
    for idx, rank in enumerate(vm.ranks):
        try:
            dest_rank = vm.ranks[idx + 1]
        except IndexError:
            break
        else:
            if vm.rank == rank:
                vm.comm.send(start_index + offset,
                             dest=dest_rank,
                             tag=tag_cache_create)
            elif vm.rank == dest_rank:
                offset_previous = vm.comm.recv(source=rank,
                                               tag=tag_cache_create)
                start_index = offset_previous
    vm.barrier()

    # Find any missing local coordinate indices that are not mapped by the local cache. Positions come from a flat
    # iteration, so the arrays are addressed through ``.flat`` to remain correct for multi-dimensional input.
    is_missing_indices = []
    for idx, to_reindex in enumerate(cindex.flat):
        local_new_cindex = cache.get(to_reindex)
        if local_new_cindex is None:
            is_missing_indices.append(idx)
        else:
            new_cindex.flat[idx] = local_new_cindex + start_index
    is_missing = len(is_missing_indices) > 0

    # Check if there are any processors missing their new index values.
    is_missing_global = vm.gather(is_missing)
    if vm.rank == 0:
        is_missing_global = any(is_missing_global)
    is_missing_global = vm.bcast(is_missing_global)

    # Execute a search across the process caches for any missing coordinate index values.
    if is_missing_global:
        for rank in vm.ranks:
            is_missing_rank = vm.bcast(is_missing, root=rank)
            if is_missing_rank:
                n_missing = vm.bcast(len(is_missing_indices), root=rank)
                if vm.rank == rank:
                    # Ask every other rank about each missing value; a rank answers ``None`` when it does not know
                    # the value.
                    for imi in is_missing_indices:
                        for subrank in vm.ranks:
                            if vm.rank != subrank:
                                vm.comm.send(cindex.flat[imi],
                                             dest=subrank,
                                             tag=tag_cache_get_recv)
                                new_cindex_element = vm.comm.recv(
                                    source=subrank, tag=tag_cache_get_send)
                                if new_cindex_element is not None:
                                    new_cindex.flat[imi] = new_cindex_element
                else:
                    # Answer one request per missing value on the asking rank.
                    for _ in range(n_missing):
                        curr_missing = vm.comm.recv(source=rank,
                                                    tag=tag_cache_get_recv)
                        new_cindex_element = cache.get(curr_missing)
                        if new_cindex_element is not None:
                            new_cindex_element += start_index
                        vm.comm.send(new_cindex_element,
                                     dest=rank,
                                     tag=tag_cache_get_send)

    return new_cindex, u
        if vm.rank == 0:
            print 'creating subset:', subset_filename

        with vm.scoped_by_emptyable('grid subset', grid_sub):
            if not vm.is_null:
                extent_global = grid_sub.extent_global
                if vm.rank == 0:
                    root = vm.rank_global
            else:
                extent_global = None

        live_ranks = vm.get_live_ranks_from_object(grid_sub)
        bbox = vm.bcast(extent_global, root=live_ranks[0])

        vm.barrier()
        if vm.rank == 0:
            print 'starting bbox subset:', bbox
        vm.barrier()

        has_subset = get_subset(bbox, subset_filename, 1)

        vm.barrier()
        if vm.rank == 0:
            print 'finished bbox subset:', bbox
        vm.barrier()

        has_subset = vm.gather(has_subset)
        if vm.rank == 0:
            if any(has_subset):
                has_subset = True
Пример #23
0
    def test_system_converting_state_boundaries_shapefile(self):
        # Convert the state boundaries geometries to geometry coordinates for every combination of the keywords,
        # write them with the ESMF unstructured driver and compare what is read back against the original geometries.

        # tdk:FIX: this hangs in the STATE_FIPS write for asynch might be nc4 bug...
        ocgis.env.USE_NETCDF4_MPI = False
        keywords = {'transform_to_crs': [None, Spherical],
                    'use_geometry_iterator': [False, True]}
        actual_xsums, actual_ysums = [], []
        for k in self.iter_product_keywords(keywords):
            # A target coordinate system is only passed to the conversion when the geometry iterator is in use.
            to_crs = k.transform_to_crs() if (k.use_geometry_iterator and k.transform_to_crs is not None) else None
            desired_crs = WGS84() if k.transform_to_crs is None else k.transform_to_crs()

            rd = RequestDataset(uri=self.path_state_boundaries)
            rd.metadata['schema']['geometry'] = 'MultiPolygon'
            field = rd.get()

            # Test there is no mask present.
            field.geom.load()
            self.assertFalse(field.geom.has_mask)
            self.assertNotIn(VariableName.SPATIAL_MASK, field)
            self.assertIsNone(field.dimension_map.get_spatial_mask())

            self.assertEqual(field.crs, WGS84())
            if k.transform_to_crs is not None:
                field.update_crs(desired_crs)
            try:
                geom_coords = field.geom.convert_to(pack=False, use_geometry_iterator=k.use_geometry_iterator,
                                                    to_crs=to_crs)
            except ValueError as error:
                # The error is tolerated only when both assertions hold; otherwise it is raised again.
                try:
                    self.assertFalse(k.use_geometry_iterator)
                    self.assertIsNotNone(to_crs)
                except AssertionError:
                    raise error
                else:
                    continue

            actual_xsums.append(geom_coords.x.get_value().sum())
            actual_ysums.append(geom_coords.y.get_value().sum())
            self.assertEqual(geom_coords.crs, desired_crs)

            # Test there is no mask present after conversion to geometry coordinates.
            self.assertFalse(geom_coords.has_mask)
            self.assertNotIn(VariableName.SPATIAL_MASK, geom_coords.parent)
            self.assertIsNone(geom_coords.dimension_map.get_spatial_mask())

            # Carry every variable except the geometry variable over to the converted collection.
            for variable in list(field.values()):
                if variable.name != field.geom.name:
                    geom_coords.parent.add_variable(variable.extract(), force=True)

            path = self.get_temporary_file_path('esmf_state_boundaries.nc')
            self.assertEqual(geom_coords.parent.crs, desired_crs)
            geom_coords.parent.write(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)

            gathered_geoms = vm.gather(field.geom.get_value())
            if vm.rank == 0:
                # Geometries from all ranks in rank order.
                actual_geoms = list(itertools.chain.from_iterable(gathered_geoms))

                rd_written = RequestDataset(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)
                infield = rd_written.get()
                self.assertEqual(create_crs(infield.crs.value), desired_crs)
                for data_variable in field.data_variables:
                    self.assertIn(data_variable.name, infield)
                ingrid = infield.grid
                self.assertIsInstance(ingrid, GridUnstruct)

                for yielded in ingrid.archetype.iter_geometries():
                    self.assertPolygonSimilar(yielded[1], actual_geoms[yielded[0]], check_type=False)

        vm.barrier()

        # Test coordinates have actually changed.
        # NOTE: ``k`` is the last keyword combination produced by the loop above.
        if not k.use_geometry_iterator:
            checks = ((actual_xsums, self.assertAlmostEqual),
                      (actual_ysums, self.assertNotAlmostEqual))
            for sums, check in checks:
                for lhs, rhs in itertools.combinations(sums, 2):
                    check(lhs, rhs)
Пример #24
0
    def test(self):
        # Check the destination grid slices produced by the chunker fixture, write the chunks, and confirm the written
        # files and index file are consistent with the original destination data.
        gs = self.fixture_grid_chunker()

        local_dst_sum = gs.dst_grid.parent['data'].get_value().sum()
        gathered_dst_sums = MPI_COMM.gather(local_dst_sum)
        if vm.rank == 0:
            desired_sum = np.sum(gathered_dst_sums)

        # Two row bands of 180 crossed with three column bands of 240.
        desired = [{'y': slice(ystart, ystart + 180, None),
                    'x': slice(xstart, xstart + 240, None)}
                   for ystart in (0, 180)
                   for xstart in (0, 240, 480)]
        actual = list(gs.iter_dst_grid_slices())
        self.assertEqual(actual, desired)

        gs.write_chunks()

        if vm.rank == 0:
            rank_sums = []

        nchunks = gs.nchunks_dst[0] * gs.nchunks_dst[1]
        for ctr in range(1, nchunks + 1):
            src_path = gs.create_full_path_from_template('src_template', index=ctr)
            dst_path = gs.create_full_path_from_template('dst_template', index=ctr)

            src_field = RequestDataset(src_path).get()
            dst_field = RequestDataset(dst_path).get()

            # The source chunk extent must contain the destination chunk extent.
            src_envelope_global = box(*src_field.grid.extent_global)
            dst_envelope_global = box(*dst_field.grid.extent_global)
            self.assertTrue(does_contain(src_envelope_global, dst_envelope_global))

            self.assertIn('data', get_variable_names(src_field.data_variables))
            self.assertIn('data', get_variable_names(dst_field.data_variables))

            # Sum the chunk's data across ranks and keep the total on the root rank.
            chunk_sums = MPI_COMM.gather(dst_field['data'].get_value().sum())
            if MPI_RANK == 0:
                rank_sums.append(np.sum(chunk_sums))

        if vm.rank == 0:
            self.assertAlmostEqual(desired_sum, np.sum(rank_sums))
            index_path = gs.create_full_path_from_template('index_file')
            self.assertTrue(os.path.exists(index_path))

        vm.barrier()

        index_path = gs.create_full_path_from_template('index_file')
        index_field = RequestDataset(index_path).get()
        self.assertTrue(len(list(index_field.keys())) > 2)
Пример #25
0
    def get_distributed_slice(self, slc):
        """
        Slice the dimension in parallel. The sliced dimension object is a shallow copy. The returned dimension may be
        empty.

        For a distributed dimension, a regular slice is translated from global to local coordinates; a rank whose
        local bounds do not overlap the slice receives an empty dimension. The global bounds of the result are set
        from the summed sizes gathered from the ranks, and the local bounds on the non-empty ranks are shifted so
        each rank's bounds are offset by the combined size of the ranks preceding it.

        :param slc: A :class:`slice`-like object or a fancy slice. If this is a fancy slice, ``slc`` must be
         processor-local. If the fancy slice uses integer indices, the indices must be local. In other words, a fancy
         ``slc`` is not manipulated or redistributed prior to slicing.
        :rtype: :class:`~ocgis.Dimension`
        :raises: :class:`~ocgis.exc.EmptyObjectError`
        """

        # NOTE(review): presumably raises the documented EmptyObjectError when this dimension is empty -- confirm.
        raise_if_empty(self)

        # NOTE(review): presumably standardizes the slice for a single dimension -- confirm against the helper.
        slc = get_formatted_slice(slc, 1)[0]
        # Anything that is not a ``slice`` object is treated as a fancy index.
        is_fancy = not isinstance(slc, slice)

        # Selecting everything only requires a copy.
        if not is_fancy and slc == slice(None):
            ret = self.copy()
        # Use standard slicing for non-distributed dimensions.
        elif not self.dist:
            ret = self[slc]
        else:
            if is_fancy:
                # Fancy slices are used as given; they must already be local.
                local_slc = slc
            else:
                # Translate the global start/stop to this rank's local bounds. ``None`` comes back when there is no
                # overlap.
                local_slc = get_global_to_local_slice((slc.start, slc.stop),
                                                      self.bounds_local)
                if local_slc is not None:
                    local_slc = slice(*local_slc)
            # Slice does not overlap local bounds. The dimension is now empty with size 0.
            if local_slc is None:
                ret = self.copy()
                ret.convert_to_empty()
                dimension_size = 0
            # Slice overlaps so do a slice on the dimension using the local slice.
            else:
                ret = self[local_slc]
                dimension_size = len(ret)
            assert dimension_size >= 0
            # Collect the local sizes. The first rank sums them to form the new global bounds, which are then sent to
            # all ranks.
            dimension_sizes = vm.gather(dimension_size)
            if vm.rank == 0:
                sum_dimension_size = 0
                for ds in dimension_sizes:
                    try:
                        sum_dimension_size += ds
                    except TypeError:
                        # Entries that cannot be added are skipped.
                        pass
                bounds_global = (0, sum_dimension_size)
            else:
                bounds_global = None
            bounds_global = vm.bcast(bounds_global)
            # Global bounds are only assigned on ranks that still hold part of the dimension.
            if not ret.is_empty:
                ret.bounds_global = bounds_global

            # Normalize the local bounds on live ranks.
            inner_live_ranks = get_nonempty_ranks(ret, vm)
            with vm.scoped('bounds normalization', inner_live_ranks):
                if not vm.is_null:
                    # The running offset starts as the size held by the first rank in the scope.
                    if vm.rank == 0:
                        adjust = len(ret)
                    else:
                        adjust = None
                    adjust = vm.bcast(adjust)
                    # Ranks take turns: each rank after the first shifts its local bounds by the running offset,
                    # adds its own size to the offset, and then shares the updated offset with all ranks.
                    for current_rank in vm.ranks:
                        if vm.rank == current_rank:
                            if vm.rank != 0:
                                ret.bounds_local = [
                                    b + adjust for b in ret.bounds_local
                                ]
                                adjust += len(ret)
                        vm.barrier()
                        adjust = vm.bcast(adjust, root=current_rank)
        return ret
Пример #26
0
def create_unique_global_array(arr):
    """
    Create a distributed NumPy array containing unique elements. If the rank has no unique items, an array with zero
    elements will be returned. This call is collective across the current VM.

    Every rank computes its local unique values and shares their ``(min, max)`` bounds with all other ranks. For each
    pair of ranks with overlapping bounds, the lower rank sends the values that fall inside the higher rank's bounds
    and the higher rank drops any of its own values found in what it receives.

    :param arr: Input array for unique operation.
    :type arr: :class:`numpy.ndarray`
    :rtype: :class:`numpy.ndarray`
    :raises: ValueError
    """

    from ocgis import vm

    if arr is None:
        raise ValueError('Input must be a NumPy array.')

    unique_local = np.unique(arr)
    vm.barrier()

    # Share every rank's (min, max) so each rank can work out which peers it may share values with.
    local_bounds = min(unique_local), max(unique_local)
    lb_global = vm.gather(local_bounds)
    lb_global = vm.bcast(lb_global)

    # Find the vm ranks the local rank cares about. It cares if unique values have overlapping unique bounds.
    overlaps = []
    for rank, lb in enumerate(lb_global):
        if rank == vm.rank:
            continue
        local_bound_in_remote = any(lb[0] <= b <= lb[1] for b in local_bounds)
        local_contains_remote = local_bounds[0] <= lb[0] and local_bounds[1] >= lb[1]
        if local_bound_in_remote or local_contains_remote:
            overlaps.append(rank)

    # Send out the overlapping sources.
    tag_overlap = MPITag.OVERLAP_CHECK
    tag_select_send_size = MPITag.SELECT_SEND_SIZE
    vm.barrier()

    # NumPy and MPI types.
    np_type = unique_local.dtype
    mpi_type = vm.get_mpi_type(np_type)

    # Non-blocking sends require the send buffer to stay valid until the request completes. Hold on to both the
    # requests and their buffers so nothing is garbage collected while a message is still in flight.
    send_requests = []
    send_buffers = []
    for o in overlaps:
        # Only the lower rank of an overlapping pair sends; the higher rank resolves the conflict.
        if vm.rank < o:
            dest_rank_bounds = lb_global[o]
            select_send = np.logical_and(unique_local >= dest_rank_bounds[0],
                                         unique_local <= dest_rank_bounds[1])
            u_src = unique_local[select_send]
            # NOTE(review): the element count travels in the array's own dtype so one MPI type serves both messages;
            # a count not representable in that dtype would be corrupted - confirm this cannot happen in practice.
            size_buffer = np.array([u_src.size], dtype=np_type)
            send_buffers.extend([size_buffer, u_src])
            send_requests.append(vm.comm.Isend([size_buffer, mpi_type], dest=o, tag=tag_select_send_size))
            send_requests.append(vm.comm.Isend([u_src, mpi_type], dest=o, tag=tag_overlap))

    # Receive and process conflicts to reduce the unique local values.
    for o in overlaps:
        if vm.rank > o:
            # First message: how many values the lower rank is about to send.
            select_send_size = np.array([0], dtype=np_type)
            req_select_send_size = vm.comm.Irecv([select_send_size, mpi_type],
                                                 source=o,
                                                 tag=tag_select_send_size)
            req_select_send_size.wait()
            select_send_size = select_send_size[0]

            # Second message: the values themselves.
            u_src = np.zeros(select_send_size.astype(int), dtype=np_type)
            req = vm.comm.Irecv([u_src, mpi_type], source=o, tag=tag_overlap)
            req.wait()

            # The lower rank keeps shared values; drop them locally in one vectorized membership test.
            unique_local = unique_local[np.isin(unique_local, u_src, invert=True)]

    # Complete the outstanding sends before their buffers go out of scope.
    for send_request in send_requests:
        send_request.wait()

    vm.barrier()
    return unique_local
Пример #27
0
    def get_distributed_slice(self, slc):
        """
        Slice the dimension across the ranks of the current VM. The returned dimension is a shallow copy and may be
        empty on ranks whose local bounds do not intersect the slice.

        :param slc: A :class:`slice`-like object or a fancy slice. A fancy slice must already be processor-local (integer
         indices are local indices); it is applied as-is without being manipulated or redistributed.
        :rtype: :class:`~ocgis.Dimension`
        :raises: :class:`~ocgis.exc.EmptyObjectError`
        """

        raise_if_empty(self)

        slc = get_formatted_slice(slc, 1)[0]
        fancy = not isinstance(slc, slice)

        # A full slice is only a copy; no communication is needed.
        if not fancy and slc == slice(None):
            return self.copy()
        # Non-distributed dimensions use standard slicing.
        if not self.dist:
            return self[slc]

        # Fancy slices are already local. Regular slices are translated from global to local indices.
        if fancy:
            rank_slc = slc
        else:
            start_stop = get_global_to_local_slice((slc.start, slc.stop), self.bounds_local)
            rank_slc = None if start_stop is None else slice(*start_stop)

        if rank_slc is not None:
            # The slice overlaps the local bounds, so slice the dimension locally.
            ret = self[rank_slc]
            local_count = len(ret)
        else:
            # No overlap with the local bounds: the dimension becomes empty with size 0.
            ret = self.copy()
            ret.convert_to_empty()
            local_count = 0
        assert local_count >= 0

        # Rank 0 totals the gathered sizes into the new global bounds and shares them with every rank.
        gathered_counts = vm.gather(local_count)
        new_bounds_global = None
        if vm.rank == 0:
            total = 0
            for count in gathered_counts:
                try:
                    total += count
                except TypeError:
                    # Entries that cannot be added to the running total are skipped.
                    pass
            new_bounds_global = (0, total)
        new_bounds_global = vm.bcast(new_bounds_global)
        if not ret.is_empty:
            ret.bounds_global = new_bounds_global

        # Normalize the local bounds on live ranks. Ranks take turns in order: each non-root rank shifts its bounds by
        # the running offset, grows the offset by its own length and broadcasts it for the next rank.
        live_ranks = get_nonempty_ranks(ret, vm)
        with vm.scoped('bounds normalization', live_ranks):
            if not vm.is_null:
                offset = vm.bcast(len(ret) if vm.rank == 0 else None)
                for turn in vm.ranks:
                    if vm.rank == turn and turn != 0:
                        ret.bounds_local = [b + offset for b in ret.bounds_local]
                        offset += len(ret)
                    vm.barrier()
                    offset = vm.bcast(offset, root=turn)
        return ret
Пример #28
0
    def test_system_converting_state_boundaries_shapefile(self):
        # Converts the state boundaries geometries to geometry coordinates, writes them with the ESMF unstructured
        # NetCDF driver and checks on rank 0 that the file reads back as similar polygons.

        # TODO: NetCDF4 MPI is disabled because the asynchronous STATE_FIPS write hangs; this might be a netCDF4 bug.
        ocgis.env.USE_NETCDF4_MPI = False
        keywords = {'transform_to_crs': [None, Spherical],
                    'use_geometry_iterator': [False, True]}
        xsums, ysums = [], []
        for k in self.iter_product_keywords(keywords):
            has_target_crs = k.transform_to_crs is not None
            # The conversion only receives a target CRS when the geometry iterator is in use.
            to_crs = k.transform_to_crs() if (k.use_geometry_iterator and has_target_crs) else None
            desired_crs = k.transform_to_crs() if has_target_crs else WGS84()

            rd = RequestDataset(uri=self.path_state_boundaries, variable=['UGID', 'ID'])
            rd.metadata['schema']['geometry'] = 'MultiPolygon'
            field = rd.get()
            self.assertEqual(len(field.data_variables), 2)

            # The source geometries carry no mask.
            field.geom.load()
            self.assertFalse(field.geom.has_mask)
            self.assertNotIn(VariableName.SPATIAL_MASK, field)
            self.assertIsNone(field.dimension_map.get_spatial_mask())

            self.assertEqual(field.crs, WGS84())
            if has_target_crs:
                field.update_crs(desired_crs)
            self.assertEqual(len(field.data_variables), 2)
            self.assertEqual(len(field.geom.parent.data_variables), 2)

            convert_kwargs = dict(pack=False, use_geometry_iterator=k.use_geometry_iterator, to_crs=to_crs)
            try:
                gc = field.geom.convert_to(**convert_kwargs)
            except ValueError as e:
                # The error is tolerated only if both checks below pass; otherwise it is re-raised.
                try:
                    self.assertFalse(k.use_geometry_iterator)
                    self.assertIsNotNone(to_crs)
                except AssertionError:
                    raise e
                continue

            xsums.append(gc.x.get_value().sum())
            ysums.append(gc.y.get_value().sum())
            self.assertEqual(gc.crs, desired_crs)

            # The geometry coordinates carry no mask either.
            self.assertFalse(gc.has_mask)
            self.assertNotIn(VariableName.SPATIAL_MASK, gc.parent)
            self.assertIsNone(gc.dimension_map.get_spatial_mask())

            path = self.get_temporary_file_path('esmf_state_boundaries.nc')
            self.assertEqual(gc.parent.crs, desired_crs)
            gc.parent.write(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)

            # Collect every rank's geometries and compare against the written file on rank 0.
            gathered_geoms = vm.gather(field.geom.get_value())
            if vm.rank == 0:
                actual_geoms = list(itertools.chain.from_iterable(gathered_geoms))

                rd = RequestDataset(path, driver=DriverKey.NETCDF_ESMF_UNSTRUCT)
                infield = rd.get()
                self.assertEqual(create_crs(infield.crs.value), desired_crs)
                for dv in field.data_variables:
                    self.assertIn(dv.name, infield)
                ingrid = infield.grid
                self.assertIsInstance(ingrid, GridUnstruct)

                for item in ingrid.archetype.iter_geometries():
                    self.assertPolygonSimilar(item[1], actual_geoms[item[0]], check_type=False)

        vm.barrier()

        # Test coordinates have actually changed. This reads the keyword combination left over from the last loop
        # iteration.
        if not k.use_geometry_iterator:
            pairwise_checks = ((xsums, self.assertAlmostEqual), (ysums, self.assertNotAlmostEqual))
            for sums, check in pairwise_checks:
                for lhs, rhs in itertools.combinations(sums, 2):
                    check(lhs, rhs)
Пример #29
0
    def _write_variable_collection_main_(cls, field, opened_or_path,
                                         write_mode, **kwargs):
        """
        Write a field's records to a vector GIS target through Fiona, one rank at a time.

        :param field: The field to write. It must be a :class:`Field` instance and expose a geometry variable through
         ``field.geom``.
        :param opened_or_path: The write target. ``cls.inquire_opened_state`` decides whether it is treated as an
         already-opened object (its ``schema``, ``crs`` and ``driver`` are then used) or as something to open.
        :param write_mode: Compared against ``MPIWriteMode.FILL`` (skips the template write) and
         ``MPIWriteMode.TEMPLATE`` (skips the data write).
        :param kwargs: Optional ``crs``, ``fiona_schema``, ``fiona_driver`` (defaults to ``'ESRI Shapefile'``) and
         ``iter_kwargs`` (keyword arguments forwarded to ``field.iter``).
        :raises: ValueError if ``field`` is not a field or has no geometry variable.
        """

        # Imported locally rather than at module level - presumably to avoid a circular import; TODO confirm.
        from ocgis.collection.field import Field

        if not isinstance(field, Field):
            raise ValueError(
                'Only fields may be written to vector GIS formats.')

        fiona_crs = kwargs.get('crs')
        fiona_schema = kwargs.get('fiona_schema')
        fiona_driver = kwargs.get('fiona_driver', 'ESRI Shapefile')
        # The record iterator is always told which driver class is doing the writing.
        iter_kwargs = kwargs.pop('iter_kwargs', {})
        iter_kwargs[KeywordArgument.DRIVER] = cls

        # This finds the geometry variable used in the iterator. Need for the general geometry type that may not be
        # determined using the record iterator.
        geom_variable = field.geom
        if geom_variable is None:
            raise ValueError(
                'A geometry variable is required for writing to vector GIS formats.'
            )

        # Open the output Fiona object using overloaded values or values determined at call-time.
        if not cls.inquire_opened_state(opened_or_path):
            # Fall back to the field's own coordinate system when none was passed in.
            if fiona_crs is None:
                if field.crs is not None:
                    fiona_crs = field.crs.value
            # The first record from the iterator serves as the archetype for building the schema.
            _, archetype_record = next(field.iter(**iter_kwargs))
            archetype_record = format_record_for_fiona(fiona_driver,
                                                       archetype_record)
            if fiona_schema is None:
                fiona_schema = get_fiona_schema(geom_variable.geom_type,
                                                archetype_record)
        else:
            # An already-opened target dictates its own schema, coordinate system and driver.
            fiona_schema = opened_or_path.schema
            fiona_crs = opened_or_path.crs
            fiona_driver = opened_or_path.driver

        # The Fiona GeoJSON driver does not support update.
        if fiona_driver == 'GeoJSON':
            mode = 'w'
        else:
            mode = 'a'

        # Write the template file.
        if fiona_driver != 'GeoJSON':
            # Only rank 0 opens the target in write mode; the scope is entered and left without writing records.
            if vm.rank == 0 and write_mode != MPIWriteMode.FILL:
                with driver_scope(cls,
                                  opened_or_path=opened_or_path,
                                  mode='w',
                                  driver=fiona_driver,
                                  crs=fiona_crs,
                                  schema=fiona_schema) as _:
                    pass

        # Write data on each rank to the file.
        if write_mode != MPIWriteMode.TEMPLATE:
            # Ranks take turns: one rank writes per pass and every rank meets at the barrier before the next pass.
            for rank_to_write in vm.ranks:
                if vm.rank == rank_to_write:
                    with driver_scope(cls,
                                      opened_or_path=opened_or_path,
                                      mode=mode,
                                      driver=fiona_driver,
                                      crs=fiona_crs,
                                      schema=fiona_schema) as sink:
                        itr = field.iter(**iter_kwargs)
                        write_records_to_fiona(sink, itr, fiona_driver)
                vm.barrier()