Example #1
    def get_shift(self):
        """Return the periodic image shift, one 3-vector per halo
        direction, to apply to data received from each of the 26
        neighbouring sub-domains."""

        _sfd = host.Array(ncomp=26 * 3, dtype=ctypes.c_double)

        dims = mpi.cartcomm_dims_xyz(self.comm)
        top = mpi.cartcomm_top_xyz(self.comm)
        periods = mpi.cartcomm_periods_xyz(self.comm)

        for dx in range(26):
            direction = mpi.recv_modifiers[dx]

            for ix in range(3):
                if top[ix] == 0 and \
                   periods[ix] == 1 and \
                   direction[ix] == -1:
                    # receiving across the lower periodic boundary:
                    # shift by the full domain extent
                    _sfd[dx * 3 + ix] = self.extent[ix]
                elif top[ix] == dims[ix] - 1 and \
                     periods[ix] == 1 and \
                     direction[ix] == 1:
                    # receiving across the upper periodic boundary:
                    # shift by minus the full domain extent
                    _sfd[dx * 3 + ix] = -1.0 * self.extent[ix]
                else:
                    _sfd[dx * 3 + ix] = 0.0

        return _sfd
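
The 26 * 3 components above hold one shift vector per neighbouring sub-domain. A minimal sketch, assuming mpi.recv_modifiers enumerates the 26 non-zero direction triples in {-1, 0, 1}^3 (the real ppmd table may use a different ordering), of how such a table can be generated:

import itertools

# every (x, y, z) offset with entries in {-1, 0, 1}, excluding (0, 0, 0)
recv_modifiers = [d for d in itertools.product((-1, 0, 1), repeat=3)
                  if d != (0, 0, 0)]
assert len(recv_modifiers) == 26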
Example #2
def create_halo_pairs(domain_in, slicexyz, direction):
    """
    Create the pairs of (boundary cell, halo cell) indices for a halo
    exchange in the given direction, along with the periodic shift to
    apply to the exchanged data.
    """
    cell_array = domain_in.cell_array
    extent = domain_in.extent
    comm = domain_in.comm
    dims = mpi.cartcomm_dims_xyz(comm)
    top = mpi.cartcomm_top_xyz(comm)
    periods = mpi.cartcomm_periods_xyz(comm)

    # select the local (non-halo) cell indices specified by slicexyz; indexing
    # a range with an int gives a single index, with a slice an iterable
    xr = range(1, cell_array[0] - 1)[slicexyz[0]]
    yr = range(1, cell_array[1] - 1)[slicexyz[1]]
    zr = range(1, cell_array[2] - 1)[slicexyz[2]]

    if not isinstance(xr, collections.abc.Iterable):
        xr = [xr]
    if not isinstance(yr, collections.abc.Iterable):
        yr = [yr]
    if not isinstance(zr, collections.abc.Iterable):
        zr = [zr]

    ncells = len(xr) * len(yr) * len(zr)

    b_cells = np.zeros(ncells, dtype=ctypes.c_int)
    h_cells = np.zeros(ncells, dtype=ctypes.c_int)

    i = 0
    for iz in zr:
        for iy in yr:
            for ix in xr:
                # linear index of the boundary cell
                b_cells[i] = ix + (iy + iz * cell_array[1]) * cell_array[0]

                # matching halo cell, two cells away in the exchange
                # direction, wrapped periodically in the local cell grid
                _ix = (ix + direction[0] * 2) % cell_array[0]
                _iy = (iy + direction[1] * 2) % cell_array[1]
                _iz = (iz + direction[2] * 2) % cell_array[2]

                h_cells[i] = _ix + (_iy + _iz * cell_array[1]) * cell_array[0]

                i += 1

    # periodic image shift for ranks on the edge of the global domain
    shift = np.zeros(3, dtype=ctypes.c_double)
    for ix in range(3):
        if top[ix] == 0 and periods[ix] == 1 and direction[ix] == -1:
            shift[ix] = extent[ix]
        if top[ix] == dims[ix] - 1 and periods[ix] == 1 and direction[ix] == 1:
            shift[ix] = -1.0 * extent[ix]

    return b_cells, h_cells, shift
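
The iterable checks above exist because indexing a range with an int returns a single number while indexing with a slice returns a sub-range. A small, self-contained illustration (the cell counts are made up for this sketch):

cell_array = [8, 8, 8]            # assumed cell counts including halo layers
inner = range(1, cell_array[0] - 1)

print(inner[0])                   # 1      -> a bare int, must be wrapped in a list
print(list(inner[slice(0, 2)]))   # [1, 2] -> already iterable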
Example #3
    def _distribute_domain(self):

        _top = mpi.cartcomm_top_xyz(self.comm)
        _dims = mpi.cartcomm_dims_xyz(self.comm)

        opt.PROFILE[self.__class__.__name__ + ':mpi_dims'] = _dims

        # uniform decomposition: each rank owns 1/_dims of the global extent
        self._extent[0] = self._extent_global[0] / _dims[0]
        self._extent[1] = self._extent_global[1] / _dims[1]
        self._extent[2] = self._extent_global[2] / _dims[2]

        # local boundary as (xlo, xhi, ylo, yhi, zlo, zhi), with the global
        # domain centred on the origin
        _boundary = (-0.5 * self._extent_global[0] + _top[0] * self._extent[0],
                     -0.5 * self._extent_global[0] +
                     (_top[0] + 1.) * self._extent[0],
                     -0.5 * self._extent_global[1] + _top[1] * self._extent[1],
                     -0.5 * self._extent_global[1] +
                     (_top[1] + 1.) * self._extent[1],
                     -0.5 * self._extent_global[2] + _top[2] * self._extent[2],
                     -0.5 * self._extent_global[2] +
                     (_top[2] + 1.) * self._extent[2])

        self._boundary = data.ScalarArray(_boundary, dtype=ctypes.c_double)
        self._boundary_outer = data.ScalarArray(_boundary,
                                                dtype=ctypes.c_double)
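
A worked illustration of the boundary arithmetic above, with hypothetical numbers: a global x extent of 10.0 split over dims[0] = 2 ranks gives each rank a local extent of 5.0, the rank at top[0] = 0 the interval [-5.0, 0.0] and the rank at top[0] = 1 the interval [0.0, 5.0]:

extent_global_x, dims_x = 10.0, 2          # hypothetical values
extent_x = extent_global_x / dims_x
for top_x in range(dims_x):
    lo = -0.5 * extent_global_x + top_x * extent_x
    hi = -0.5 * extent_global_x + (top_x + 1.0) * extent_x
    print(top_x, lo, hi)                   # 0 -5.0 0.0 / 1 0.0 5.0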
Example #4
    def dims(self):
        return mpi.cartcomm_dims_xyz(self.comm)
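
A minimal sketch, not the ppmd implementation, of how helpers in the spirit of mpi.cartcomm_dims_xyz / cartcomm_top_xyz / cartcomm_periods_xyz could be written on top of mpi4py's Cartesian communicator API; it assumes the communicator already stores its dimensions in (x, y, z) order, which may not match ppmd's internal ordering:

from mpi4py import MPI

def cartcomm_dims_xyz(comm):
    # Get_topo() returns (dims, periods, coords) for a Cartesian communicator
    return comm.Get_topo()[0]

def cartcomm_top_xyz(comm):
    return comm.Get_topo()[2]

def cartcomm_periods_xyz(comm):
    return comm.Get_topo()[1]

# usage: a fully periodic 3D decomposition of MPI.COMM_WORLD
world = MPI.COMM_WORLD
cart = world.Create_cart(MPI.Compute_dims(world.size, 3),
                         periods=[True, True, True], reorder=False)
print(cartcomm_dims_xyz(cart), cartcomm_top_xyz(cart), cartcomm_periods_xyz(cart))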
Example #5
    def __call__(self):
        t0 = time.time()

        state = self.state
        comm = self.comm

        if comm.size == 1:
            return

        rank = comm.rank
        topo = mpi.cartcomm_top_xyz(comm)
        dims = mpi.cartcomm_dims_xyz(comm)

        extent = state.domain.extent
        boundary = state.domain.boundary

        # reciprocal of the sub-domain width in each dimension
        dist_cell_widths = [1.0 / (ex / dx) for ex, dx in zip(extent, dims)]
        dist_cell_widths = np.array(dist_cell_widths, dtype=REAL)
        npart = state.npart_local
        pos = state.get_position_dat()
        pos = pos.view

        lcount = 0
        lrank_dict = {}
        lpid = []

        # strides to linearise an (x, y, z) Cartesian coordinate into a rank
        rk_offsets = (1, dims[0], dims[0] * dims[1])

        def to_mpi_rank(_p):
            # avoid send if possible
            if ((_p[0] >= boundary[0]) and (_p[0] < boundary[1]) and \
                (_p[1] >= boundary[2]) and (_p[1] < boundary[3]) and \
                (_p[2] >= boundary[4]) and (_p[2] < boundary[5])):
                return rank

            # case where particle needs sending to another rank
            _rk = 0
            for dx in range(3):
                assert _p[dx] <= 0.5 * extent[dx], "outside domain"
                assert _p[dx] >= -0.5 * extent[dx], "outside domain"
                tint = int((_p[dx] + 0.5 * extent[dx]) * dist_cell_widths[dx])
                tint = min(dims[dx] - 1, tint)
                _rk += tint * rk_offsets[dx]

            return _rk

        # find the new remote rank for leaving particles
        t0_local = time.time()
        for px in range(npart):
            rk = to_mpi_rank(pos[px])
            if rk != rank:
                lcount += 1
                if rk not in lrank_dict:
                    lrank_dict[rk] = [px]
                else:
                    lrank_dict[rk].append(px)
                lpid.append(px)
        t_local = time.time() - t0_local
        num_rranks = len(lrank_dict)

        # for each remote rank, atomically reserve an offset in its recv
        # buffer via Get_accumulate on the recv-count window
        t1 = time.time()
        self._check_recv_count_win()

        # keep the size arrays alive until the RMA operations have completed
        _size_store = []
        #self._win_recv_count.Fence(0)
        lrind = np.zeros((num_rranks, 2), INT64)

        for rki, rk in enumerate(lrank_dict):
            lrind[rki, 0] = rk
            _size = np.array((len(lrank_dict[rk]), ), INT64)
            _size_store.append(_size)
            self._win_recv_count.Lock(rk, MPI.LOCK_SHARED)
            self._win_recv_count.Get_accumulate(_size_store[-1],
                                                lrind[rki, 1:2], rk)
            self._win_recv_count.Unlock(rk)

        self.comm.Barrier()
        #self._win_recv_count.Fence(MPI.MODE_NOSTORE)
        del _size_store

        opt.PROFILE[self._key_rma1] += time.time() - t1
        opt.PROFILE[self._key_nsend] += lcount

        # pack the send buffer for all particles
        t0_local = time.time()

        # get the views before the copy to avoid excessive syncing in the
        # CUDA case
        views = {}
        bytes_per_element = {}
        for dat in self.state.particle_dats:
            views[dat] = self._dat_obj(dat).view.view()
            bytes_per_element[dat] = self._byte_per_element(dat)

        nbytes = self._get_nbytes()
        self._check_send_buffer(lcount, nbytes)
        send_offset = 0
        for rk in lrind[:, 0]:
            for px in lrank_dict[rk]:
                s = 0
                for dat in self.state.particle_dats:
                    w = bytes_per_element[dat]
                    n = self._dat_ncomp(dat)
                    w *= n
                    v = self._send[send_offset,
                                   s:s + w:].view(self._dat_dtype(dat))
                    s += w
                    v[:] = views[dat][px, :].copy()
                send_offset += 1
        t_local += time.time() - t0_local

        # need to place the data in the remote buffers here
        self._check_recv_win()

        t2 = time.time()
        # RMA

        send_offset = 0
        for rki in range(num_rranks):
            rk = lrind[rki, 0]
            ri = lrind[rki, 1]
            nsend = len(lrank_dict[rk])
            self._win_recv.Lock(rk, MPI.LOCK_SHARED)
            #self._win_recv.Lock(rk, MPI.LOCK_EXCLUSIVE)
            self._win_recv.Put(self._send[send_offset:send_offset + nsend:, :],
                               rk, ri)
            self._win_recv.Unlock(rk)
            send_offset += nsend

        self.comm.Barrier()
        opt.PROFILE[self._key_rma2] += time.time() - t2
        opt.PROFILE[self._key_nrecv] += self._recv_count.array[0]

        # unpack the received data into the particle dats
        old_npart_local = self.state.npart_local
        self.state.npart_local = old_npart_local + self._recv_count.array[0]

        # get the views before the copy to avoid excessive syncing in the
        # CUDA case
        views = {}
        bytes_per_element = {}
        for dat in self.state.particle_dats:
            views[dat] = self._dat_obj(dat).view.view()
            bytes_per_element[dat] = self._byte_per_element(dat)

        t0_local = time.time()
        for px in range(self._recv_count.array[0]):
            s = 0
            for dat in self.state.particle_dats:
                w = bytes_per_element[dat]
                n = self._dat_ncomp(dat)
                w *= n
                v = self._recv.array[px, s:s + w:].view(self._dat_dtype(dat))
                s += w
                views[dat][old_npart_local + px, :] = v[:]

        # on some architectures the memory used for compute is different from
        # the exposed numpy view
        for dat in self.state.particle_dats:
            self._dat_obj(dat).sync_view_to_data()

        t_local += time.time() - t0_local
        opt.PROFILE[self._key_local] += t_local

        t0_compress = time.time()
        self.state.remove_by_slot(lpid)
        opt.PROFILE[self._key_compress] += time.time() - t0_compress

        self._free_wins()
        opt.PROFILE[self._key_call] += time.time() - t0
        opt.PROFILE[self._key_call_count] += 1
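
The core of the RMA exchange above is a fetch-and-add: each rank uses Get_accumulate with op=MPI.SUM on the remote receive-count window to reserve a contiguous slot in that rank's receive buffer before Put-ing the packed particle data. A self-contained sketch of just that pattern with mpi4py (the names and the single target rank are illustrative only, not ppmd's):

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
counter = np.zeros(1, dtype=np.int64)                # per-rank receive counter
win = MPI.Win.Create(counter, comm=comm)

nsend = np.array([comm.rank + 1], dtype=np.int64)    # pretend send count
offset = np.zeros(1, dtype=np.int64)                 # previous counter value

target = 0  # in this toy example every rank reserves space on rank 0
win.Lock(target, MPI.LOCK_SHARED)
# atomically add nsend to the remote counter and fetch its old value:
# the old value is the offset at which this rank may Put its data
win.Get_accumulate(nsend, offset, target, op=MPI.SUM)
win.Unlock(target)
comm.Barrier()

print("rank", comm.rank, "writes", int(nsend[0]), "items at offset", int(offset[0]))
win.Free()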