Example #1
    def _initialize(iet):
        comm = None

        for i in iet.parameters:
            if isinstance(i, MPICommObject):
                comm = i
                break

        if comm is not None:
            rank = Symbol(name='rank')
            rank_decl = LocalExpression(DummyEq(rank, 0))
            rank_init = Call('MPI_Comm_rank', [comm, Byref(rank)])

            ngpus = Symbol(name='ngpus')
            call = Function('omp_get_num_devices')()
            ngpus_init = LocalExpression(DummyEq(ngpus, call))

            set_device_num = Call('omp_set_default_device', [rank % ngpus])

            body = [rank_decl, rank_init, ngpus_init, set_device_num]

            init = List(header=c.Comment('Begin of OpenMP+MPI setup'),
                        body=body,
                        footer=(c.Comment('End of OpenMP+MPI setup'), c.Line()))

            iet = iet._rebuild(body=(init,) + iet.body)

        return iet
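The key line above is the generated `omp_set_default_device(rank % ngpus)`: each MPI rank is mapped to a GPU in round-robin fashion. A tiny self-contained sketch (plain Python, no Devito required; the rank and GPU counts are invented for illustration) shows the mapping this arithmetic produces:

def roundrobin_device(rank, ngpus):
    # Mirrors the `rank % ngpus` expression passed to omp_set_default_device
    return rank % ngpus

# e.g. 8 MPI ranks per node and 4 GPUs per node -> devices 0,1,2,3,0,1,2,3
print([roundrobin_device(r, 4) for r in range(8)])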
Example #2
    def _make_poke(self, hs, key, msgs):
        lflag = Symbol(name='lflag')
        gflag = Symbol(name='gflag')

        # Init flags
        body = [Expression(DummyEq(lflag, 0)), Expression(DummyEq(gflag, 1))]

        # For each msg, build an Iteration calling MPI_Test on all peers
        for msg in msgs:
            dim = Dimension(name='i')
            msgi = IndexedPointer(msg, dim)

            rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, msgi))
            testrecv = Call(
                'MPI_Test',
                [rrecv, Byref(lflag),
                 Macro('MPI_STATUS_IGNORE')])

            rsend = Byref(FieldFromComposite(msg._C_field_rsend, msgi))
            testsend = Call(
                'MPI_Test',
                [rsend, Byref(lflag),
                 Macro('MPI_STATUS_IGNORE')])

            update = AugmentedExpression(DummyEq(gflag, lflag), '&')

            body.append(
                Iteration([testsend, update, testrecv, update], dim,
                          msg.npeers - 1))

        body.append(Return(gflag))

        return make_efunc('pokempi%d' % key, List(body=body), retval='int')
Example #3
    def _make_wait(self, f, hse, key, msg=None):
        bufs = FieldFromPointer(msg._C_field_bufs, msg)

        ofss = [Symbol(name='os%s' % d.root) for d in f.dimensions]

        fromrank = Symbol(name='fromrank')

        sizes = [
            FieldFromPointer('%s[%d]' % (msg._C_field_sizes, i), msg)
            for i in range(len(f._dist_dimensions))
        ]
        scatter = Call('scatter_%s' % key, [bufs] + sizes + [f] + ofss)

        # The `scatter` must be guarded as we must not alter the halo values along
        # the domain boundary, where the sender is actually MPI.PROC_NULL
        scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')),
                              scatter)

        rrecv = Byref(FieldFromPointer(msg._C_field_rrecv, msg))
        waitrecv = Call('MPI_Wait', [rrecv, Macro('MPI_STATUS_IGNORE')])
        rsend = Byref(FieldFromPointer(msg._C_field_rsend, msg))
        waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

        iet = List(body=[waitsend, waitrecv, scatter])
        parameters = ([f] + ofss + [fromrank, msg])
        return Callable('wait_%s' % key, iet, 'void', parameters, ('static', ))
Example #4
    def _make_sendrecv(self, f, hse, key, msg=None):
        comm = f.grid.distributor._obj_comm

        bufg = FieldFromPointer(msg._C_field_bufg, msg)
        bufs = FieldFromPointer(msg._C_field_bufs, msg)

        ofsg = [Symbol(name='og%s' % d.root) for d in f.dimensions]

        fromrank = Symbol(name='fromrank')
        torank = Symbol(name='torank')

        sizes = [FieldFromPointer('%s[%d]' % (msg._C_field_sizes, i), msg)
                 for i in range(len(f._dist_dimensions))]

        gather = Call('gather_%s' % key, [bufg] + sizes + [f] + ofsg)
        # The `gather` is unnecessary if sending to MPI.PROC_NULL
        gather = Conditional(CondNe(torank, Macro('MPI_PROC_NULL')), gather)

        count = reduce(mul, sizes, 1)
        rrecv = Byref(FieldFromPointer(msg._C_field_rrecv, msg))
        rsend = Byref(FieldFromPointer(msg._C_field_rsend, msg))
        recv = Call('MPI_Irecv', [bufs, count, Macro(dtype_to_mpitype(f.dtype)),
                                  fromrank, Integer(13), comm, rrecv])
        send = Call('MPI_Isend', [bufg, count, Macro(dtype_to_mpitype(f.dtype)),
                                  torank, Integer(13), comm, rsend])

        iet = List(body=[recv, gather, send])
        parameters = ([f] + ofsg + [fromrank, torank, comm, msg])
        return SendRecv(key, iet, parameters, bufg, bufs)
Example #5
    def _make_halowait(self, f, hse, key, msg=None):
        cast = cast_mapper[(f.dtype, '*')]

        fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

        dim = Dimension(name='i')

        msgi = IndexedPointer(msg, dim)

        bufs = FieldFromComposite(msg._C_field_bufs, msgi)

        fromrank = FieldFromComposite(msg._C_field_from, msgi)

        sizes = [FieldFromComposite('%s[%d]' % (msg._C_field_sizes, i), msgi)
                 for i in range(len(f._dist_dimensions))]
        ofss = [FieldFromComposite('%s[%d]' % (msg._C_field_ofss, i), msgi)
                for i in range(len(f._dist_dimensions))]
        ofss = [fixed.get(d) or ofss.pop(0) for d in f.dimensions]

        # The `scatter` must be guarded as we must not alter the halo values along
        # the domain boundary, where the sender is actually MPI.PROC_NULL
        scatter = Call('scatter%s' % key, [cast(bufs)] + sizes + [f] + ofss)
        scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')), scatter)

        rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, msgi))
        waitrecv = Call('MPI_Wait', [rrecv, Macro('MPI_STATUS_IGNORE')])
        rsend = Byref(FieldFromComposite(msg._C_field_rsend, msgi))
        waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

        # The -1 below is because an Iteration, by default, generates <=
        ncomms = Symbol(name='ncomms')
        iet = Iteration([waitsend, waitrecv, scatter], dim, ncomms - 1)
        parameters = ([f] + list(fixed.values()) + [msg, ncomms])
        return Callable('halowait%d' % key, iet, 'void', parameters, ('static',))
Example #6
    def test_nested_calls_cgen(self):
        call = Call('foo', [
            Call('bar', [])
        ])

        code = CGen().visit(call)

        assert str(code) == 'foo(bar());'
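Building on this test, richer Call trees render the same way. Below is a minimal sketch; the import paths are assumptions and may differ between Devito versions (e.g. CGen may need to be imported from devito.ir.iet.visitors), and the expected output is indicative only.

from devito.ir.iet import Call, CGen
from devito.types import Symbol

x = Symbol(name='x')
call = Call('foo', [x, Call('bar', [x])])

# Expected to print something like: foo(x,bar(x));
print(CGen().visit(call))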
Example #7
    def instrument(self, iet):
        sections = FindNodes(Section).visit(iet)

        # Transform the Iteration/Expression tree introducing Advisor calls that
        # resume and stop data collection
        mapper = {i: List(body=[Call(self._api_resume), i, Call(self._api_pause)])
                  for i in sections}
        iet = Transformer(mapper).visit(iet)

        return iet
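The mapper/Transformer idiom above (wrap each Section with resume/pause Calls) recurs throughout these examples. Here is a self-contained toy version, with all names invented for illustration and the import path assumed:

from devito.ir.iet import Call, List, Transformer

kernel = Call('kernel', [])
iet = List(body=[Call('setup', []), kernel])

# Replace `kernel` with a List that brackets it between two extra Calls
mapper = {kernel: List(body=[Call('resume', []), kernel, Call('pause', [])])}
iet = Transformer(mapper).visit(iet)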
Example #8
def test_find_symbols_nested(mode, expected):
    grid = Grid(shape=(4, 4, 4))
    call = Call('foo', [
        Call('bar',
             [Symbol(name='x'),
              Call('baz', [Function(name='f', grid=grid)])])
    ])

    found = FindSymbols(mode).visit(call)

    assert [f.name for f in found] == eval(expected)
Example #9
    def _specialize_iet(self, iet, **kwargs):
        warning("The OPS backend is still work-in-progress")

        ops_init = Call(namespace['ops_init'], [0, 0, 2])
        ops_partition = Call(namespace['ops_partition'], Literal('""'))
        ops_exit = Call(namespace['ops_exit'])

        ops_block = OpsBlock('block')

        # Extract all symbols that need to be converted to ops_dat
        dims = []
        to_dat = set()
        for section, trees in find_affine_trees(iet).items():
            dims.append(len(trees[0].dimensions))
            symbols = set(FindSymbols('symbolics').visit(trees[0].root))
            symbols -= set(FindSymbols('defines').visit(trees[0].root))
            to_dat |= symbols

        # To ensure deterministic code generation we order the datasets to
        # be generated (since a set is an unordered collection)
        to_dat = filter_sorted(to_dat)

        name_to_ops_dat = {}
        pre_time_loop = []
        for f in to_dat:
            if f.is_Constant:
                continue

            pre_time_loop.extend(create_ops_dat(f, name_to_ops_dat, ops_block))

        for n, (section, trees) in enumerate(find_affine_trees(iet).items()):
            pre_loop, ops_kernel = opsit(trees, n)

            pre_time_loop.extend(pre_loop)
            self._ops_kernels.append(ops_kernel)

        assert all(d == dims[0] for d in dims), \
            "The OPS backend currently assumes that all kernels " \
            "have the same number of dimensions"

        ops_block_init = Expression(
            ClusterizedEq(
                Eq(ops_block,
                   namespace['ops_decl_block'](dims[0], Literal('"block"')))))

        self._headers.append(namespace['ops_define_dimension'](dims[0]))
        self._includes.append('stdio.h')

        body = [
            ops_init, ops_block_init, *pre_time_loop, ops_partition, iet,
            ops_exit
        ]

        return List(body=body)
Example #10
    def _make_sendrecv(self, f, hse, key, **kwargs):
        comm = f.grid.distributor._obj_comm

        buf_dims = [
            Dimension(name='buf_%s' % d.root) for d in f.dimensions
            if d not in hse.loc_indices
        ]
        bufg = Array(name='bufg',
                     dimensions=buf_dims,
                     dtype=f.dtype,
                     padding=0)
        bufs = Array(name='bufs',
                     dimensions=buf_dims,
                     dtype=f.dtype,
                     padding=0)

        ofsg = [Symbol(name='og%s' % d.root) for d in f.dimensions]
        ofss = [Symbol(name='os%s' % d.root) for d in f.dimensions]

        fromrank = Symbol(name='fromrank')
        torank = Symbol(name='torank')

        gather = Call('gather%s' % key, [bufg] + list(bufg.shape) + [f] + ofsg)
        scatter = Call('scatter%s' % key,
                       [bufs] + list(bufs.shape) + [f] + ofss)

        # The `gather` is unnecessary if sending to MPI.PROC_NULL
        gather = Conditional(CondNe(torank, Macro('MPI_PROC_NULL')), gather)
        # The `scatter` must be guarded as we must not alter the halo values along
        # the domain boundary, where the sender is actually MPI.PROC_NULL
        scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')),
                              scatter)

        count = reduce(mul, bufs.shape, 1)
        rrecv = MPIRequestObject(name='rrecv')
        rsend = MPIRequestObject(name='rsend')
        recv = IrecvCall([
            bufs, count,
            Macro(dtype_to_mpitype(f.dtype)), fromrank,
            Integer(13), comm, rrecv
        ])
        send = IsendCall([
            bufg, count,
            Macro(dtype_to_mpitype(f.dtype)), torank,
            Integer(13), comm, rsend
        ])

        waitrecv = Call('MPI_Wait', [rrecv, Macro('MPI_STATUS_IGNORE')])
        waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

        iet = List(body=[recv, gather, send, waitsend, waitrecv, scatter])
        parameters = ([f] + list(bufs.shape) + ofsg + ofss +
                      [fromrank, torank, comm])
        return SendRecv(key, iet, parameters, bufg, bufs)
Example #11
def update_halo(f, fixed):
    """
    Construct an IET performing a halo exchange for a :class:`TensorFunction`.
    """
    # Requirements
    assert f.is_Function
    assert f.grid is not None

    distributor = f.grid.distributor
    nb = distributor._C_neighbours.obj
    comm = distributor._C_comm

    fixed = {d: Symbol(name="o%s" % d.root) for d in fixed}

    mapper = get_views(f, fixed)

    body = []
    masks = []
    for d in f.dimensions:
        if d in fixed:
            continue

        rpeer = FieldFromPointer("%sright" % d, nb)
        lpeer = FieldFromPointer("%sleft" % d, nb)

        # Sending to left, receiving from right
        lsizes, loffsets = mapper[(d, LEFT, OWNED)]
        rsizes, roffsets = mapper[(d, RIGHT, HALO)]
        assert lsizes == rsizes
        sizes = lsizes
        parameters = ([f] + list(f.symbolic_shape) + sizes + loffsets +
                      roffsets + [rpeer, lpeer, comm])
        call = Call('sendrecv_%s' % f.name, parameters)
        mask = Symbol(name='m%sl' % d)
        body.append(Conditional(mask, call))
        masks.append(mask)

        # Sending to right, receiving from left
        rsizes, roffsets = mapper[(d, RIGHT, OWNED)]
        lsizes, loffsets = mapper[(d, LEFT, HALO)]
        assert rsizes == lsizes
        sizes = rsizes
        parameters = ([f] + list(f.symbolic_shape) + sizes + roffsets +
                      loffsets + [lpeer, rpeer, comm])
        call = Call('sendrecv_%s' % f.name, parameters)
        mask = Symbol(name='m%sr' % d)
        body.append(Conditional(mask, call))
        masks.append(mask)

    iet = List(body=body)
    parameters = ([f] + masks + [comm, nb] + list(fixed.values()) +
                  [d.symbolic_size for d in f.dimensions])
    return Callable('halo_exchange_%s' % f.name, iet, 'void', parameters,
                    ('static', ))
Example #12
    def _make_haloupdate(self, f, hse, key, msg=None):
        comm = f.grid.distributor._obj_comm

        fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

        dim = Dimension(name='i')

        msgi = IndexedPointer(msg, dim)

        bufg = FieldFromComposite(msg._C_field_bufg, msgi)
        bufs = FieldFromComposite(msg._C_field_bufs, msgi)

        fromrank = FieldFromComposite(msg._C_field_from, msgi)
        torank = FieldFromComposite(msg._C_field_to, msgi)

        sizes = [
            FieldFromComposite('%s[%d]' % (msg._C_field_sizes, i), msgi)
            for i in range(len(f._dist_dimensions))
        ]
        ofsg = [
            FieldFromComposite('%s[%d]' % (msg._C_field_ofsg, i), msgi)
            for i in range(len(f._dist_dimensions))
        ]
        ofsg = [fixed.get(d) or ofsg.pop(0) for d in f.dimensions]

        # The `gather` is unnecessary if sending to MPI.PROC_NULL
        gather = Call('gather_%s' % key, [bufg] + sizes + [f] + ofsg)
        gather = Conditional(CondNe(torank, Macro('MPI_PROC_NULL')), gather)

        # Make Irecv/Isend
        count = reduce(mul, sizes, 1)
        rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, msgi))
        rsend = Byref(FieldFromComposite(msg._C_field_rsend, msgi))
        recv = Call('MPI_Irecv', [
            bufs, count,
            Macro(dtype_to_mpitype(f.dtype)), fromrank,
            Integer(13), comm, rrecv
        ])
        send = Call('MPI_Isend', [
            bufg, count,
            Macro(dtype_to_mpitype(f.dtype)), torank,
            Integer(13), comm, rsend
        ])

        # The -1 below is because an Iteration, by default, generates <=
        ncomms = Symbol(name='ncomms')
        iet = Iteration([recv, gather, send], dim, ncomms - 1)
        parameters = ([f, comm, msg, ncomms]) + list(fixed.values())
        return Callable('haloupdate%d' % key, iet, 'void', parameters,
                        ('static', ))
Example #13
    def _call_sendrecv(self, name, *args, msg=None, haloid=None):
        # Drop `sizes` as this HaloExchangeBuilder conveys them through `msg`
        # Drop `ofss` as this HaloExchangeBuilder only needs them in `wait()`,
        # to collect and scatter the result of an MPI_Irecv
        f, _, ofsg, _, fromrank, torank, comm = args
        msg = Byref(IndexedPointer(msg, haloid))
        return Call(name, [f] + ofsg + [fromrank, torank, comm, msg])
Example #14
    def _make_fetchupdate(self, iet, sync_ops, pieces, *args):
        # Construct fetches
        postactions = []
        for s in sync_ops:
            # The condition is already encoded in `iet` with a Conditional,
            # which stems from the originating Cluster's guards
            assert s.fcond is None

            imask = [(s.tstore, s.size) if d.root is s.dim.root else FULL
                     for d in s.dimensions]
            postactions.append(
                PragmaTransfer(self.lang._map_update_device,
                               s.target,
                               imask=imask))

        # Turn init IET into a Callable
        functions = filter_ordered(
            flatten([(s.target, s.function) for s in sync_ops]))
        name = self.sregistry.make_name(prefix='init_device')
        body = List(body=iet.body + tuple(postactions))
        parameters = filter_sorted(functions + derive_parameters(body))
        func = Callable(name, body, 'void', parameters, 'static')
        pieces.funcs.append(func)

        # Perform initial fetch by the main thread
        iet = List(header=c.Comment("Initialize data stream"),
                   body=Call(name, parameters))

        return iet
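The final step above, deriving the Callable's parameters from its body, can be seen in isolation in the rough sketch below. It assumes that `derive_parameters` is importable from `devito.ir.iet` and returns the undefined symbols appearing in the body (here, roughly `[x]`); all names are invented for illustration.

from devito.ir.iet import Call, Callable, List, derive_parameters
from devito.types import Symbol

body = List(body=Call('bar', [Symbol(name='x')]))
parameters = derive_parameters(body)   # roughly: [x]
func = Callable('init_device_sketch', body, 'void', parameters, 'static')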
Example #15
    def _make_halowait(self, f, hse, key, msg=None):
        nb = f.grid.distributor._obj_neighborhood
        wait = self._cache_dims[f.dimensions][2]

        fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

        # Only retain the halos required by the Diag scheme
        # Note: `sorted` is only for deterministic code generation
        halos = sorted(i for i in hse.halos if isinstance(i.dim, tuple))

        body = []
        for dims, tosides in halos:
            mapper = OrderedDict(zip(dims, [i.flip() for i in tosides]))
            fromrank = FieldFromPointer(
                ''.join(i.name[0] for i in mapper.values()), nb)
            ofss = [
                fixed.get(d,
                          f._C_get_field(HALO, d, mapper.get(d)).offset)
                for d in f.dimensions
            ]

            msgi = Byref(IndexedPointer(msg, len(body)))

            body.append(Call(wait.name, [f] + ofss + [fromrank, msgi]))

        iet = List(body=body)
        parameters = [f] + list(fixed.values()) + [nb, msg]
        return Callable('halowait%d' % key, iet, 'void', parameters,
                        ('static', ))
Example #16
    def _(iet):
        # TODO: we need to pick the rank from `comm_shm`, not `comm`,
        # so that we have nranks == ngpus (as long as the user has launched
        # the right number of MPI processes per node given the available
        # number of GPUs per node)

        objcomm = None
        for i in iet.parameters:
            if isinstance(i, MPICommObject):
                objcomm = i
                break

        deviceid = DeviceID()
        if objcomm is not None:
            rank = Symbol(name='rank')
            rank_decl = LocalExpression(DummyEq(rank, 0))
            rank_init = Call('MPI_Comm_rank', [objcomm, Byref(rank)])

            ngpus = Symbol(name='ngpus')
            call = Function('omp_get_num_devices')()
            ngpus_init = LocalExpression(DummyEq(ngpus, call))

            osdd_then = Call('omp_set_default_device', [deviceid])
            osdd_else = Call('omp_set_default_device', [rank % ngpus])

            body = [
                Conditional(
                    CondNe(deviceid, -1),
                    osdd_then,
                    List(body=[rank_decl, rank_init, ngpus_init, osdd_else]),
                )
            ]
        else:
            body = [
                Conditional(CondNe(deviceid, -1),
                            Call('omp_set_default_device', [deviceid]))
            ]

        init = List(header=c.Comment('Begin of OpenMP+MPI setup'),
                    body=body,
                    footer=(c.Comment('End of OpenMP+MPI setup'), c.Line()))
        iet = iet._rebuild(body=(init, ) + iet.body)

        return iet, {'args': deviceid}
Example #17
    def make(self, halo_spots):
        """
        Construct Callables and Calls implementing a halo exchange for the
        provided HaloSpots.

        For each (unique) HaloSpot, three Callables are built:

            * ``update_halo``, to be called when a halo exchange is necessary;
            * ``sendrecv``, called multiple times by ``update_halo``;
            * ``copy``, called twice by ``sendrecv``, to implement, for example,
              data gathering prior to an MPI_Send, and data scattering following
              an MPI_Recv.
        """
        calls = OrderedDict()
        generated = OrderedDict()
        for hs in halo_spots:
            for f, v in hs.fmapper.items():
                # Sanity check
                assert f.is_Function
                assert f.grid is not None

                # Callables construction
                # ----------------------
                # Note: to construct the halo exchange Callables, use the generic `df`,
                # instead of `f`, so that we don't need to regenerate code for Functions
                # that are symbolically identical to `f` except for the name
                df = f.__class__.__base__(name='a',
                                          grid=f.grid,
                                          shape=f.shape_global,
                                          dimensions=f.dimensions)
                # `gather`, `scatter`, `sendrecv` are generic by construction -- they
                # only need to be generated once for each `ndim`
                if f.ndim not in generated:
                    gather, extra = self._make_copy(df, v.loc_indices)
                    scatter, _ = self._make_copy(df, v.loc_indices, swap=True)
                    sendrecv = self._make_sendrecv(df, v.loc_indices, extra)
                    generated[f.ndim] = [gather, scatter, sendrecv]
                # `haloupdate` is generic by construction -- it only needs to be
                # generated once for each (`ndim`, `mask`)
                if (f.ndim, v) not in generated:
                    uniquekey = len(
                        [i for i in generated if isinstance(i, tuple)])
                    generated[(f.ndim, v)] = [
                        self._make_haloupdate(df, v.loc_indices, hs.mask[f],
                                              extra, uniquekey)
                    ]

                # `haloupdate` Call construction
                comm = f.grid.distributor._obj_comm
                nb = f.grid.distributor._obj_neighborhood
                loc_indices = list(v.loc_indices.values())
                args = [f, comm, nb] + loc_indices + extra
                call = Call(generated[(f.ndim, v)][0].name, args)
                calls.setdefault(hs, []).append(call)

        return flatten(generated.values()), calls
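A stripped-down sketch of the three-level hierarchy that the docstring above describes, using only generic Call/Callable nodes (all names invented for illustration; this is not the code the builder actually emits): `update_halo` calls `sendrecv` once per halo side, and `sendrecv` calls `copy` twice, once to gather into the send buffer and once to scatter from the receive buffer.

from devito.ir.iet import Call, Callable, List
from devito.types import Symbol

f = Symbol(name='f')

copy = Callable('copy', List(body=[]), 'void', parameters=[f])
sendrecv = Callable('sendrecv',
                    List(body=[Call(copy.name, [f]),    # gather, then MPI send
                               Call(copy.name, [f])]),  # MPI recv, then scatter
                    'void', parameters=[f])
update_halo = Callable('update_halo',
                       List(body=[Call(sendrecv.name, [f]),
                                  Call(sendrecv.name, [f])]),
                       'void', parameters=[f])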
Example #18
    def __make_init_threads(self, threads, sdata, tfunc, pieces):
        d = threads.index
        if threads.size == 1:
            callback = lambda body: body
        else:
            callback = lambda body: Iteration(body, d, threads.size - 1)

        idinit = DummyExpr(FieldFromComposite(sdata._field_id, sdata[d]),
                           1 + sum(i.size for i in pieces.threads) + d)
        arguments = list(tfunc.parameters)
        arguments[-1] = sdata.symbolic_base + d
        call = Call('std::thread',
                    Call(tfunc.name, arguments, is_indirect=True),
                    retobj=threads[d])
        threadsinit = List(header=c.Comment("Fire up and initialize `%s`" %
                                            threads.name),
                           body=callback([idinit, call]))

        return threadsinit
Example #19
def get_ops_args(args, stencils, name_to_dat):
    ops_args = []

    for arg in args:
        if arg.is_Constant:
            ops_args.append(
                Call("ops_arg_gbl", [
                    Byref(Constant(name=arg.name[1:])), 1,
                    String(dtype_to_cstr(arg.dtype)), OPS_READ
                ], False))
        else:
            ops_args.append(
                Call("ops_arg_dat", [
                    name_to_dat[arg.name], 1, stencils[arg.name],
                    String(dtype_to_cstr(arg.dtype)),
                    OPS_WRITE if arg.is_Write else OPS_READ
                ], False))

    return ops_args
Example #20
def test_call_indexed():
    grid = Grid(shape=(10, 10))

    u = Function(name='u', grid=grid)

    foo = Callable('foo', DummyExpr(u, 1), 'void', parameters=[u, u.indexed])
    call = Call(foo.name, [u, u.indexed])

    assert str(call) == "foo(u_vec,u);"
    assert str(foo) == """\
Example #21
    def _initialize(iet):
        # TODO: we need to pick the rank from `comm_shm`, not `comm`,
        # so that we have nranks == ngpus (as long as the user has launched
        # the right number of MPI processes per node given the available
        # number of GPUs per node)
        comm = None
        for i in iet.parameters:
            if isinstance(i, MPICommObject):
                comm = i
                break

        device_nvidia = Macro('acc_device_nvidia')
        body = Call('acc_init', [device_nvidia])

        if comm is not None:
            rank = Symbol(name='rank')
            rank_decl = LocalExpression(DummyEq(rank, 0))
            rank_init = Call('MPI_Comm_rank', [comm, Byref(rank)])

            ngpus = Symbol(name='ngpus')
            call = DefFunction('acc_get_num_devices', device_nvidia)
            ngpus_init = LocalExpression(DummyEq(ngpus, call))

            devicenum = Symbol(name='devicenum')
            devicenum_init = LocalExpression(DummyEq(devicenum, rank % ngpus))

            set_device_num = Call('acc_set_device_num',
                                  [devicenum, device_nvidia])

            body = [
                rank_decl, rank_init, ngpus_init, devicenum_init,
                set_device_num, body
            ]

        init = List(header=c.Comment('Begin of OpenACC+MPI setup'),
                    body=body,
                    footer=(c.Comment('End of OpenACC+MPI setup'), c.Line()))

        iet = iet._rebuild(body=(init, ) + iet.body)

        return iet
Example #22
    def _generate_mpi(self, iet, **kwargs):
        if configuration['mpi'] is False:
            return iet

        halo_spots = FindNodes(HaloSpot).visit(iet)

        # For each MPI-distributed TensorFunction, generate all necessary
        # C-level routines to perform a halo update
        callables = OrderedDict()
        for hs in halo_spots:
            for f, v in hs.fmapper.items():
                callables[f] = [update_halo(f, v.loc_indices)]
                callables[f].append(sendrecv(f, v.loc_indices))
                callables[f].append(copy(f, v.loc_indices))
                callables[f].append(copy(f, v.loc_indices, True))
        callables = flatten(callables.values())

        # Replace HaloSpots with suitable calls performing the halo update
        mapper = {}
        for hs in halo_spots:
            for f, v in hs.fmapper.items():
                stencil = [int(i) for i in hs.mask[f].values()]
                comm = f.grid.distributor._C_comm
                nb = f.grid.distributor._C_neighbours.obj
                loc_indices = list(v.loc_indices.values())
                dsizes = [d.symbolic_size for d in f.dimensions]
                parameters = [f] + stencil + [comm, nb] + loc_indices + dsizes
                call = Call('halo_exchange_%s' % f.name, parameters)
                mapper.setdefault(hs, []).append(call)

        # Sorting is for deterministic code generation. However, in practice,
        # we don't expect `cstructs` to contain more than one element because
        # there should always be one grid per Operator (though we're not really
        # enforcing it)
        cstructs = {
            f.grid.distributor._C_neighbours.cdef
            for f in flatten(i.fmapper for i in halo_spots)
        }
        self._globals.extend(sorted(cstructs, key=lambda i: i.tpname))

        self._includes.append('mpi.h')

        self._func_table.update(
            OrderedDict([(i.name, MetaCall(i, True)) for i in callables]))

        # Add in the halo update calls
        mapper = {k: List(body=v + list(k.body)) for k, v in mapper.items()}
        iet = Transformer(mapper, nested=True).visit(iet)

        return iet
Example #23
    def _make_poke(self, hs, key, msgs):
        flag = Symbol(name='flag')
        initflag = LocalExpression(DummyEq(flag, 0))

        body = [initflag]
        for msg in msgs:
            dim = Dimension(name='i')
            msgi = IndexedPointer(msg, dim)

            rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, msgi))
            rsend = Byref(FieldFromComposite(msg._C_field_rsend, msgi))
            testrecv = Call(
                'MPI_Test',
                [rrecv, Byref(flag),
                 Macro('MPI_STATUS_IGNORE')])
            testsend = Call(
                'MPI_Test',
                [rsend, Byref(flag),
                 Macro('MPI_STATUS_IGNORE')])

            body.append(Iteration([testsend, testrecv], dim, msg.npeers - 1))

        return make_efunc('pokempi%d' % key, body)
Example #24
    def __make_finalize_threads(self, threads, sdata):
        d = threads.index
        if threads.size == 1:
            callback = lambda body: body
        else:
            callback = lambda body: Iteration(body, d, threads.size - 1)

        threadswait = List(
            header=c.Comment("Wait for completion of `%s`" % threads.name),
            body=callback([
                While(
                    CondEq(FieldFromComposite(sdata._field_flag, sdata[d]),
                           2)),
                DummyExpr(FieldFromComposite(sdata._field_flag, sdata[d]), 0),
                Call(FieldFromComposite('join', threads[d]))
            ]))

        return threadswait
Example #25
    def _generate_mpi(self, iet, **kwargs):
        if configuration['mpi'] is False:
            return iet

        # For each function, generate all necessary C-level routines to perform
        # a halo exchange
        mapper = {}
        callables = []
        cstructs = set()
        for hs in FindNodes(HaloSpot).visit(iet):
            for f, v in hs.fmapper.items():
                callables.append(update_halo(f, hs.fixed[f]))
                callables.append(sendrecv(f, hs.fixed[f]))
                callables.extend(
                    [copy(f, hs.fixed[f]),
                     copy(f, hs.fixed[f], True)])

                stencil = [int(i) for i in hs.mask[f].values()]
                comm = f.grid.distributor._C_comm
                nb = f.grid.distributor._C_neighbours.obj
                fixed = list(hs.fixed[f].values())
                dsizes = [d.symbolic_size for d in f.dimensions]
                parameters = [f] + stencil + [comm, nb] + fixed + dsizes
                call = Call('halo_exchange_%s' % f.name, parameters)
                mapper.setdefault(hs, []).append(call)

                cstructs.add(f.grid.distributor._C_neighbours.cdef)

        self._func_table.update(
            OrderedDict([(i.name, MetaCall(i, True)) for i in callables]))

        # Sorting is for deterministic code generation. However, in practice,
        # we don't expect `cstructs` to contain more than one element because
        # there should always be one grid per Operator (though we're not really
        # enforcing this)
        self._globals.extend(sorted(cstructs, key=lambda i: i.tpname))

        self._includes.append('mpi.h')

        # Add in the halo update calls
        mapper = {k: List(body=v + list(k.body)) for k, v in mapper.items()}
        iet = Transformer(mapper).visit(iet)

        return iet
Example #26
def sendrecv(f, fixed):
    """Construct an IET performing a halo exchange along arbitrary
    dimension and side."""
    assert f.is_Function
    assert f.grid is not None

    comm = f.grid.distributor._C_comm

    buf_dims = [Dimension(name='buf_%s' % d.root) for d in f.dimensions if d not in fixed]
    bufg = Array(name='bufg', dimensions=buf_dims, dtype=f.dtype, scope='heap')
    bufs = Array(name='bufs', dimensions=buf_dims, dtype=f.dtype, scope='heap')

    dat_dims = [Dimension(name='dat_%s' % d.root) for d in f.dimensions]
    dat = Array(name='dat', dimensions=dat_dims, dtype=f.dtype, scope='external')

    ofsg = [Symbol(name='og%s' % d.root) for d in f.dimensions]
    ofss = [Symbol(name='os%s' % d.root) for d in f.dimensions]

    fromrank = Symbol(name='fromrank')
    torank = Symbol(name='torank')

    parameters = [bufg] + list(bufg.shape) + [dat] + list(dat.shape) + ofsg
    gather = Call('gather_%s' % f.name, parameters)
    parameters = [bufs] + list(bufs.shape) + [dat] + list(dat.shape) + ofss
    scatter = Call('scatter_%s' % f.name, parameters)

    # The scatter must be guarded as we must not alter the halo values along
    # the domain boundary, where the sender is actually MPI.PROC_NULL
    scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')), scatter)

    srecv = MPIStatusObject(name='srecv')
    rrecv = MPIRequestObject(name='rrecv')
    rsend = MPIRequestObject(name='rsend')

    count = reduce(mul, bufs.shape, 1)
    recv = Call('MPI_Irecv', [bufs, count, Macro(numpy_to_mpitypes(f.dtype)),
                              fromrank, '13', comm, rrecv])
    send = Call('MPI_Isend', [bufg, count, Macro(numpy_to_mpitypes(f.dtype)),
                              torank, '13', comm, rsend])

    waitrecv = Call('MPI_Wait', [rrecv, srecv])
    waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

    iet = List(body=[recv, gather, send, waitsend, waitrecv, scatter])
    iet = List(body=[ArrayCast(dat), iet_insert_C_decls(iet)])
    parameters = ([dat] + list(dat.shape) + list(bufs.shape) +
                  ofsg + ofss + [fromrank, torank, comm])
    return Callable('sendrecv_%s' % f.name, iet, 'void', parameters, ('static',))
Example #27
    def _call_halowait(self, name, f, hse, msg):
        return Call(name,
                    [f] + list(hse.loc_indices.values()) + [msg, msg.npeers])
Example #28
    def _call_haloupdate(self, name, f, hse, msg):
        comm = f.grid.distributor._obj_comm
        return Call(name, [f, comm, msg, msg.npeers] +
                    list(hse.loc_indices.values()))
Example #29
    def _call_halowait(self, name, f, hse, msg):
        nb = f.grid.distributor._obj_neighborhood
        return Call(name, [f] + list(hse.loc_indices.values()) + [nb, msg])
Example #30
    def _call_haloupdate(self, name, f, hse, *args):
        comm = f.grid.distributor._obj_comm
        nb = f.grid.distributor._obj_neighborhood
        args = [f, comm, nb] + list(hse.loc_indices.values())
        return Call(name, flatten(args))
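A closing sketch modelled on Example #20 above, tying Callable and Call together: a Callable defines the function, and a Call invokes it by name with matching arguments. Import paths and the exact rendering are assumptions that may vary across Devito versions.

from devito import Function, Grid
from devito.ir.iet import Call, Callable, DummyExpr

grid = Grid(shape=(10, 10))
u = Function(name='u', grid=grid)

foo = Callable('foo', DummyExpr(u, 1), 'void', parameters=[u])
call = Call(foo.name, [u])

# Expected to print something like: foo(u_vec);
print(call)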