Example No. 1
    def _make_halowait(self, f, hse, key, msg=None):
        cast = cast_mapper[(f.dtype, '*')]

        fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

        dim = Dimension(name='i')

        msgi = IndexedPointer(msg, dim)

        bufs = FieldFromComposite(msg._C_field_bufs, msgi)

        fromrank = FieldFromComposite(msg._C_field_from, msgi)

        sizes = [FieldFromComposite('%s[%d]' % (msg._C_field_sizes, i), msgi)
                 for i in range(len(f._dist_dimensions))]
        ofss = [FieldFromComposite('%s[%d]' % (msg._C_field_ofss, i), msgi)
                for i in range(len(f._dist_dimensions))]
        ofss = [fixed.get(d) or ofss.pop(0) for d in f.dimensions]

        # The `scatter` must be guarded as we must not alter the halo values along
        # the domain boundary, where the sender is actually MPI.PROC_NULL
        scatter = Call('scatter%s' % key, [cast(bufs)] + sizes + [f] + ofss)
        scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')), scatter)

        rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, msgi))
        waitrecv = Call('MPI_Wait', [rrecv, Macro('MPI_STATUS_IGNORE')])
        rsend = Byref(FieldFromComposite(msg._C_field_rsend, msgi))
        waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

        # The -1 below is because an Iteration, by default, generates <=
        ncomms = Symbol(name='ncomms')
        iet = Iteration([waitsend, waitrecv, scatter], dim, ncomms - 1)
        parameters = ([f] + list(fixed.values()) + [msg, ncomms])
        return Callable('halowait%d' % key, iet, 'void', parameters, ('static',))
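
A minimal sketch of the `<=` loop semantics flagged in the comment above (Devito IET nodes as used in this snippet; the import paths are an assumption):

    # An Iteration over `dim` with limit `ncomms - 1` renders an inclusive
    # C loop, i.e. `for (int i = 0; i <= ncomms - 1; i += 1)`: ncomms trips.
    from devito.ir.iet import Call, Iteration
    from devito.types import Dimension, Symbol

    dim = Dimension(name='i')
    ncomms = Symbol(name='ncomms')
    loop = Iteration([Call('do_work', [dim])], dim, ncomms - 1)
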
Example No. 2
    def _make_wait(self, f, hse, key, msg=None):
        bufs = FieldFromPointer(msg._C_field_bufs, msg)

        ofss = [Symbol(name='os%s' % d.root) for d in f.dimensions]

        fromrank = Symbol(name='fromrank')

        sizes = [
            FieldFromPointer('%s[%d]' % (msg._C_field_sizes, i), msg)
            for i in range(len(f._dist_dimensions))
        ]
        scatter = Call('scatter_%s' % key, [bufs] + sizes + [f] + ofss)

        # The `scatter` must be guarded as we must not alter the halo values along
        # the domain boundary, where the sender is actually MPI.PROC_NULL
        scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')),
                              scatter)

        rrecv = Byref(FieldFromPointer(msg._C_field_rrecv, msg))
        waitrecv = Call('MPI_Wait', [rrecv, Macro('MPI_STATUS_IGNORE')])
        rsend = Byref(FieldFromPointer(msg._C_field_rsend, msg))
        waitsend = Call('MPI_Wait', [rsend, Macro('MPI_STATUS_IGNORE')])

        iet = List(body=[waitsend, waitrecv, scatter])
        parameters = ([f] + ofss + [fromrank, msg])
        return Callable('wait_%s' % key, iet, 'void', parameters, ('static', ))
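
A quick reference for the accessor nodes used throughout these snippets (renderings inferred from how the snippets combine them; a sketch, not authoritative):

    # FieldFromPointer(field, msg)      ->  msg->field
    # FieldFromComposite(field, obj)    ->  obj.field
    # IndexedPointer(msg, i)            ->  msg[i]
    # Byref(x)                          ->  &x
    # Hence FieldFromComposite(fld, IndexedPointer(msg, i)) is `msg[i].fld`,
    # matching the per-peer accesses in Example No. 1 versus the
    # single-message `msg->...` accesses here.
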
Example No. 3
    def _make_sendrecv(self, f, hse, key, msg=None):
        comm = f.grid.distributor._obj_comm

        bufg = FieldFromPointer(msg._C_field_bufg, msg)
        bufs = FieldFromPointer(msg._C_field_bufs, msg)

        ofsg = [Symbol(name='og%s' % d.root) for d in f.dimensions]

        fromrank = Symbol(name='fromrank')
        torank = Symbol(name='torank')

        sizes = [FieldFromPointer('%s[%d]' % (msg._C_field_sizes, i), msg)
                 for i in range(len(f._dist_dimensions))]

        gather = Call('gather%s' % key, [bufg] + sizes + [f] + ofsg)
        # The `gather` is unnecessary if sending to MPI.PROC_NULL
        gather = Conditional(CondNe(torank, Macro('MPI_PROC_NULL')), gather)

        count = reduce(mul, sizes, 1)
        rrecv = Byref(FieldFromPointer(msg._C_field_rrecv, msg))
        rsend = Byref(FieldFromPointer(msg._C_field_rsend, msg))
        recv = IrecvCall([bufs, count, Macro(dtype_to_mpitype(f.dtype)),
                         fromrank, Integer(13), comm, rrecv])
        send = IsendCall([bufg, count, Macro(dtype_to_mpitype(f.dtype)),
                         torank, Integer(13), comm, rsend])

        iet = List(body=[recv, gather, send])
        parameters = ([f] + ofsg + [fromrank, torank, comm, msg])
        return SendRecv(key, iet, parameters, bufg, bufs)
Example No. 4
    def _make_poke(self, hs, key, msgs):
        lflag = Symbol(name='lflag')
        gflag = Symbol(name='gflag')

        # Init flags
        body = [Expression(DummyEq(lflag, 0)), Expression(DummyEq(gflag, 1))]

        # For each msg, build an Iteration calling MPI_Test on all peers
        for msg in msgs:
            dim = Dimension(name='i')
            msgi = IndexedPointer(msg, dim)

            rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, msgi))
            testrecv = Call(
                'MPI_Test',
                [rrecv, Byref(lflag),
                 Macro('MPI_STATUS_IGNORE')])

            rsend = Byref(FieldFromComposite(msg._C_field_rsend, msgi))
            testsend = Call(
                'MPI_Test',
                [rsend, Byref(lflag),
                 Macro('MPI_STATUS_IGNORE')])

            update = AugmentedExpression(DummyEq(gflag, lflag), '&')

            body.append(
                Iteration([testsend, update, testrecv, update], dim,
                          msg.npeers - 1))

        body.append(Return(gflag))

        return make_efunc('pokempi%d' % key, List(body=body), retval='int')
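
A plain-Python analogue of the poke logic above (hypothetical `req.test()` standing in for MPI_Test; the real callable emits C):

    def poke_all(requests):
        gflag = 1
        for req in requests:         # one request per peer, sends and recvs
            lflag = int(req.test())  # MPI_Test writes a 0/1 completion flag
            gflag &= lflag           # `gflag &= lflag`, as in the IET above
        return gflag                 # 1 only if every request has completed
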
Example No. 5
    def test_create_ops_dat_time_function(self):
        grid = Grid(shape=(4,))

        u = TimeFunction(name='u', grid=grid, space_order=2)

        block = OpsBlock('block')

        name_to_ops_dat = {}

        result = create_ops_dat(u, name_to_ops_dat, block)

        assert name_to_ops_dat['ut0'].base.name == namespace['ops_dat_name'](u.name)
        assert name_to_ops_dat['ut0'].indices == (Symbol('t0'),)
        assert name_to_ops_dat['ut1'].base.name == namespace['ops_dat_name'](u.name)
        assert name_to_ops_dat['ut1'].indices == (Symbol('t1'),)

        assert result[0].expr.lhs.name == namespace['ops_dat_dim'](u.name)
        assert result[0].expr.rhs.params == (Integer(4),)

        assert result[1].expr.lhs.name == namespace['ops_dat_base'](u.name)
        assert result[1].expr.rhs.params == (Zero(),)

        assert result[2].expr.lhs.name == namespace['ops_dat_d_p'](u.name)
        assert result[2].expr.rhs.params == (Integer(2),)

        assert result[3].expr.lhs.name == namespace['ops_dat_d_m'](u.name)
        assert result[3].expr.rhs.params == (Integer(-2),)

        assert result[4].expr.lhs.name == namespace['ops_dat_name'](u.name)
        assert len(result[4].expr.rhs.params) == 2
        assert result[4].expr.rhs.params[0].name == namespace['ops_decl_dat'].name
        assert result[4].expr.rhs.params[0].args == (
            block,
            1,
            Symbol(namespace['ops_dat_dim'](u.name)),
            Symbol(namespace['ops_dat_base'](u.name)),
            Symbol(namespace['ops_dat_d_m'](u.name)),
            Symbol(namespace['ops_dat_d_p'](u.name)),
            Byref(u.indexify((0,))),
            Literal('"%s"' % u._C_typedata),
            Literal('"ut0"')
        )
        assert result[4].expr.rhs.params[1].name == namespace['ops_decl_dat'].name
        assert result[4].expr.rhs.params[1].args == (
            block,
            1,
            Symbol(namespace['ops_dat_dim'](u.name)),
            Symbol(namespace['ops_dat_base'](u.name)),
            Symbol(namespace['ops_dat_d_m'](u.name)),
            Symbol(namespace['ops_dat_d_p'](u.name)),
            Byref(u.indexify((1,))),
            Literal('"%s"' % u._C_typedata),
            Literal('"ut1"')
        )
Example No. 6
    def _make_haloupdate(self, f, hse, key, msg=None):
        comm = f.grid.distributor._obj_comm

        fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

        dim = Dimension(name='i')

        msgi = IndexedPointer(msg, dim)

        bufg = FieldFromComposite(msg._C_field_bufg, msgi)
        bufs = FieldFromComposite(msg._C_field_bufs, msgi)

        fromrank = FieldFromComposite(msg._C_field_from, msgi)
        torank = FieldFromComposite(msg._C_field_to, msgi)

        sizes = [
            FieldFromComposite('%s[%d]' % (msg._C_field_sizes, i), msgi)
            for i in range(len(f._dist_dimensions))
        ]
        ofsg = [
            FieldFromComposite('%s[%d]' % (msg._C_field_ofsg, i), msgi)
            for i in range(len(f._dist_dimensions))
        ]
        ofsg = [fixed.get(d) or ofsg.pop(0) for d in f.dimensions]

        # The `gather` is unnecessary if sending to MPI.PROC_NULL
        gather = Call('gather_%s' % key, [bufg] + sizes + [f] + ofsg)
        gather = Conditional(CondNe(torank, Macro('MPI_PROC_NULL')), gather)

        # Make Irecv/Isend
        count = reduce(mul, sizes, 1)
        rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, msgi))
        rsend = Byref(FieldFromComposite(msg._C_field_rsend, msgi))
        recv = Call('MPI_Irecv', [
            bufs, count,
            Macro(dtype_to_mpitype(f.dtype)), fromrank,
            Integer(13), comm, rrecv
        ])
        send = Call('MPI_Isend', [
            bufg, count,
            Macro(dtype_to_mpitype(f.dtype)), torank,
            Integer(13), comm, rsend
        ])

        # The -1 below is because an Iteration, by default, generates <=
        ncomms = Symbol(name='ncomms')
        iet = Iteration([recv, gather, send], dim, ncomms - 1)
        parameters = ([f, comm, msg, ncomms]) + list(fixed.values())
        return Callable('haloupdate%d' % key, iet, 'void', parameters,
                        ('static', ))
Example No. 7
    def _make_halowait(self, f, hse, key, msg=None):
        nb = f.grid.distributor._obj_neighborhood
        wait = self._cache_dims[f.dimensions][2]

        fixed = {d: Symbol(name="o%s" % d.root) for d in hse.loc_indices}

        # Only retain the halos required by the Diag scheme
        # Note: `sorted` is only for deterministic code generation
        halos = sorted(i for i in hse.halos if isinstance(i.dim, tuple))

        body = []
        for dims, tosides in halos:
            mapper = OrderedDict(zip(dims, [i.flip() for i in tosides]))
            fromrank = FieldFromPointer(
                ''.join(i.name[0] for i in mapper.values()), nb)
            ofss = [
                fixed.get(d,
                          f._C_get_field(HALO, d, mapper.get(d)).offset)
                for d in f.dimensions
            ]

            msgi = Byref(IndexedPointer(msg, len(body)))

            body.append(Call(wait.name, [f] + ofss + [fromrank, msgi]))

        iet = List(body=body)
        parameters = [f] + list(fixed.values()) + [nb, msg]
        return Callable('halowait%d' % key, iet, 'void', parameters,
                        ('static', ))
Example No. 8
    def _initialize(iet):
        comm = None

        for i in iet.parameters:
            if isinstance(i, MPICommObject):
                comm = i
                break

        if comm is not None:
            rank = Symbol(name='rank')
            rank_decl = LocalExpression(DummyEq(rank, 0))
            rank_init = Call('MPI_Comm_rank', [comm, Byref(rank)])

            ngpus = Symbol(name='ngpus')
            call = Function('omp_get_num_devices')()
            ngpus_init = LocalExpression(DummyEq(ngpus, call))

            set_device_num = Call('omp_set_default_device', [rank % ngpus])

            body = [rank_decl, rank_init, ngpus_init, set_device_num]

            init = List(header=c.Comment('Begin of OpenMP+MPI setup'),
                        body=body,
                        footer=(c.Comment('End of OpenMP+MPI setup'), c.Line()))

            iet = iet._rebuild(body=(init,) + iet.body)

        return iet
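
The generated setup amounts to a round-robin rank-to-GPU policy; a minimal sketch (assuming `rank` is, or behaves like, the node-local rank — see the TODO notes in later snippets):

    def assign_device(rank, ngpus):
        # Mirrors the generated `omp_set_default_device(rank % ngpus)`
        return rank % ngpus
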
Example No. 9
def create_ops_arg(p, accessible_origin, name_to_ops_dat, par_to_ops_stencil):
    elements_per_point = 1
    dtype = Literal('"%s"' % dtype_to_cstr(p.dtype))

    if p.is_Constant:
        ops_type = namespace['ops_arg_gbl']
        ops_name = Byref(Constant(name=p.name[1:]))
        rw_flag = namespace['ops_read']
    else:
        ops_type = namespace['ops_arg_dat']
        accessible_info = accessible_origin[p.name]
        ops_name = name_to_ops_dat[p.name] \
            if accessible_info.time is None \
            else name_to_ops_dat[accessible_info.origin_name].\
            indexify([Add(accessible_info.time, accessible_info.shift)])
        rw_flag = namespace['ops_read'] if p.read_only else namespace[
            'ops_write']

    ops_arg = OpsArgDecl(ops_type=ops_type,
                         ops_name=ops_name,
                         elements_per_point=elements_per_point,
                         dtype=dtype,
                         rw_flag=rw_flag)

    return ops_arg
Example No. 10
        def _(iet):
            # TODO: we need to pick the rank from `comm_shm`, not `comm`,
            # so that we have nranks == ngpus (as long as the user has launched
            # the right number of MPI processes per node given the available
            # number of GPUs per node)

            objcomm = None
            for i in iet.parameters:
                if isinstance(i, MPICommObject):
                    objcomm = i
                    break

            devicetype = as_list(self.lang[self.platform])

            try:
                lang_init = [self.lang['init'](devicetype)]
            except TypeError:
                # Not all target languages need to be explicitly initialized
                lang_init = []

            deviceid = DeviceID()
            if objcomm is not None:
                rank = Symbol(name='rank')
                rank_decl = LocalExpression(DummyEq(rank, 0))
                rank_init = Call('MPI_Comm_rank', [objcomm, Byref(rank)])

                ngpus = Symbol(name='ngpus')
                call = self.lang['num-devices'](devicetype)
                ngpus_init = LocalExpression(DummyEq(ngpus, call))

                osdd_then = self.lang['set-device']([deviceid] + devicetype)
                osdd_else = self.lang['set-device']([rank % ngpus] +
                                                    devicetype)

                body = lang_init + [
                    Conditional(
                        CondNe(deviceid, -1),
                        osdd_then,
                        List(
                            body=[rank_decl, rank_init, ngpus_init, osdd_else
                                  ]),
                    )
                ]

                header = c.Comment('Begin of %s+MPI setup' % self.lang['name'])
                footer = c.Comment('End of %s+MPI setup' % self.lang['name'])
            else:
                body = lang_init + [
                    Conditional(
                        CondNe(deviceid, -1),
                        self.lang['set-device']([deviceid] + devicetype))
                ]

                header = c.Comment('Begin of %s setup' % self.lang['name'])
                footer = c.Comment('End of %s setup' % self.lang['name'])

            init = List(header=header, body=body, footer=(footer, c.Line()))
            iet = iet._rebuild(body=(init, ) + iet.body)

            return iet, {'args': deviceid}
Example No. 11
 def _call_sendrecv(self, name, *args, msg=None, haloid=None):
     # Drop `sizes` as this HaloExchangeBuilder conveys them through `msg`
     # Drop `ofss` as this HaloExchangeBuilder only needs them in `wait()`,
     # to collect and scatter the result of an MPI_Irecv
     f, _, ofsg, _, fromrank, torank, comm = args
     msg = Byref(IndexedPointer(msg, haloid))
     return Call(name, [f] + ofsg + [fromrank, torank, comm, msg])
Example No. 12
    def test_create_ops_arg_constant(self):
        a = Constant(name='*a')

        ops_arg = create_ops_arg(a, {}, {}, {})

        assert ops_arg.ops_type == namespace['ops_arg_gbl']
        assert str(ops_arg.ops_name) == str(Byref(Constant(name='a')))
        assert ops_arg.elements_per_point == 1
        assert ops_arg.dtype == Literal('"%s"' % dtype_to_cstr(a.dtype))
        assert ops_arg.rw_flag == namespace['ops_read']
Example No. 13
    def test_create_ops_arg_constant(self):
        a = Constant(name='*a')

        res = create_ops_arg(a, {}, {}, {})

        assert type(res) == namespace['ops_arg_gbl']
        assert str(res.args[0]) == str(Byref(Constant(name='a')))
        assert res.args[1] == 1
        assert res.args[2] == Literal('"%s"' % dtype_to_cstr(a.dtype))
        assert res.args[3] == namespace['ops_read']
Example No. 14
    def test_create_ops_arg_constant(self):
        a = Constant(name='*a')

        res = create_ops_arg(a, {}, {})

        assert res.name == namespace['ops_arg_gbl'].name
        assert res.args == [
            Byref(Constant(name='a')), 1,
            Literal('"%s"' % dtype_to_cstr(a.dtype)), namespace['ops_read']
        ]
Example No. 15
def create_ops_arg(p, name_to_ops_dat, par_to_ops_stencil):
    if p.is_Constant:
        return namespace['ops_arg_gbl'](
            Byref(Constant(name=p.name[1:])), 1,
            Literal('"%s"' % dtype_to_cstr(p.dtype)), namespace['ops_read'])
    else:
        return namespace['ops_arg_dat'](
            name_to_ops_dat[p.name], 1, par_to_ops_stencil[p],
            Literal('"%s"' % dtype_to_cstr(p.dtype)),
            namespace['ops_read'] if p.read_only else namespace['ops_write'])
Example No. 16
    def _alloc_pointed_array_on_high_bw_mem(self, site, obj, storage):
        """
        Allocate the following objects in the high bandwidth memory:

            * The pointer array `obj`;
            * The pointee Array `obj.array`

        If the pointer array is defined over `sregistry.threadid`, that is a thread
        Dimension, then each `obj.array` slice is allocated and freed individually
        by the owner thread.
        """
        # The pointer array
        decl = Definition(obj)

        memptr = VOID(Byref(obj._C_symbol), '**')
        alignment = obj._data_alignment
        size = SizeOf(Keyword('%s*' % obj._C_typedata)) * obj.dim.symbolic_size
        alloc0 = self.lang['host-alloc'](memptr, alignment, size)

        free0 = self.lang['host-free'](obj._C_symbol)

        # The pointee Array
        pobj = IndexedPointer(obj._C_symbol, obj.dim)
        memptr = VOID(Byref(pobj), '**')
        size = SizeOf(obj._C_typedata) * prod(obj.array.symbolic_shape)
        alloc1 = self.lang['host-alloc'](memptr, alignment, size)

        free1 = self.lang['host-free'](pobj)

        # Dump
        if obj.dim is self.sregistry.threadid:
            storage.update(obj,
                           site,
                           allocs=(decl, alloc0),
                           frees=free0,
                           pallocs=(obj.dim, alloc1),
                           pfrees=(obj.dim, free1))
        else:
            storage.update(obj,
                           site,
                           allocs=(decl, alloc0, alloc1),
                           frees=(free0, free1))
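
A plain-Python analogue of the two-level scheme described in the docstring (illustrative names; the real code emits C host-alloc/free pairs):

    import numpy as np

    def alloc_pointed(nthreads, shape, dtype=np.float32):
        ptrs = [None] * nthreads                # the pointer array (alloc0)
        for tid in range(nthreads):             # one pointee slice per thread
            ptrs[tid] = np.empty(shape, dtype)  # (alloc1), freed by its owner
        return ptrs
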
Example No. 17
def create_ops_fetch(f, name_to_ops_dat, time_upper_bound):

    if f.is_TimeFunction:
        ops_fetch = [
            namespace['ops_dat_fetch_data'](
                name_to_ops_dat[f.name].indexify(
                    [Mod(Add(time_upper_bound, -i), f._time_size)]),
                Byref(
                    f.indexify([Mod(Add(time_upper_bound, -i),
                                    f._time_size)])))
            for i in range(f._time_size)
        ]

    else:
        # The second parameter is the beginning of the array. Emitting the
        # bare name `v` proved impractical, so the equivalent `&(v[0])` is
        # generated instead.
        ops_fetch = [
            namespace['ops_dat_fetch_data'](name_to_ops_dat[f.name],
                                            Byref(f.indexify([0])))
        ]

    return ops_fetch
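
A numeric sketch of the circular time-buffer indexing above (values assumed for illustration):

    time_upper_bound, time_size = 7, 2  # e.g. a TimeFunction with two slots
    slots = [(time_upper_bound - i) % time_size for i in range(time_size)]
    assert slots == [1, 0]              # one fetch per slot, newest first
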
Example No. 18
    def _make_sendrecv(self, f, hse, key, **kwargs):
        comm = f.grid.distributor._obj_comm

        buf_dims = [Dimension(name='buf_%s' % d.root) for d in f.dimensions
                    if d not in hse.loc_indices]
        bufg = Array(name=self.sregistry.make_name(prefix='bufg'),
                     dimensions=buf_dims, dtype=f.dtype, padding=0)
        bufs = Array(name=self.sregistry.make_name(prefix='bufs'),
                     dimensions=buf_dims, dtype=f.dtype, padding=0)

        ofsg = [Symbol(name='og%s' % d.root) for d in f.dimensions]
        ofss = [Symbol(name='os%s' % d.root) for d in f.dimensions]

        fromrank = Symbol(name='fromrank')
        torank = Symbol(name='torank')

        gather = Call('gather%s' % key, [bufg] + list(bufg.shape) + [f] + ofsg)
        scatter = Call('scatter%s' % key, [bufs] + list(bufs.shape) + [f] + ofss)

        # The `gather` is unnecessary if sending to MPI.PROC_NULL
        gather = Conditional(CondNe(torank, Macro('MPI_PROC_NULL')), gather)
        # The `scatter` must be guarded as we must not alter the halo values along
        # the domain boundary, where the sender is actually MPI.PROC_NULL
        scatter = Conditional(CondNe(fromrank, Macro('MPI_PROC_NULL')), scatter)

        count = reduce(mul, bufs.shape, 1)
        rrecv = MPIRequestObject(name='rrecv')
        rsend = MPIRequestObject(name='rsend')
        recv = IrecvCall([bufs, count, Macro(dtype_to_mpitype(f.dtype)),
                         fromrank, Integer(13), comm, Byref(rrecv)])
        send = IsendCall([bufg, count, Macro(dtype_to_mpitype(f.dtype)),
                         torank, Integer(13), comm, Byref(rsend)])

        waitrecv = Call('MPI_Wait', [Byref(rrecv), Macro('MPI_STATUS_IGNORE')])
        waitsend = Call('MPI_Wait', [Byref(rsend), Macro('MPI_STATUS_IGNORE')])

        iet = List(body=[recv, gather, send, waitsend, waitrecv, scatter])
        parameters = ([f] + list(bufs.shape) + ofsg + ofss + [fromrank, torank, comm])
        return SendRecv(key, iet, parameters, bufg, bufs)
Example No. 19
    def _alloc_array_on_high_bw_mem(self, site, obj, storage, *args):
        """
        Allocate an Array in the high bandwidth memory.
        """
        decl = Definition(obj)

        memptr = VOID(Byref(obj._C_symbol), '**')
        alignment = obj._data_alignment
        size = SizeOf(obj._C_typedata) * prod(obj.symbolic_shape)
        alloc = self.lang['host-alloc'](memptr, alignment, size)

        free = self.lang['host-free'](obj._C_symbol)

        storage.update(obj, site, allocs=(decl, alloc), frees=free)
Example No. 20
    def _make_poke(self, hs, key, msgs):
        flag = Symbol(name='flag')
        initflag = LocalExpression(DummyEq(flag, 0))

        body = [initflag]
        for msg in msgs:
            dim = Dimension(name='i')
            msgi = IndexedPointer(msg, dim)

            rrecv = Byref(FieldFromComposite(msg._C_field_rrecv, msgi))
            rsend = Byref(FieldFromComposite(msg._C_field_rsend, msgi))
            testrecv = Call(
                'MPI_Test',
                [rrecv, Byref(flag),
                 Macro('MPI_STATUS_IGNORE')])
            testsend = Call(
                'MPI_Test',
                [rsend, Byref(flag),
                 Macro('MPI_STATUS_IGNORE')])

            body.append(Iteration([testsend, testrecv], dim, msg.npeers - 1))

        return make_efunc('pokempi%d' % key, body)
Example No. 21
    def _(iet):
        # TODO: we need to pick the rank from `comm_shm`, not `comm`,
        # so that we have nranks == ngpus (as long as the user has launched
        # the right number of MPI processes per node given the available
        # number of GPUs per node)

        objcomm = None
        for i in iet.parameters:
            if isinstance(i, MPICommObject):
                objcomm = i
                break

        deviceid = DeviceID()
        device_nvidia = Macro('acc_device_nvidia')
        if objcomm is not None:
            rank = Symbol(name='rank')
            rank_decl = LocalExpression(DummyEq(rank, 0))
            rank_init = Call('MPI_Comm_rank', [objcomm, Byref(rank)])

            ngpus = Symbol(name='ngpus')
            call = DefFunction('acc_get_num_devices', device_nvidia)
            ngpus_init = LocalExpression(DummyEq(ngpus, call))

            asdn_then = Call('acc_set_device_num', [deviceid, device_nvidia])
            asdn_else = Call('acc_set_device_num',
                             [rank % ngpus, device_nvidia])

            body = [
                Call('acc_init', [device_nvidia]),
                Conditional(
                    CondNe(deviceid, -1), asdn_then,
                    List(body=[rank_decl, rank_init, ngpus_init, asdn_else]))
            ]
        else:
            body = [
                Call('acc_init', [device_nvidia]),
                Conditional(
                    CondNe(deviceid, -1),
                    Call('acc_set_device_num', [deviceid, device_nvidia]))
            ]

        init = List(header=c.Comment('Begin of OpenACC+MPI setup'),
                    body=body,
                    footer=(c.Comment('End of OpenACC+MPI setup'), c.Line()))
        iet = iet._rebuild(body=(init, ) + iet.body)

        return iet, {'args': deviceid}
Example No. 22
def create_ops_arg(p, accessible_origin, name_to_ops_dat, par_to_ops_stencil):
    if p.is_Constant:
        return namespace['ops_arg_gbl'](
            Byref(Constant(name=p.name[1:])), 1,
            Literal('"%s"' % dtype_to_cstr(p.dtype)), namespace['ops_read'])
    else:
        accessible_info = accessible_origin[p.name]

        dat_name = name_to_ops_dat[p.name] \
            if accessible_info.time is None \
            else name_to_ops_dat[accessible_info.origin_name].\
            indexify([accessible_info.time])

        return namespace['ops_arg_dat'](
            dat_name, 1, par_to_ops_stencil[p],
            Literal('"%s"' % dtype_to_cstr(p.dtype)),
            namespace['ops_read'] if p.read_only else namespace['ops_write'])
Example No. 23
    def test_create_ops_dat_function(self):
        grid = Grid(shape=(4,))

        u = Function(name='u', grid=grid, space_order=2)

        block = OpsBlock('block')

        name_to_ops_dat = {}

        ops_dat = create_ops_dat(u, name_to_ops_dat, block)

        assert name_to_ops_dat['u'].name == namespace['ops_dat_name'](u.name)
        assert name_to_ops_dat['u']._C_typename == namespace['ops_dat_type']

        assert ops_dat.dim_val.expr.lhs.name == \
            namespace['ops_dat_dim'](u.name)
        assert ops_dat.dim_val.expr.rhs.params == \
            (Integer(4),)

        assert ops_dat.base_val.expr.lhs.name == \
            namespace['ops_dat_base'](u.name)
        assert ops_dat.base_val.expr.rhs.params == (Zero(),)

        assert ops_dat.d_p_val.expr.lhs.name == namespace['ops_dat_d_p'](u.name)
        assert ops_dat.d_p_val.expr.rhs.params == (Integer(2),)

        assert ops_dat.d_m_val.expr.lhs.name == namespace['ops_dat_d_m'](u.name)
        assert ops_dat.d_m_val.expr.rhs.params == (Integer(-2),)

        assert ops_dat.ops_decl_dat.expr.lhs == name_to_ops_dat['u']
        assert type(ops_dat.ops_decl_dat.expr.rhs) == namespace['ops_decl_dat']
        assert ops_dat.ops_decl_dat.expr.rhs.args == (
            block,
            1,
            Symbol(namespace['ops_dat_dim'](u.name)),
            Symbol(namespace['ops_dat_base'](u.name)),
            Symbol(namespace['ops_dat_d_m'](u.name)),
            Symbol(namespace['ops_dat_d_p'](u.name)),
            Byref(u.indexify((0,))),
            Literal('"%s"' % u._C_typedata),
            Literal('"u"')
        )
Example No. 24
    def _initialize(iet):
        # TODO: we need to pick the rank from `comm_shm`, not `comm`,
        # so that we have nranks == ngpus (as long as the user has launched
        # the right number of MPI processes per node given the available
        # number of GPUs per node)
        comm = None
        for i in iet.parameters:
            if isinstance(i, MPICommObject):
                comm = i
                break

        device_nvidia = Macro('acc_device_nvidia')
        body = Call('acc_init', [device_nvidia])

        if comm is not None:
            rank = Symbol(name='rank')
            rank_decl = LocalExpression(DummyEq(rank, 0))
            rank_init = Call('MPI_Comm_rank', [comm, Byref(rank)])

            ngpus = Symbol(name='ngpus')
            call = DefFunction('acc_get_num_devices', device_nvidia)
            ngpus_init = LocalExpression(DummyEq(ngpus, call))

            devicenum = Symbol(name='devicenum')
            devicenum_init = LocalExpression(DummyEq(devicenum, rank % ngpus))

            set_device_num = Call('acc_set_device_num',
                                  [devicenum, device_nvidia])

            body = [
                rank_decl, rank_init, ngpus_init, devicenum_init,
                set_device_num, body
            ]

        init = List(header=c.Comment('Begin of OpenACC+MPI setup'),
                    body=body,
                    footer=(c.Comment('End of OpenACC+MPI setup'), c.Line()))

        iet = iet._rebuild(body=(init, ) + iet.body)

        return iet
Example No. 25
def test_make_cpp_parfor():
    """
    Test construction of a C++ parallel-for. This exercises the IET construction
    machinery in several ways, in particular by using Lambda nodes (to generate
    C++ lambda functions) and nested Calls.
    """
    class STDVectorThreads(LocalObject):
        dtype = type('std::vector<std::thread>', (c_void_p, ), {})

        def __init__(self):
            self.name = 'threads'

    class STDThread(LocalObject):
        dtype = type('std::thread&', (c_void_p, ), {})

        def __init__(self, name):
            self.name = name

    class FunctionType(LocalObject):
        dtype = type('FuncType&&', (c_void_p, ), {})

        def __init__(self, name):
            self.name = name

    # Basic symbols
    nthreads = Symbol(name='nthreads', is_const=True)
    threshold = Symbol(name='threshold', is_const=True)
    last = Symbol(name='last', is_const=True)
    first = Symbol(name='first', is_const=True)
    portion = Symbol(name='portion', is_const=True)

    # Composite symbols
    threads = STDVectorThreads()

    # Iteration helper symbols
    begin = Symbol(name='begin')
    l = Symbol(name='l')
    end = Symbol(name='end')

    # Functions
    stdmax = sympy.Function('std::max')

    # Construct the parallel-for body
    func = FunctionType('func')
    i = Dimension(name='i')
    threadobj = Call(
        'std::thread',
        Lambda(
            Iteration(Call(func.name, i), i, (begin, end - 1, 1)),
            ['=', Byref(func.name)],
        ))
    threadpush = Call(FieldFromComposite('push_back', threads), threadobj)
    it = Dimension(name='it')
    iteration = Iteration([
        LocalExpression(DummyEq(begin, it)),
        LocalExpression(DummyEq(l, it + portion)),
        LocalExpression(DummyEq(end, InlineIf(l > last, last, l))), threadpush
    ], it, (first, last, portion))
    thread = STDThread('x')
    waitcall = Call('std::for_each', [
        Call(FieldFromComposite('begin', threads)),
        Call(FieldFromComposite('end', threads)),
        Lambda(Call(FieldFromComposite('join', thread.name)), [], [thread])
    ])
    body = [
        LocalExpression(DummyEq(threshold, 1)),
        LocalExpression(
            DummyEq(portion, stdmax(threshold, (last - first) / nthreads))),
        Call(FieldFromComposite('reserve', threads), nthreads), iteration,
        waitcall
    ]

    parfor = ElementalFunction('parallel_for', body, 'void',
                               [first, last, func, nthreads])

    assert str(parfor) == """\
Example No. 26
def create_ops_dat(f, name_to_ops_dat, block):
    ndim = f.ndim - (1 if f.is_TimeFunction else 0)

    dim = Array(name=namespace['ops_dat_dim'](f.name),
                dimensions=(DefaultDimension(name='dim',
                                             default_value=ndim), ),
                dtype=np.int32,
                scope='stack')
    base = Array(name=namespace['ops_dat_base'](f.name),
                 dimensions=(DefaultDimension(name='base',
                                              default_value=ndim), ),
                 dtype=np.int32,
                 scope='stack')
    d_p = Array(name=namespace['ops_dat_d_p'](f.name),
                dimensions=(DefaultDimension(name='d_p',
                                             default_value=ndim), ),
                dtype=np.int32,
                scope='stack')
    d_m = Array(name=namespace['ops_dat_d_m'](f.name),
                dimensions=(DefaultDimension(name='d_m',
                                             default_value=ndim), ),
                dtype=np.int32,
                scope='stack')

    res = []
    base_val = [Zero() for i in range(ndim)]

    # If f is a TimeFunction we need to create an ops_dat for each time-stepping
    # variable (e.g. t0, t1)
    if f.is_TimeFunction:
        time_pos = f._time_position
        time_index = f.indices[time_pos]
        time_dims = f.shape[time_pos]

        dim_shape = sympify(f.shape[:time_pos] + f.shape[time_pos + 1:])
        padding = f.padding[:time_pos] + f.padding[time_pos + 1:]
        halo = f.halo[:time_pos] + f.halo[time_pos + 1:]
        d_p_val = tuple(sympify([p[0] + h[0] for p, h in zip(padding, halo)]))
        d_m_val = tuple(
            sympify([-(p[1] + h[1]) for p, h in zip(padding, halo)]))

        ops_dat_array = Array(name=namespace['ops_dat_name'](f.name),
                              dimensions=(DefaultDimension(
                                  name='dat', default_value=time_dims), ),
                              dtype='ops_dat',
                              scope='stack')

        dat_decls = []
        for i in range(time_dims):
            name = '%s%s%s' % (f.name, time_index, i)
            name_to_ops_dat[name] = ops_dat_array.indexify(
                [Symbol('%s%s' % (time_index, i))])
            dat_decls.append(namespace['ops_decl_dat'](block, 1,
                                                       Symbol(dim.name),
                                                       Symbol(base.name),
                                                       Symbol(d_m.name),
                                                       Symbol(d_p.name),
                                                       Byref(f.indexify([i])),
                                                       Literal('"%s"' %
                                                               f._C_typedata),
                                                       Literal('"%s"' % name)))

        ops_decl_dat = Expression(
            ClusterizedEq(Eq(ops_dat_array, ListInitializer(dat_decls))))
    else:
        ops_dat = OpsDat("%s_dat" % f.name)
        name_to_ops_dat[f.name] = ops_dat

        d_p_val = tuple(
            sympify([p[0] + h[0] for p, h in zip(f.padding, f.halo)]))
        d_m_val = tuple(
            sympify([-(p[1] + h[1]) for p, h in zip(f.padding, f.halo)]))
        dim_shape = sympify(f.shape)

        ops_decl_dat = Expression(
            ClusterizedEq(
                Eq(
                    ops_dat,
                    namespace['ops_decl_dat'](block, 1, Symbol(dim.name),
                                              Symbol(base.name),
                                              Symbol(d_m.name),
                                              Symbol(d_p.name),
                                              Byref(f.indexify([0])),
                                              Literal('"%s"' % f._C_typedata),
                                              Literal('"%s"' % f.name)))))

    res.append(Expression(ClusterizedEq(Eq(dim, ListInitializer(dim_shape)))))
    res.append(Expression(ClusterizedEq(Eq(base, ListInitializer(base_val)))))
    res.append(Expression(ClusterizedEq(Eq(d_p, ListInitializer(d_p_val)))))
    res.append(Expression(ClusterizedEq(Eq(d_m, ListInitializer(d_m_val)))))
    res.append(ops_decl_dat)

    return res
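
A numeric sketch of the `d_p_val`/`d_m_val` arithmetic above, consistent with the test expectations earlier (a Function with space_order=2, zero padding, halo of 2 per side; single-dimension values assumed):

    padding, halo = [(0, 0)], [(2, 2)]
    d_p_val = tuple(p[0] + h[0] for p, h in zip(padding, halo))
    d_m_val = tuple(-(p[1] + h[1]) for p, h in zip(padding, halo))
    assert d_p_val == (2,) and d_m_val == (-2,)
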
Example No. 27
def create_ops_dat(f, name_to_ops_dat, block):
    ndim = f.ndim - (1 if f.is_TimeFunction else 0)

    dim = Array(name=namespace['ops_dat_dim'](f.name),
                dimensions=(DefaultDimension(name='dim',
                                             default_value=ndim), ),
                dtype=np.int32,
                scope='stack')
    base = Array(name=namespace['ops_dat_base'](f.name),
                 dimensions=(DefaultDimension(name='base',
                                              default_value=ndim), ),
                 dtype=np.int32,
                 scope='stack')
    d_p = Array(name=namespace['ops_dat_d_p'](f.name),
                dimensions=(DefaultDimension(name='d_p',
                                             default_value=ndim), ),
                dtype=np.int32,
                scope='stack')
    d_m = Array(name=namespace['ops_dat_d_m'](f.name),
                dimensions=(DefaultDimension(name='d_m',
                                             default_value=ndim), ),
                dtype=np.int32,
                scope='stack')

    base_val = [Zero() for i in range(ndim)]

    # If f is a TimeFunction we need to create an ops_dat for each time-stepping
    # variable (e.g. t0, t1)
    if f.is_TimeFunction:
        time_pos = f._time_position
        time_index = f.indices[time_pos]
        time_dims = f.shape[time_pos]

        dim_val = f.shape[:time_pos] + f.shape[time_pos + 1:]
        d_p_val = f._size_nodomain.left[time_pos + 1:]
        d_m_val = [-i for i in f._size_nodomain.right[time_pos + 1:]]

        ops_dat_array = Array(name=namespace['ops_dat_name'](f.name),
                              dimensions=(DefaultDimension(
                                  name='dat', default_value=time_dims), ),
                              dtype=namespace['ops_dat_type'],
                              scope='stack')

        dat_decls = []
        for i in range(time_dims):
            name = '%s%s%s' % (f.name, time_index, i)

            dat_decls.append(namespace['ops_decl_dat'](block, 1,
                                                       Symbol(dim.name),
                                                       Symbol(base.name),
                                                       Symbol(d_m.name),
                                                       Symbol(d_p.name),
                                                       Byref(f.indexify([i])),
                                                       Literal('"%s"' %
                                                               f._C_typedata),
                                                       Literal('"%s"' % name)))

        ops_decl_dat = Expression(
            ClusterizedEq(Eq(ops_dat_array, ListInitializer(dat_decls))))

        # Register the ops_dat array under the function name for TimeFunctions.
        name_to_ops_dat[f.name] = ops_dat_array

    else:
        ops_dat = OpsDat("%s_dat" % f.name)
        name_to_ops_dat[f.name] = ops_dat

        dim_val = f.shape
        d_p_val = f._size_nodomain.left
        d_m_val = [-i for i in f._size_nodomain.right]

        ops_decl_dat = Expression(
            ClusterizedEq(
                Eq(
                    ops_dat,
                    namespace['ops_decl_dat'](block, 1, Symbol(dim.name),
                                              Symbol(base.name),
                                              Symbol(d_m.name),
                                              Symbol(d_p.name),
                                              Byref(f.indexify([0])),
                                              Literal('"%s"' % f._C_typedata),
                                              Literal('"%s"' % f.name)))))

    dim_val = Expression(ClusterizedEq(Eq(dim, ListInitializer(dim_val))))
    base_val = Expression(ClusterizedEq(Eq(base, ListInitializer(base_val))))
    d_p_val = Expression(ClusterizedEq(Eq(d_p, ListInitializer(d_p_val))))
    d_m_val = Expression(ClusterizedEq(Eq(d_m, ListInitializer(d_m_val))))

    return OpsDatDecl(dim_val=dim_val,
                      base_val=base_val,
                      d_p_val=d_p_val,
                      d_m_val=d_m_val,
                      ops_decl_dat=ops_decl_dat)
Example No. 28
    def test_create_ops_dat_function(self):
        grid = Grid(shape=(4,))

        u = Function(name='u', grid=grid, space_order=2)

        block = OpsBlock('block')

        name_to_ops_dat = {}

        result = create_ops_dat(u, name_to_ops_dat, block)

        assert name_to_ops_dat['u'].name == namespace['ops_dat_name'](u.name)
        assert name_to_ops_dat['u']._C_typename == namespace['ops_dat_type']

        assert result[0].expr.lhs.name == namespace['ops_dat_dim'](u.name)
        assert result[0].expr.rhs.params == (Integer(4), )

        assert result[1].expr.lhs.name == namespace['ops_dat_base'](u.name)
        assert result[1].expr.rhs.params == (Zero(), )

        assert result[2].expr.lhs.name == namespace['ops_dat_d_p'](u.name)
        assert result[2].expr.rhs.params == (Integer(2), )

        assert result[3].expr.lhs.name == namespace['ops_dat_d_m'](u.name)
        assert result[3].expr.rhs.params == (Integer(-2), )

        assert result[4].expr.lhs == name_to_ops_dat['u']
        assert result[4].expr.rhs.name == namespace['ops_decl_dat'].name
        assert result[4].expr.rhs.args == (
            block, 1, Symbol(namespace['ops_dat_dim'](u.name)),
            Symbol(namespace['ops_dat_base'](u.name)),
            Symbol(namespace['ops_dat_d_m'](u.name)),
            Symbol(namespace['ops_dat_d_p'](u.name)), Byref(u.indexify(
                (0, ))), Literal('"%s"' % u._C_typedata), Literal('"u"'))

    def test_create_ops_arg_constant(self):
        a = Constant(name='*a')

        res = create_ops_arg(a, {}, {})

        assert res.name == namespace['ops_arg_gbl'].name
        assert res.args == [
            Byref(Constant(name='a')), 1,
            Literal('"%s"' % dtype_to_cstr(a.dtype)), namespace['ops_read']
        ]

    @pytest.mark.parametrize('read', [True, False])
    def test_create_ops_arg_function(self, read):
        u = OpsAccessible('u', np.float32, read)

        dat = OpsDat('u_dat')
        stencil = OpsStencil('stencil')

        res = create_ops_arg(u, {'u': dat}, {u: stencil})

        assert res.name == namespace['ops_arg_dat'].name
        assert res.args == [
            dat, 1, stencil,
            Literal('"%s"' % dtype_to_cstr(u.dtype)),
            namespace['ops_read'] if read else namespace['ops_write']
        ]