Пример #1
0
def trans(psy):
    '''PSyclone transformation script that applies redundant computation
    to every eligible loop in every invoke of the supplied PSy object.

    A loop is eligible when its iteration space is one of those listed
    in ITERATION_SPACES and none of the kernels it contains is a
    reduction. Each eligible loop is transformed with
    Dynamo0p3RedundantComputationTrans using a depth of DEPTH. Once all
    invokes have been processed the number of transformed loops is
    printed.

    :param psy: the PSy-layer object whose invokes are to be transformed.

    :returns: the PSy object that was passed in.

    '''
    rc_trans = Dynamo0p3RedundantComputationTrans()
    count = 0

    for invoke in psy.invokes.invoke_list:
        schedule = invoke.schedule
        for loop in schedule.loops():
            if loop.iteration_space not in ITERATION_SPACES:
                continue
            # A loop may hold more than one kernel and a single
            # reduction amongst them is enough to rule the loop out.
            if any(kern.is_reduction for kern in loop.kernels()):
                continue
            count += 1
            schedule, _ = rc_trans.apply(loop, depth=DEPTH)

    print("Transformed {0} loops".format(count))
    return psy
Пример #2
0
def trans(psy):
    '''PSyclone transformation script that applies redundant computation
    (to a depth of DEPTH) to every loop whose iteration space is listed
    in ITERATION_SPACES and whose calls all have names found in
    KERNEL_NAMES.

    According to the original description of this script the accepted
    name is the setval_c builtin, chosen because it only writes to data
    and so redundant computation cannot introduce additional halo
    exchanges or increase the depth of existing ones.

    The number of transformed loops is printed once all invokes have
    been processed.

    :param psy: the PSy-layer object whose invokes are to be transformed.

    :returns: the PSy object that was passed in.

    '''
    rc_trans = Dynamo0p3RedundantComputationTrans()
    count = 0

    for invoke in psy.invokes.invoke_list:
        schedule = invoke.schedule
        for loop in schedule.loops():
            if loop.iteration_space not in ITERATION_SPACES:
                continue
            # A loop may hold more than one call; every one of them
            # must be in the list of accepted names.
            if not all(call.name in KERNEL_NAMES for call in loop.calls()):
                continue
            count += 1
            schedule, _ = rc_trans.apply(loop, depth=DEPTH)

    print("Transformed {0} loops".format(count))
    return psy
Пример #3
0
def test_add_halo_exchange_code_nreader(monkeypatch):
    '''Check that _add_field_component_halo_exchange() in DynLoop raises
    the expected exception when there is more than one read dependence
    associated with a halo exchange in the read dependence list.

    The schedule is deliberately put into an inconsistent state (a node
    is deleted and private field names are overwritten) in order to
    trigger the internal error.

    :param monkeypatch: pytest fixture used to override the \
        "_compute_annexed_dofs" configuration setting for this test only.

    '''
    # Switch on computation over annexed dofs in the API configuration
    api_config = Config.get().api_conf(API)
    monkeypatch.setattr(api_config, "_compute_annexed_dofs", True)
    _, invoke_info = parse(os.path.join(
        BASE_PATH, "15.7.3_setval_X_before_user_kern.f90"),
                           api=API)
    psy = PSyFactory(API, distributed_memory=True).create(invoke_info)

    schedule = psy.invokes.invoke_list[0].schedule
    # Keep a reference to the first node before the schedule is changed
    loop = schedule[0]
    rtrans = Dynamo0p3RedundantComputationTrans()
    rtrans.apply(loop, options={"depth": 1})
    # NOTE(review): after the transformation schedule[0] is presumably
    # the halo exchange that was inserted ahead of the loop - confirm.
    f1_field = schedule[0].field
    # Remove the first node and rename the fields of the nodes now at
    # positions 1 and 2 to "f1". Presumably this creates more than one
    # dependent halo exchange for f1 - TODO confirm.
    del schedule.children[0]
    schedule[1].field._name = "f1"
    schedule[2].field._name = "f1"
    # Requesting a halo exchange for f1 should now raise an InternalError
    with pytest.raises(InternalError) as info:
        loop._add_field_component_halo_exchange(f1_field)
    assert ("When replacing a halo exchange with another one for field f1, "
            "a subsequent dependent halo exchange was found. This should "
            "never happen." in str(info.value))
Пример #4
0
def test_setval_x_then_user(tmpdir, monkeypatch):
    ''' Check that the correct halo exchanges are added when redundant
    computation is enabled for a built-in kernel that is called before
    a user-supplied kernel.

    :param tmpdir: pytest fixture passed to LFRicBuild for the \
        compilation check.
    :param monkeypatch: pytest fixture used to switch on \
        "_compute_annexed_dofs" for this test only.

    '''
    monkeypatch.setattr(Config.get().api_conf(API),
                        "_compute_annexed_dofs", True)
    alg_file = os.path.join(BASE_PATH,
                            "15.7.3_setval_X_before_user_kern.f90")
    _, invoke_info = parse(alg_file, api=API)
    psy = PSyFactory(API, distributed_memory=True).create(invoke_info)
    invoke = psy.invokes.invoke_list[0]

    # Computation over annexed dofs is enabled so the first (builtin)
    # kernel call must not be preceded by a halo exchange
    assert isinstance(invoke.schedule[0], DynLoop)
    # The second kernel call must be preceded by a halo exchange for f1
    second_node = invoke.schedule[1]
    assert isinstance(second_node, DynHaloExchange)
    assert second_node.field.name == "f1"

    # Make the first loop perform redundant computation out to the
    # level-1 halo
    rc_trans = Dynamo0p3RedundantComputationTrans()
    _, _ = rc_trans.apply(invoke.schedule[0], options={"depth": 1})

    # The f1 halo exchange should now come before the first (builtin)
    # kernel call
    leading_node = invoke.schedule[0]
    assert isinstance(leading_node, DynHaloExchange)
    assert leading_node.field.name == "f1"
    assert isinstance(invoke.schedule[1], DynLoop)

    # ... and it should be the only halo exchange for f1
    f1_exchanges = [exch for exch in invoke.schedule.walk(DynHaloExchange)
                    if exch.field.name == "f1"]
    assert len(f1_exchanges) == 1
    assert LFRicBuild(tmpdir).code_compiles(psy)
Пример #5
0
def test_gh_inc_nohex_1(tmpdir, monkeypatch):
    '''If COMPUTE_ANNEXED_DOFS is True, then a gh_inc access to a field
    in a kernel (iterating to the l1 halo) does not require a halo
    exchange when the previous writer is known and iterates over dofs
    to nannexed, halo(1), or halo max depth.

    :param tmpdir: pytest fixture passed to Dynamo0p3Build for the \
        compilation check.
    :param monkeypatch: pytest fixture used to force \
        "_compute_annexed_dofs" to True for this test only.

    '''
    # COMPUTE_ANNEXED_DOFS must be True for this test
    monkeypatch.setattr(Config.get().api_conf(API),
                        "_compute_annexed_dofs", True)

    # Parse the algorithm file and get hold of the PSy schedule
    alg_file = os.path.join(BASE_PATH, "14.12_halo_wdofs_to_inc.f90")
    _, info = parse(alg_file, api=API)
    psy = PSyFactory(API, distributed_memory=True).create(info)
    schedule = psy.invokes.invoke_list[0].schedule

    def check_schedule(schedule):
        '''Check this schedule has the expected structure (loop,
        haloexchange, loop). In particular there should be no halo
        exchange for the write-to-gh_inc dependence.

        :param schedule: a dynamo0.3 API schedule object
        :type schedule: :py:class:`psyclone.dynamo0p3.DynInvokeSchedule`.

        '''
        nodes = schedule.children
        assert len(nodes) == 3
        first_loop, exchange, second_loop = nodes
        assert isinstance(first_loop, DynLoop)
        assert isinstance(exchange, DynHaloExchange)
        assert exchange.field.name == "f2"
        assert exchange.required() == (True, False)
        assert isinstance(second_loop, DynLoop)

    # Untransformed: the 1st loop iterates over dofs to nannexed
    assert schedule.children[0].upper_bound_name == "nannexed"
    check_schedule(schedule)

    # Compilation is only checked at this point; compilation of
    # redundant computation is covered by separate tests
    assert Dynamo0p3Build(tmpdir).code_compiles(psy)

    rc_trans = Dynamo0p3RedundantComputationTrans()

    # Redundant computation into the level 1 halo for the 1st loop
    rc_trans.apply(schedule.children[0], depth=1)
    assert schedule.children[0].upper_bound_name == "dof_halo"
    assert schedule.children[0].upper_bound_halo_depth == 1
    check_schedule(schedule)

    # Redundant computation to the maximum halo depth for the 1st loop
    rc_trans.apply(schedule.children[0])
    assert schedule.children[0].upper_bound_name == "dof_halo"
    assert not schedule.children[0].upper_bound_halo_depth
    check_schedule(schedule)
Пример #6
0
def trans(psy):
    '''Transformation script for the first invoke of the supplied PSy
    object. It is intended to remove the grad_p halo exchanges by means
    of redundant computation and then move the remaining halo exchanges
    to the beginning of the invoke call.

    :param psy: the PSy-layer object to transform.

    :returns: the PSy object that was passed in.

    '''
    from psyclone.transformations import (
        Dynamo0p3RedundantComputationTrans, MoveTrans)

    rc_trans = Dynamo0p3RedundantComputationTrans()
    move_trans = MoveTrans()
    schedule = psy.invokes.invoke_list[0].schedule

    # Redundant computation (to depth 2) on the children at positions 5
    # and then 0 in order to remove the grad_p halo exchanges. The
    # child is looked up afresh before each transformation.
    for position in (5, 0):
        rc_trans.apply(schedule.children[position], {"depth": 2})

    # Move the remaining (potential) halo exchange to the start of the
    # invoke
    first_node = schedule.children[0]
    target_node = schedule.children[4]
    move_trans.apply(first_node, target_node)

    return psy
Пример #7
0
def trans(psy):
    '''Sample transformation script demonstrating asynchronous halo
    exchanges, with overlapping computation and communication, for the
    most costly halo exchanges in the (current version of the) LFRic
    model. Only the first invoke is transformed and the schedule is
    displayed (via its view() method) before and after each step.

    :param psy: the PSy-layer object to transform.

    :returns: the PSy object that was passed in.

    '''
    from psyclone.transformations import (
        Dynamo0p3AsyncHaloExchangeTrans,
        Dynamo0p3RedundantComputationTrans,
        MoveTrans,
    )

    schedule = psy.invokes.invoke_list[0].schedule
    schedule.view()

    # Step 1: redundant computation on the first node to remove the
    # halo exchange associated with the grad_p field. This step is
    # unnecessary (but harmless) if annexed_dofs is set to True in the
    # config file.
    redundant = Dynamo0p3RedundantComputationTrans()
    redundant.apply(schedule.children[0], {"depth": 1})
    schedule.view()

    # Step 2: split the three synchronous halo exchanges (for fields p,
    # hb_inv and u_normalisation) into asynchronous start/end pairs.
    # The nodes to transform are captured, last one first, before any
    # of them is transformed.
    make_async = Dynamo0p3AsyncHaloExchangeTrans()
    exchanges = schedule.children[3:0:-1]
    for node in exchanges:
        make_async.apply(node)
    schedule.view()

    # Step 3: move the start of each of the three halo exchanges before
    # the setval_c loop, offering the potential for overlap between
    # communication and computation.
    mover = MoveTrans()
    exchange_starts = schedule.children[5:0:-2]
    for node in exchange_starts:
        mover.apply(node, schedule.children[0])
    schedule.view()

    return psy
def test_gh_inc_max(tmpdir, monkeypatch, annexed):
    '''Check that correct halo exchange bounds are generated when there
    are multiple read dependencies. Here there is a gh_inc with a
    read-only reader and a gh_inc reader. The test is run with annexed
    both False and True because this affects how many halo exchanges
    are generated.

    :param tmpdir: pytest fixture passed to Dynamo0p3Build for the \
        compilation check.
    :param monkeypatch: pytest fixture used to set \
        "_compute_annexed_dofs" for this test only.
    :param annexed: the value given to "_compute_annexed_dofs".

    '''
    monkeypatch.setattr(Config.get().api_conf(API),
                        "_compute_annexed_dofs", annexed)

    # Parse the algorithm file and get hold of the PSy schedule
    alg_file = os.path.join(BASE_PATH, "14.14_halo_inc_times3.f90")
    _, info = parse(alg_file, api=API)
    psy = PSyFactory(API, distributed_memory=True).create(info)
    schedule = psy.invokes.invoke_list[0].schedule
    rc_trans = Dynamo0p3RedundantComputationTrans()

    # The positions of the f1 halo exchange and of the two loops that
    # get transformed depend upon the annexed-dofs setting
    haloidx, loop1idx, loop2idx = (2, 3, 5) if annexed else (4, 5, 7)

    def check_f1_exchange(depth):
        '''Check that the node currently at position haloidx in the
        schedule is a halo exchange with the expected properties.

        :param str depth: the expected depth of the halo exchange.

        '''
        exchange = schedule.children[haloidx]
        assert isinstance(exchange, DynHaloExchange)
        assert exchange.field.name == "f1"
        assert exchange.required() == (True, True)
        assert exchange._compute_halo_depth() == depth

    # f1 halo exchange should be depth 1 : max(1,0)
    check_f1_exchange("1")

    rc_trans.apply(schedule.children[loop2idx], depth=2)
    # f1 halo exchange should still be depth 1 : max(1,1)
    check_f1_exchange("1")

    rc_trans.apply(schedule.children[loop2idx], depth=3)
    # f1 halo exchange should be depth 2 : max(1,2)
    check_f1_exchange("2")

    rc_trans.apply(schedule.children[loop2idx])
    # f1 halo exchange should be depth max(1,max-1)
    check_f1_exchange("max(mesh%get_halo_depth()-1,1)")

    # Compilation is only checked here as this is the most complicated
    # case. (Compilation of redundant computation is checked
    # separately.)
    assert Dynamo0p3Build(tmpdir).code_compiles(psy)

    rc_trans.apply(schedule.children[loop1idx])
    # f1 halo exchange should be depth max
    check_f1_exchange("mesh%get_halo_depth()")
def test_gh_inc_nohex_4(tmpdir, monkeypatch):
    '''If COMPUTE_ANNEXED_DOFS is False, then a gh_inc access to a field
    in a kernel (iterating to the l1 halo) does not require a halo
    exchange when the previous writer is known and iterates over cells
    to halo(1), halo(2) and halo max depth. Also, if the previous
    writer is a gh_inc access and its previous writer is unknown then
    it does require a halo exchange if it writes to halo(1), a
    speculative halo exchange to halo(n-1) if iterating to halo(n) and
    a speculative halo exchange to halo(max_depth-1) if iterating to
    the maximum halo depth.

    :param tmpdir: pytest fixture passed to Dynamo0p3Build for the \
        compilation check.
    :param monkeypatch: pytest fixture used to force \
        "_compute_annexed_dofs" to False for this test only.

    '''
    # COMPUTE_ANNEXED_DOFS must be False for this test
    monkeypatch.setattr(Config.get().api_conf(API),
                        "_compute_annexed_dofs", False)

    # Parse the algorithm file and get hold of the PSy schedule
    alg_file = os.path.join(BASE_PATH, "14.13_halo_inc_to_inc.f90")
    _, info = parse(alg_file, api=API)
    psy = PSyFactory(API, distributed_memory=True).create(info)
    schedule = psy.invokes.invoke_list[0].schedule

    def check(schedule, f1depth, f2depth):
        '''Check that the schedule consists of two halo exchanges (for
        fields 'f1' and 'f2', in that order) followed by two loops and
        that the halo exchanges have the expected depths.

        :param schedule: a dynamo0.3 API schedule object
        :type schedule: :py:class:`psyclone.dynamo0p3.DynInvokeSchedule`.
        :param str f1depth: the expected depth of the halo exchange \
            associated with field f1
        :param str f2depth: the expected depth of the halo exchange \
            associated with field f2

        '''
        nodes = schedule.children
        assert len(nodes) == 4
        haloex1, haloex2, loop1, loop2 = nodes
        expectations = ((haloex1, "f1", f1depth), (haloex2, "f2", f2depth))
        for exchange, field_name, depth in expectations:
            assert isinstance(exchange, DynHaloExchange)
            assert exchange.field.name == field_name
            assert exchange._compute_halo_depth() == depth
            assert exchange.required() == (True, False)
        assert isinstance(loop1, DynLoop)
        assert isinstance(loop2, DynLoop)

    # Untransformed: speculative halo exchanges at the start of the
    # schedule for "f1" to depth 1 and for "f2" to depth 1
    check(schedule, f1depth="1", f2depth="1")

    # Compilation is only checked at this point; compilation of
    # redundant computation is covered by separate tests
    assert Dynamo0p3Build(tmpdir).code_compiles(psy)

    rc_trans = Dynamo0p3RedundantComputationTrans()

    # 1st loop iterates over cells to the level 2 halo: speculative
    # halo exchanges for "f1" to depth 1 and for "f2" to depth 2
    rc_trans.apply(schedule.children[2], depth=2)
    check(schedule, f1depth="1", f2depth="2")

    # 1st loop iterates over cells to the maximum halo depth:
    # speculative halo exchanges for "f1" to depth max halo - 1 and
    # for "f2" to max halo
    rc_trans.apply(schedule.children[2])
    check(schedule,
          f1depth="mesh%get_halo_depth()-1",
          f2depth="mesh%get_halo_depth()")
def test_gh_inc_nohex_2(tmpdir, monkeypatch):
    '''If COMPUTE_ANNEXED_DOFS is False, then a gh_inc access to a field
    in a kernel (iterating to the l1 halo) does require a halo exchange
    when the previous writer is known and iterates over dofs to ndofs
    but does not if it iterates to halo(1), or halo max depth.

    :param tmpdir: pytest fixture passed to Dynamo0p3Build for the \
        compilation check.
    :param monkeypatch: pytest fixture used to force \
        "_compute_annexed_dofs" to False for this test only.

    '''
    # COMPUTE_ANNEXED_DOFS must be False for this test
    monkeypatch.setattr(Config.get().api_conf(API),
                        "_compute_annexed_dofs", False)

    # Parse the algorithm file and get hold of the PSy schedule
    alg_file = os.path.join(BASE_PATH, "14.12_halo_wdofs_to_inc.f90")
    _, info = parse(alg_file, api=API)
    psy = PSyFactory(API, distributed_memory=True).create(info)
    schedule = psy.invokes.invoke_list[0].schedule

    def check_after_redundant(max_depth):
        '''Check that the schedule is (loop, halo exchange for "f2",
        loop) and that the first loop iterates into the dof halo.

        :param bool max_depth: True if the first loop is expected to \
            have no explicit halo depth, False if its halo depth is \
            expected to be 1.

        '''
        nodes = schedule.children
        first, exchange, second = nodes[0], nodes[1], nodes[2]
        assert len(nodes) == 3
        assert isinstance(first, DynLoop)
        assert first.upper_bound_name == "dof_halo"
        if max_depth:
            assert not first.upper_bound_halo_depth
        else:
            assert first.upper_bound_halo_depth == 1
        assert isinstance(exchange, DynHaloExchange)
        assert exchange.field.name == "f2"
        assert exchange.required() == (True, False)
        assert isinstance(second, DynLoop)

    # Untransformed: the 1st loop iterates over dofs to ndofs and is
    # followed by halo exchanges for both "f1" and "f2"
    nodes = schedule.children
    first, f1_exchange, f2_exchange, second = (
        nodes[0], nodes[1], nodes[2], nodes[3])
    assert len(nodes) == 4
    assert isinstance(first, DynLoop)
    assert first.upper_bound_name == "ndofs"
    assert isinstance(f1_exchange, DynHaloExchange)
    assert f1_exchange.field.name == "f1"
    assert f1_exchange.required() == (True, True)
    assert isinstance(f2_exchange, DynHaloExchange)
    assert f2_exchange.field.name == "f2"
    assert f2_exchange.required() == (True, False)
    assert isinstance(second, DynLoop)

    # Compilation is only checked at this point; compilation of
    # redundant computation is covered by separate tests
    assert Dynamo0p3Build(tmpdir).code_compiles(psy)

    rc_trans = Dynamo0p3RedundantComputationTrans()

    # 1st loop iterates over dofs to the level 1 halo. There should no
    # longer be a halo exchange for field "f1"
    rc_trans.apply(schedule.children[0], depth=1)
    check_after_redundant(max_depth=False)

    # 1st loop iterates over dofs to the maximum halo depth
    rc_trans.apply(schedule.children[0])
    check_after_redundant(max_depth=True)