def trans(psy):
    '''PSyclone-script entry point that puts a profiling region around
    every loop over levels.

    The names of all invokes are printed first. Then, for each invoke
    that has a schedule, every Loop whose ``loop_type`` is "levels" is
    handed to ProfileTrans with the region name ("kloop", "<n>"), where
    <n> counts the matching loops of that invoke starting from 1. Each
    processed schedule is displayed with its view() method.

    :param psy: The PSy layer object to apply transformations to.
    :type psy: :py:class:`psyclone.psyGen.PSy`

    '''
    invoke_names = "\n".join(str(name) for name in psy.invokes.names)
    print("Invokes found:\n{0}\n".format(invoke_names))

    profiler = ProfileTrans()

    for invoke in psy.invokes.invoke_list:
        schedule = invoke.schedule
        if not schedule:
            print("Invoke {0} has no Schedule! Skipping...".format(
                invoke.name))
            continue

        # The counter restarts for every invoke.
        region_index = 0
        for node in schedule.walk(Loop):
            # We only put profiling around loops over levels
            if node.loop_type != "levels":
                continue
            region_index += 1
            region = ("kloop", "{}".format(region_index))
            profiler.apply(node, {"region_name": region})

        schedule.view()
def test_profile_trans_name(options):
    '''Check how the options argument of ProfileTrans.apply() affects
    the names stored in the resulting profile node.

    When no options are given, or the options do not contain
    "region_name", the node's '_module_name' and '_region_name' must both
    be None. When "region_name" is present they must be "mod" and "reg"
    respectively.

    '''
    from psyclone.tests.utilities import get_invoke

    _, invoke = get_invoke("1_single_invoke.f90", "dynamo0.3", idx=0)
    schedule = invoke.schedule
    transformation = ProfileTrans()

    # Only pass the 'options' keyword at all when there is something in it.
    apply_kwargs = {"options": options} if options else {}
    _, _ = transformation.apply(schedule.children, **apply_kwargs)

    node = schedule[0]
    if options and "region_name" in options:
        assert node._module_name == "mod"
        assert node._region_name == "reg"
    else:
        assert node._module_name is None
        assert node._region_name is None
def test_region():
    '''Check the profiling transformation when it is applied to regions
    that do not cover the whole invoke and that contain several kernels,
    and then again once nested profile regions have been added.

    '''
    _, invoke = get_invoke("3.1_multi_functions_multi_invokes.f90",
                           "dynamo0.3", name="invoke_0", dist_mem=True)
    schedule = invoke.schedule
    prt = ProfileTrans()

    # Just halo exchanges.
    prt.apply(schedule[0:4])
    # Two loops.
    prt.apply(schedule[1:3])

    generated = str(invoke.gen())
    flat_regions = (
        ("CALL profile_psy_data%PreStart(\"multi_functions_multi_invokes_"
         "psy\", \"invoke_0:r0\", 0, 0)"),
        ("CALL profile_psy_data_1%PreStart(\"multi_functions_multi_"
         "invokes_psy\", \"invoke_0:r1\", 0, 0)"),
    )
    for fragment in flat_regions:
        assert fragment in generated

    # Make nested profiles.
    prt.apply(schedule[1].profile_body[1])
    prt.apply(schedule)

    generated = str(invoke.gen())
    nested_regions = (
        ("CALL profile_psy_data_3%PreStart(\"multi_functions_multi_"
         "invokes_psy\", \"invoke_0:r0\", 0, 0)"),
        ("CALL profile_psy_data%PreStart(\"multi_functions_multi_"
         "invokes_psy\", \"invoke_0:r1\", 0, 0)"),
        ("CALL profile_psy_data_1%PreStart(\"multi_functions_multi_"
         "invokes_psy\", \"invoke_0:r2\", 0, 0)"),
        ("CALL profile_psy_data_2%PreStart(\"multi_functions_multi_"
         "invokes_psy\", \"invoke_0:testkern_code:r3\", 0, 0)"),
    )
    for fragment in nested_regions:
        assert fragment in generated
def test_omp_transform():
    '''Tests that the profiling transform works correctly with OMP
    parallelisation.

    The first loop of the schedule is wrapped in an OpenMP loop directive,
    then an OpenMP parallel directive and finally a profiling region, and
    the generated code is checked. A second profiling region is then
    inserted around the first child of the parallel directive's body and
    the generated code is checked again.

    '''
    # NOTE(review): the whitespace inside the expected-output literals of
    # this test looks collapsed (indentation and, in the triple-quoted
    # strings, line breaks reduced to single spaces) - confirm them
    # against the code that is actually generated.
    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1", dist_mem=False)
    schedule = invoke.schedule
    # This test expects constant loop bounds
    schedule._const_loop_bounds = True

    prt = ProfileTrans()
    omp_loop = GOceanOMPLoopTrans()
    omp_par = OMPParallelTrans()

    # Parallelise the first loop. Each transformation is applied to
    # schedule[0], i.e. to whatever node the previous one left in the
    # first position of the schedule.
    omp_loop.apply(schedule[0])
    omp_par.apply(schedule[0])
    prt.apply(schedule[0])

    correct = (
        " CALL profile_psy_data%PreStart(\"psy_test27_loop_swap\", "
        "\"invoke_loop1:bc_ssh_code:r0\", 0, 0)\n"
        " !$omp parallel default(shared), private(i,j)\n"
        " !$omp do schedule(static)\n"
        " DO j=2,jstop\n"
        " DO i=2,istop\n"
        " CALL bc_ssh_code(i, j, 1, t%data, t%grid%tmask)\n"
        " END DO\n"
        " END DO\n"
        " !$omp end do\n"
        " !$omp end parallel\n"
        " CALL profile_psy_data%PostEnd")
    code = str(invoke.gen())
    assert correct in code

    # Now add another profile node between the omp parallel and omp do
    # directives:
    prt.apply(schedule[0].profile_body[0].dir_body[0])

    code = str(invoke.gen())

    correct = \
        "CALL profile_psy_data%PreStart(\"psy_test27_loop_swap\", " + \
        '''"invoke_loop1:bc_ssh_code:r0", 0, 0) !$omp parallel default(shared), private(i,j) CALL profile_psy_data_1%PreStart("psy_test27_loop_swap", ''' + \
        '''"invoke_loop1:bc_ssh_code:r1", 0, 0) !$omp do schedule(static) DO j=2,jstop DO i=2,istop CALL bc_ssh_code(i, j, 1, t%data, t%grid%tmask) END DO END DO !$omp end do CALL profile_psy_data_1%PostEnd !$omp end parallel CALL profile_psy_data%PostEnd'''
    assert correct in code
def test_transform_errors(capsys):
    '''Check the error handling of the profile region transformation.

    First verifies that wrapping the first three children of the schedule
    (given in their schedule order) succeeds. Then checks that a
    TransformationError is raised when a profile node would be placed
    directly inside an OpenMP loop directive, and when the user-supplied
    region name is not a 2-tuple.

    '''
    # get_invoke has been imported and tested before, so we can assume
    # here that it works as expected.
    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1", dist_mem=False)
    schedule = invoke.schedule
    prt = ProfileTrans()

    # Just to be sure: also check that the right order does indeed work!
    first_three = [schedule.children[idx] for idx in range(3)]
    sched1, _ = prt.apply(first_three)
    sched1.view()
    captured, _ = capsys.readouterr()
    # The captured output is converted to str before the newlines are
    # stripped so that the regular expression can span several lines.
    flattened = str(captured).replace("\n", "")

    pattern = (".*GOInvokeSchedule.*?"
               r"Profile.*?"
               r"Loop.*\[type='outer'.*?"
               r"Loop.*\[type='outer'.*?"
               r"Loop.*\[type='outer'")
    assert re.search(pattern, flattened)

    # Test that we don't add a profile node inside a OMP do loop (which
    # would be invalid syntax):
    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1", dist_mem=False)
    schedule = invoke.schedule
    prt = ProfileTrans()
    omp_loop = GOceanOMPLoopTrans()

    # Parallelise the first loop:
    sched1, _ = omp_loop.apply(schedule[0])

    # Inserting a ProfileTrans inside a omp do loop is syntactically
    # incorrect, the inner part must be a do loop only:
    with pytest.raises(TransformationError) as excinfo:
        prt.apply(sched1[0].dir_body[0])
    message = str(excinfo.value)
    assert ("A PSyData node cannot be inserted between an OpenMP/ACC "
            "directive and the loop(s) to which it applies!" in message)

    # A plain string is not an acceptable region name.
    with pytest.raises(TransformationError) as excinfo:
        prt.apply(sched1[0], {"region_name": "xx"})
    message = str(excinfo.value)
    assert ("Error in ProfileTrans. User-supplied region name must be a "
            "tuple containing two non-empty strings" in message)
def trans(psy):
    '''Add OpenACC directives to the supplied psy object and then put the
    whole schedule of the 'invoke_0_inc_field' invoke inside a single
    profiling region. The resulting schedule is stored back on the invoke
    and displayed with view().

    :param psy: the PSy layer to transform.
    :type psy: :py:class:`psyclone.gocean1p0.GOPSy`

    :returns: the transformed PSy object.
    :rtype: :py:class:`psyclone.gocean1p0.GOPSy`

    '''
    from psyclone.psyir.transformations import ProfileTrans

    profiler = ProfileTrans()

    # Use the trans() routine in acc_transform.py to add the OpenACC directives
    psy = acc_trans(psy)

    target_invoke = psy.invokes.get('invoke_0_inc_field')

    # Enclose everything in a profiling region
    profiled_schedule, _ = profiler.apply(target_invoke.schedule.children)
    target_invoke.schedule = profiled_schedule
    profiled_schedule.view()

    return psy
def add_profile_nodes(schedule, loop_class):
    '''This function inserts all required Profiling Nodes (for invokes
    and kernels, as specified on the command line) into a schedule.

    If Profiler.profile_kernels() is true, every direct child of the
    schedule that is an instance of ``loop_class`` is enclosed in its own
    profiling region. If Profiler.profile_invokes() is true, all children
    of the schedule are then enclosed in one further profiling region.

    :param schedule: The schedule to instrument.
    :type schedule: :py:class:`psyclone.psyGen.InvokeSchedule` or \
        derived class
    :param loop_class: The loop class (e.g. GOLoop, DynLoop) to instrument.
    :type loop_class: :py:class:`psyclone.psyir.nodes.Loop` or \
        derived class.

    '''
    from psyclone.psyir.transformations import ProfileTrans

    profile_trans = ProfileTrans()
    if Profiler.profile_kernels():
        # Iterate over a snapshot of the children: applying the
        # transformation puts a profile node into the schedule in place of
        # the loop, so the sequence being walked must not be the one that
        # is being modified.
        for child in list(schedule.children):
            if isinstance(child, loop_class):
                profile_trans.apply(child)
    if Profiler.profile_invokes():
        # One region around everything, including any regions added above.
        profile_trans.apply(schedule.children)
def test_profile_trans_invalid_name(value):
    '''Check that ProfileTrans.apply() raises a TransformationError with
    the expected message when ``value`` is supplied as the "region_name"
    option.

    '''
    transformation = ProfileTrans()

    # We need to have a schedule as parent, otherwise the node
    # (with no parent) will not be allowed.
    parent = Schedule()
    statement = Statement(parent=parent)
    parent.addchild(statement)

    bad_options = {"region_name": value}
    with pytest.raises(TransformationError) as excinfo:
        _ = transformation.apply(statement, options=bad_options)

    message = str(excinfo.value)
    assert ("User-supplied region name must be a tuple containing "
            "two non-empty strings." in message)
def test_profile_named_dynamo0p3():
    '''Check that, for the Dynamo 0.3 API, a user-supplied region name
    (here the PSy-layer name and the invoke name) appears in the generated
    PreStart call.

    '''
    psy, invoke = get_invoke("1_single_invoke.f90", "dynamo0.3", idx=0)

    region = (psy.name, invoke.name)
    transformation = ProfileTrans()
    _, _ = transformation.apply(invoke.schedule.children,
                                options={"region_name": region})

    expected_call = ("CALL profile_psy_data%PreStart(\"single_invoke_psy\", "
                     "\"invoke_0_testkern_type\", 0, 0)")
    generated = str(invoke.gen())
    assert expected_call in generated
def test_multi_prefix_profile(monkeypatch):
    '''Check the profiling transformation when two different PSyData
    prefixes ("tool1" and the default one) are used for two regions of the
    same invoke.

    '''
    _, invoke = get_invoke("3.1_multi_functions_multi_invokes.f90",
                           "dynamo0.3", name="invoke_0", dist_mem=True)
    schedule = invoke.schedule
    prt = ProfileTrans()

    # Monkeypatch the list of recognised PSyData prefixes
    monkeypatch.setattr(Config.get(), "_valid_psy_data_prefixes",
                        ["profile", "tool1"])

    # Use the 'tool1' prefix for the region around the halo exchanges.
    prt.apply(schedule[0:4], options={"prefix": "tool1"})
    # Use the default prefix for the two loops.
    prt.apply(schedule[1:3])

    generated = str(invoke.gen())

    expected_fragments = (
        # Module use statements for both tools.
        " USE profile_psy_data_mod, ONLY: profile_PSyDataType\n",
        " USE tool1_psy_data_mod, ONLY: tool1_PSyDataType",
        # Declarations of the two PSyData variables.
        (" TYPE(profile_PSyDataType), target, save :: "
         "profile_psy_data\n"
         " TYPE(tool1_PSyDataType), target, save :: tool1_psy_data"),
        # Start of the 'tool1' region.
        (" ! Call kernels and communication routines\n"
         " !\n"
         " CALL tool1_psy_data%PreStart(\"multi_functions_multi_"
         "invokes_psy\", \"invoke_0:r0\", 0, 0)\n"
         " IF (f1_proxy%is_dirty(depth=1)) THEN\n"),
        # End of the 'tool1' region followed by the start of the default one.
        (" CALL tool1_psy_data%PostEnd\n"
         " CALL profile_psy_data%PreStart(\"multi_functions_multi_"
         "invokes_psy\", \"invoke_0:r1\", 0, 0)\n"
         " DO cell=1,mesh%get_last_halo_cell(1)"),
        # End of the default region.
        (" CALL f1_proxy%set_dirty()\n"
         " !\n"
         " CALL profile_psy_data%PostEnd\n"
         " DO cell=1,mesh%get_last_halo_cell(1)"),
    )
    for fragment in expected_fragments:
        assert fragment in generated
def trans(psy):
    '''PSyclone-script entry point that encloses each invoke in a single
    profiling region.

    The names of all invokes are printed first. For every invoke that has
    a schedule, all children of that schedule are passed to ProfileTrans
    in one call and the schedule is then displayed with view(). Invokes
    without a schedule are reported and skipped.

    :param psy: The PSy layer object to apply transformations to.
    :type psy: :py:class:`psyclone.psyGen.PSy`

    '''
    invoke_names = "\n".join(str(name) for name in psy.invokes.names)
    print("Invokes found:\n{0}\n".format(invoke_names))

    profiler = ProfileTrans()

    for invoke in psy.invokes.invoke_list:
        schedule = invoke.schedule
        if schedule:
            # Enclose all children of the schedule within a single
            # profile region
            profiler.apply(schedule.children)
            schedule.view()
        else:
            print("Invoke {0} has no Schedule! Skipping...".format(
                invoke.name))
def add_profile_nodes(schedule, loop_class):
    '''Insert all required profiling nodes (for kernels and/or invokes,
    as reported by the Profiler settings) into a schedule.

    For kernel profiling, the region is placed around the outermost
    tightly-nested loop of type ``loop_class`` that contains the kernel
    (or around the directive applied to that loop), unless the loop lies
    inside an OpenACC directive. An invoke will not be profiled if it
    contains more than one Return or if the Return is not the last
    statement; a message is written to stderr in that case.

    :param schedule: The schedule to instrument.
    :type schedule: :py:class:`psyclone.psyGen.InvokeSchedule` or subclass
    :param loop_class: The loop class (e.g. GOLoop, DynLoop) to instrument.
    :type loop_class: :py:class:`psyclone.psyir.nodes.Loop` or subclass

    '''
    profile_trans = ProfileTrans()

    if Profiler.profile_kernels():
        for kern in schedule.walk(Kern):
            # Walk back up from the kernel to find the outermost loop of
            # the specified class.
            outermost = None
            candidate = kern.ancestor(loop_class)
            while candidate:
                if len(candidate.loop_body.children) != 1:
                    # We only permit tightly-nested loops
                    break
                outermost = candidate
                candidate = candidate.ancestor(loop_class)

            # We only add profiling if we're not within some OpenACC
            # region (as otherwise, the PSyData routines being called
            # would have to be compiled for the GPU).
            if not outermost or outermost.ancestor(ACCDirective):
                continue

            # Have to take care that the target loop does not have a
            # directive applied to it. We distinguish this case from that
            # of a directive defining a region by checking the number of
            # children of the directive.
            grandparent = outermost.parent.parent
            if (isinstance(grandparent, Directive) and
                    len(grandparent.dir_body.children) == 1):
                # The Directive has only the current loop as a child.
                # Therefore, enclose the Directive within the profiling
                # region too.
                outermost = grandparent
            profile_trans.apply(outermost)

    if Profiler.profile_invokes():
        # We cannot include Return statements within profiling regions
        returns = schedule.walk(Return)
        if not returns:
            profile_trans.apply(schedule.children)
        elif len(returns) == 1 and returns[0] is schedule.children[-1]:
            # There's only one Return and it's the last statement so
            # simply exclude it from the profiling region.
            profile_trans.apply(schedule.children[:-1])
        else:
            # TODO #11 use logging instead.
            print("Not adding profiling to routine '{0}' because it "
                  "contains one or more Return statements.".format(
                      schedule.name), file=sys.stderr)
def test_no_psydata_in_kernels(parser, monkeypatch):
    '''Check that PSyData calls are rejected inside an OpenACC kernels
    region: first by the transformation itself and, when its validation
    is bypassed, at code-generation time.

    '''
    parse_tree = parser(FortranStringReader(EXPLICIT_LOOP))
    psy = PSyFactory(API, distributed_memory=False).create(parse_tree)
    schedule = psy.invokes.invoke_list[0].schedule
    profiler = ProfileTrans()

    # Put the first node of the schedule inside a kernels region.
    ACCKernelsTrans().apply(schedule[0])

    # Attempt to put a profiling call within the loop
    first_assignment = schedule.walk(Assignment)[0]
    with pytest.raises(TransformationError) as err:
        profiler.apply(first_assignment)
    message = str(err.value)
    assert ("A PSyData node cannot be inserted inside an OpenACC region"
            in message)

    # Monkeypatch the validate() method so as to avoid the checking
    # that it does
    monkeypatch.setattr(profiler, "validate", lambda x, y: None)
    profiler.apply(first_assignment)

    # Check that an appropriate error is raised at code-generation time
    with pytest.raises(GenerationError) as err:
        _ = psy.gen
    message = str(err.value)
    assert ("Cannot include calls to PSyData routines within OpenACC "
            "regions" in message)
def test_profile_named_gocean1p0():
    '''Check that, for the gocean 1.0 API, a user-supplied region name
    (here the PSy-layer name and the invoke name) appears in the generated
    PreStart call.

    '''
    psy, invoke = get_invoke("test11_different_iterates_over_one_invoke.f90",
                             "gocean1.0", idx=0)

    region = (psy.name, invoke.name)
    transformation = ProfileTrans()
    _ = transformation.apply(invoke.schedule.children,
                             options={"region_name": region})

    expected_call = ("CALL profile_psy_data%PreStart("
                     "\"psy_single_invoke_different_iterates_over\", "
                     "\"invoke_0\", 0, 0)")
    generated = str(invoke.gen())
    assert expected_call in generated
def test_profile_basic(capsys):
    '''Check basic functionality: node names, schedule view.

    Invoke profiling is switched on through Profiler.set_options() and
    the profile nodes are added with Profiler.add_profile_nodes(). The
    output of view() and the string form of the schedule (after a second,
    nested profile region has been added) are compared with the expected
    text. The Profiler options are reset to None as the last step.

    '''
    # NOTE(review): the whitespace inside the expected-output literals of
    # this test looks collapsed (indentation and, in the triple-quoted
    # string, line breaks reduced to single spaces) - confirm them against
    # the real output.
    Profiler.set_options([Profiler.INVOKES])
    _, invoke = get_invoke("test11_different_iterates_over_one_invoke.f90",
                           "gocean1.0", idx=0, dist_mem=False)
    # This test expects constant loop bounds
    invoke.schedule._const_loop_bounds = True
    Profiler.add_profile_nodes(invoke.schedule, Loop)

    # The whole invoke is now expected to sit below one profile node.
    assert isinstance(invoke.schedule[0], ProfileNode)

    invoke.schedule.view()
    out, _ = capsys.readouterr()

    # Build the coloured node names that view() is expected to print.
    gsched = colored("GOInvokeSchedule", GOInvokeSchedule._colour)
    sched = colored("Schedule", Schedule._colour)
    loop = Loop().coloured_name(True)
    profile = invoke.schedule[0].coloured_name(True)

    # Do one test based on schedule view, to make sure colouring
    # and indentation is correct
    expected = (gsched + "[invoke='invoke_0', Constant loop bounds=True]\n"
                " 0: " + profile + "[]\n"
                " " + sched + "[]\n"
                " 0: " + loop + "[type='outer', field_space='go_cv', "
                "it_space='go_internal_pts']\n")
    assert expected in out

    prt = ProfileTrans()

    # Insert a profile call between outer and inner loop.
    # This tests that we find the subroutine node even
    # if it is not the immediate parent.
    new_sched, _ = prt.apply(invoke.schedule[0].profile_body[0].loop_body[0])

    new_sched_str = str(new_sched)

    correct = ("""GOInvokeSchedule[invoke='invoke_0', \ Constant loop bounds=True]: ProfileStart[var=profile_psy_data] GOLoop[id:'', variable:'j', loop_type:'outer'] Literal[value:'2', Scalar<INTEGER, UNDEFINED>] Literal[value:'jstop-1', Scalar<INTEGER, UNDEFINED>] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Schedule: ProfileStart[var=profile_psy_data_1] GOLoop[id:'', variable:'i', loop_type:'inner'] Literal[value:'2', Scalar<INTEGER, UNDEFINED>] Literal[value:'istop', Scalar<INTEGER, UNDEFINED>] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Schedule: kern call: compute_cv_code End Schedule End GOLoop ProfileEnd End Schedule End GOLoop GOLoop[id:'', variable:'j', loop_type:'outer'] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Literal[value:'jstop+1', Scalar<INTEGER, UNDEFINED>] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Schedule: GOLoop[id:'', variable:'i', loop_type:'inner'] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Literal[value:'istop+1', Scalar<INTEGER, UNDEFINED>] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Schedule: kern call: bc_ssh_code End Schedule End GOLoop End Schedule End GOLoop ProfileEnd End Schedule""")
    assert correct in new_sched_str

    # Switch profiling off again so that the Profiler options set at the
    # top of this test are not left in place.
    Profiler.set_options(None)
def test_transform(capsys):
    '''Tests normal behaviour of profile region transformation.

    Three cumulative steps are checked: wrapping all children of the
    schedule in one region, wrapping only the middle loop inside that
    region, and wrapping a list built from two individual nodes. The
    first two are compared via str() of the returned schedule, the last
    one via the output of view().

    '''
    # pylint: disable=too-many-locals
    # NOTE(review): the whitespace inside the triple-quoted expected
    # strings of this test looks collapsed (line breaks and indentation
    # reduced to single spaces) - confirm them against the real output.
    _, invoke = get_invoke("test27_loop_swap.f90", "gocean1.0",
                           name="invoke_loop1", dist_mem=False)
    schedule = invoke.schedule
    # This test expects constant loop bounds
    schedule._const_loop_bounds = True

    prt = ProfileTrans()
    assert str(prt) == "Create a sub-tree of the PSyIR that has " \
                       "a node of type ProfileNode at its root."
    assert prt.name == "ProfileTrans"

    # Try applying it to a list
    sched1, _ = prt.apply(schedule.children)

    correct = ("""GOInvokeSchedule[invoke='invoke_loop1', \ Constant loop bounds=True]: ProfileStart[var=profile_psy_data] GOLoop[id:'', variable:'j', loop_type:'outer'] Literal[value:'2', Scalar<INTEGER, UNDEFINED>] Literal[value:'jstop', Scalar<INTEGER, UNDEFINED>] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Schedule: GOLoop[id:'', variable:'i', loop_type:'inner'] Literal[value:'2', Scalar<INTEGER, UNDEFINED>] Literal[value:'istop', Scalar<INTEGER, UNDEFINED>] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Schedule: kern call: bc_ssh_code End Schedule End GOLoop End Schedule End GOLoop GOLoop[id:'', variable:'j', loop_type:'outer'] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Literal[value:'jstop+1', Scalar<INTEGER, UNDEFINED>] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Schedule: GOLoop[id:'', variable:'i', loop_type:'inner'] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Literal[value:'istop', Scalar<INTEGER, UNDEFINED>] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Schedule: kern call: bc_solid_u_code End Schedule End GOLoop End Schedule End GOLoop GOLoop[id:'', variable:'j', loop_type:'outer'] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Literal[value:'jstop', Scalar<INTEGER, UNDEFINED>] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Schedule: GOLoop[id:'', variable:'i', loop_type:'inner'] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Literal[value:'istop+1', Scalar<INTEGER, UNDEFINED>] Literal[value:'1', 
Scalar<INTEGER, UNDEFINED>] Schedule: kern call: bc_solid_v_code End Schedule End GOLoop End Schedule End GOLoop ProfileEnd End Schedule""")
    assert correct in str(sched1)

    # Now only wrap a single node - the middle loop:
    sched2, _ = prt.apply(schedule[0].profile_body[1])

    correct = ("""GOInvokeSchedule[invoke='invoke_loop1', \ Constant loop bounds=True]: ProfileStart[var=profile_psy_data] GOLoop[id:'', variable:'j', loop_type:'outer'] Literal[value:'2', Scalar<INTEGER, UNDEFINED>] Literal[value:'jstop', Scalar<INTEGER, UNDEFINED>] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Schedule: GOLoop[id:'', variable:'i', loop_type:'inner'] Literal[value:'2', Scalar<INTEGER, UNDEFINED>] Literal[value:'istop', Scalar<INTEGER, UNDEFINED>] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Schedule: kern call: bc_ssh_code End Schedule End GOLoop End Schedule End GOLoop ProfileStart[var=profile_psy_data_1] GOLoop[id:'', variable:'j', loop_type:'outer'] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Literal[value:'jstop+1', Scalar<INTEGER, UNDEFINED>] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Schedule: GOLoop[id:'', variable:'i', loop_type:'inner'] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Literal[value:'istop', Scalar<INTEGER, UNDEFINED>] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Schedule: kern call: bc_solid_u_code End Schedule End GOLoop End Schedule End GOLoop ProfileEnd GOLoop[id:'', variable:'j', loop_type:'outer'] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Literal[value:'jstop', Scalar<INTEGER, UNDEFINED>] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Schedule: GOLoop[id:'', variable:'i', loop_type:'inner'] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Literal[value:'istop+1', Scalar<INTEGER, UNDEFINED>] Literal[value:'1', Scalar<INTEGER, UNDEFINED>] Schedule: kern call: bc_solid_v_code End Schedule End GOLoop End Schedule End GOLoop ProfileEnd End Schedule""")
    assert correct in str(sched2)

    # Check that a sublist created from individual elements
    # can be wrapped
    sched3, _ = prt.apply(
        [sched2[0].profile_body[0], sched2[0].profile_body[1]])
    sched3.view()
    out, _ = capsys.readouterr()

    # Build the coloured node names that view() is expected to print.
    gsched = colored("GOInvokeSchedule", GOInvokeSchedule._colour)
    prof = colored("Profile", ProfileNode._colour)
    sched = colored("Schedule", Schedule._colour)
    loop = colored("Loop", Loop._colour)

    # One indentation step of the expected view() output.
    indent = 4 * " "
    correct = (gsched + "[invoke='invoke_loop1', Constant loop bounds=True]\n"
               + indent + "0: " + prof + "[]\n"
               + 2 * indent + sched + "[]\n"
               + 3 * indent + "0: " + prof + "[]\n"
               + 4 * indent + sched + "[]\n"
               + 5 * indent + "0: " + loop +
               "[type='outer', field_space='go_ct',"
               " it_space='go_internal_pts']\n")
    assert correct in out

    # The region added earlier around the middle loop is expected as the
    # second child of the innermost profile region.
    correct2 = (5 * indent + "1: " + prof + "[]\n"
                + 6 * indent + sched + "[]\n"
                + 7 * indent + "0: " + loop +
                "[type='outer', field_space='go_cu',"
                " it_space='go_all_pts']\n")
    assert correct2 in out