def __init__(self, version, **kwargs):
    """Deep-copy OpenACC example check for the given *version* variant."""
    suffix = version.replace('+', '_')
    super().__init__('deepcopy_%s_example' % suffix, **kwargs)
    self.sourcepath = 'deepcopy/'
    self.valid_prog_environs = ['PrgEnv-pgi']
    self.modules = ['craype-accel-nvidia60']
    # The prebuilt binary name encodes the variant with dots
    self.executable = './deepcopy/deepcopy.%s' % version.replace('+', '.')
    self.sanity_patterns = sn.assert_found('3', self.stdout)
def __init__(self, lang='c', **kwargs):
    """AXPY OpenACC example; *lang* picks the C or Fortran binary."""
    super().__init__('axpy_%s_example' % lang, **kwargs)
    exe = './axpy/axpy.openacc'
    if lang == 'fortran':
        exe = './axpy/axpy.openacc.fort'
    self.executable = exe
    self.sourcepath = 'axpy/'
    self.sanity_patterns = sn.assert_found('PASSED', self.stdout)
def __init__(self, version, **kwargs):
    """2-D diffusion OpenACC example for the given *version* variant."""
    super().__init__('diffusion_%s_example' % version.replace('+', '_'),
                     **kwargs)
    self.sourcepath = 'diffusion/'
    self.executable = ('./diffusion/diffusion2d.%s'
                       % version.replace('+', '.'))
    if 'mpi' in version:
        # Multi-rank variants need MPS and GPU-aware MPI
        self.num_tasks = 2
        self.num_tasks_per_node = 1
        self.variables = {
            'CRAY_CUDA_MPS': '1',
            'MPICH_RDMA_ENABLED_CUDA': '1'
        }
    self.sanity_patterns = sn.assert_found('writing to output', self.stdout)
    self.keep_files = ['output.bin', 'output.bov']
def __init__(self, hugepages):
    """Measure the time to allocate 4096 MB, with or without hugepages.

    :param hugepages: 'no' for the baseline run, otherwise the hugepage
        size suffix of the ``craype-hugepages*`` module (e.g. '2M').
    """
    super().__init__()
    self.descr = 'Time to allocate 4096 MB using %s hugepages' % hugepages
    self.sourcepath = 'alloc_speed.cpp'
    self.build_system = 'SingleSource'
    self.build_system.cxxflags = ['-O3', '-std=c++11']
    self.valid_systems = ['daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc']
    self.valid_prog_environs = ['PrgEnv-gnu']
    if hugepages == 'no':
        # kesch only runs the baseline (no hugepages modules there)
        self.valid_systems += ['kesch:cn', 'kesch:pn']
    else:
        if self.current_system.name in {'dom', 'daint', 'tiger'}:
            self.modules = ['craype-hugepages%s' % hugepages]
    if self.current_system.name in {'tiger'}:
        self.extra_resources = {'mem-per-cpu': {'mem_per_cpu': 0}}
    self.sanity_patterns = sn.assert_found('4096 MB', self.stdout)
    self.perf_patterns = {
        'time': sn.extractsingle(r'4096 MB, allocation time (?P<time>\S+)',
                                 self.stdout, 'time', float)
    }
    # Per-system references, keyed by the hugepages variant;
    # '*' is the catch-all entry for unlisted partitions.
    self.sys_reference = {
        'no': {
            'dom:gpu': {
                'time': (1.00, None, 0.10, 's')
            },
            'dom:mc': {
                'time': (1.20, None, 0.10, 's')
            },
            'daint:gpu': {
                'time': (1.80, None, 0.10, 's')
            },
            'daint:mc': {
                'time': (2.00, None, 0.10, 's')
            },
            'kesch:cn': {
                'time': (1.25, None, 0.10, 's')
            },
            'kesch:pn': {
                'time': (0.55, None, 0.10, 's')
            },
            '*': {
                'time': (0, None, None, 's')
            }
        },
        '2M': {
            'dom:gpu': {
                'time': (0.11, None, 0.10, 's')
            },
            'dom:mc': {
                'time': (0.20, None, 0.10, 's')
            },
            'daint:gpu': {
                'time': (0.11, None, 0.10, 's')
            },
            'daint:mc': {
                'time': (0.20, None, 0.10, 's')
            },
            '*': {
                'time': (0, None, None, 's')
            }
        },
    }
    self.reference = self.sys_reference[hugepages]
    self.maintainers = ['AK', 'VH']
    self.tags = {'production', 'craype'}
def set_sanity(self):
    """All roofline outputs must be present for the run to pass."""
    checks = [
        sn.assert_found(r'GFLOPs EMP', self.roofline_out_script1),
        sn.assert_found(r'DRAM EMP', self.roofline_out_script1),
        sn.assert_found('Empirical roofline graph:', self.stdout),
    ]
    return sn.all(checks)
def validate_test(self):
    """The job output must report the CPU affinity information."""
    pattern = r'CPU affinity'
    return sn.assert_found(pattern, self.stdout)
def assert_sanity(self):
    """The string 'perftest' must appear in the job output."""
    marker = r'perftest'
    return sn.assert_found(marker, self.stdout)
def __init__(self):
    """Minimal echo check: runs everywhere, expects 'hello' in stdout."""
    self.valid_prog_environs = ['*']
    self.valid_systems = ['*']
    self.executable = 'echo'
    self.executable_opts = ['hello']
    self.sanity_patterns = sn.assert_found(r'hello', self.stdout)
def __init__(self):
    """Validate the Cray ATP (Abnormal Termination Processing) debugger
    on the SPH-EXA sedov mini-app: force an MPI abort, then check that
    ATP produced core dumps and a merged backtrace tree.
    """
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = [
        'PrgEnv-gnu', 'cpeGNU'
        # 'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi', 'PrgEnv-cray',
        # 'PrgEnv-aocc', 'cpeGNU', 'cpeIntel', 'cpeCray', 'cpeAMD',
    ]
    self.valid_systems = [
        'dom:mc', 'dom:gpu', 'daint:mc', 'daint:gpu', 'eiger:mc',
        'pilatus:mc'
    ]
    self.tool = 'atp'
    self.modules = [self.tool, 'cray-stat']  # cray-cti
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools', 'cpu', 'craype', 'debugging'}
    # }}}
    # {{{ compile
    self.testname = 'sedov'
    self.sourcepath = f'{self.testname}.cpp'
    # atp requires full path:
    self.executable = './mpi+omp'
    cs = self.current_system.name
    # Regexes used below, both for grepping config files at build time
    # and for the sanity checks on the generated report files:
    re_slm_1 = 'libAtpSigHandler.so'
    re_slm_2 = 'libAtpDispatch.so'
    re_epr_ini1 = '^slurm = True'
    re_epr_cfg1 = '.debug., .ps., None, False'
    re_epr_cfg2 = '.eproxy., .eproxy., None, True'
    re_hosts_1 = 'login'
    # An empty value after '=' means the env variable was not set:
    re_ver_1 = 'STAT_VERSION1=$'
    re_ver_2 = 'STAT_VERSION2=$'
    re_ver_3 = 'ATP_VERSION1=$'
    re_ver_4 = 'ATP_VERSION2=$'
    re_ver_5 = 'ATP_HOME=$'
    re_which_1 = 'not found'
    re_stderr_1 = 'forcing job termination|Force Terminated (job|Step)'
    re_stderr_2 = 'Producing core dumps for rank'
    re_stderr_3 = 'View application merged backtrace tree with: stat-view'
    re_dot_1 = 'MPI_Allreduce|MPID_Abort|PMPI_Abort'
    re_dot_2 = 'sphexa::sph::|cstone::'
    re_core_1 = 'core file x86-64'
    # TODO: grep sphexa::sph atpMergedBT_line.dot -> perf_patterns
    # 94 [pos="0,0", label="sphexa::sph::neighborsSum(...
    # Report files produced by the pre/post-build and run commands:
    ldd_rpt = 'ldd.rpt'
    cfg_rpt = 'cfg.rpt'
    version_rpt = 'version.rpt'
    which_rpt = 'which.rpt'
    slurm_cfg_file = '/etc/opt/slurm/plugstack.conf'
    cfg_file_path = '/opt/cray/elogin/eproxy'
    eproxy_ini_cfg_file = f'{cfg_file_path}/etc/eproxy.ini'
    eproxy_cfg_file = f'{cfg_file_path}/default/bin/eproxy_config.py'
    hosts_cfg_file = '/etc/hosts'
    apt_dot_file = 'atpMergedBT_line.dot'
    # TODO: regex_rk0 = 'core.atp.*.0.0.*'
    # {{{ Needed when reporting a support case:
    if cs not in {'pilatus', 'eiger'}:
        self.prebuild_cmds += [
            # --- check slurm_cfg
            # (optional /opt/cray/pe/atp/libAtpDispatch.so)
            f'grep "{re_slm_2}" {slurm_cfg_file} > {cfg_rpt}',
            # --- check ini_cfg_file (slurm = True)
            f'grep "{re_epr_ini1}" {eproxy_ini_cfg_file} >> {cfg_rpt}',
            # --- check eproxy_cfg_file (['debug', 'ps', None, False])
            f'grep "{re_epr_cfg1}" {eproxy_cfg_file} >> {cfg_rpt}',
            # --- check eproxy_cfg_file (['eproxy', 'eproxy', None, True])
            f'grep "{re_epr_cfg2}" {eproxy_cfg_file} >> {cfg_rpt}',
            # --- check STAT_MOM_NODE in /etc/hosts (daintgw01|domgw03)
            f'grep "{re_hosts_1}" {hosts_cfg_file} >> {cfg_rpt}',
            # --- check stat version
            f'echo STAT_VERSION1=$STAT_VERSION > {version_rpt}',
            f'echo STAT_VERSION2=`STATbin --version` >> {version_rpt}',
        ]
    else:
        self.prebuild_cmds += [
            # --- check stat version
            f'echo STAT_VERSION1=$STAT_LEVEL > {version_rpt}',
            f'echo STAT_VERSION2=`STATbin --version` >> {version_rpt}',
        ]
    # }}}
    self.prebuild_cmds += [
        # TODO: open cray case
        f'export PKG_CONFIG_PATH=$ATP_INSTALL_DIR/lib/pkgconfig:'
        f'$PKG_CONFIG_PATH',
        # --- check atp version and path
        f'echo ATP_VERSION1=$ATP_VERSION >> {version_rpt}',
        f'echo ATP_VERSION2='
        f'`pkg-config --modversion libAtpSigHandler` >> {version_rpt}',
        f'echo ATP_HOME=$ATP_HOME >> {version_rpt}',
        f'pkg-config --variable=exec_prefix libAtpSigHandler &>{which_rpt}'
    ]
    self.postbuild_cmds += [
        # --- check exe (/opt/cray/pe/atp/3.8.1/lib/libAtpSigHandler.so.1)
        f'ldd {self.executable}* |grep "{re_slm_1}" &> {ldd_rpt}',
    ]
    # }}}
    # {{{ run
    self.time_limit = '10m'
    self.variables['ATP_ENABLED'] = '1'
    self.postrun_cmds += [
        f'ldd {self.executable}* |grep atp',
        'file core*'
    ]
    # {{{ TODO: gdb_command
    # - gdb_command = (r'-e %s '
    # -                r'--eval-command="set pagination off" '
    # -                r'--eval-command="bt" '
    # -                r'--eval-command="quit"' % self.executable)
    # - regex_not_rk0 = r'grep -m1 -v atp'
    # - self.postrun_cmds = [
    # -     'echo stoptime=`date +%s`',
    # -     # --- rank 0: MPI_Allreduce
    # -     f'gdb -c {regex_rk0} {gdb_command} &> {self.rpt_rk0}',
    # -
    # -     # --- rank>2: MPI::Comm::Abort
    # -#    f'ln -s `ls -1 core.* |{regex_not_rk0}` mycore',
    # -#    f'gdb -c mycore {gdb_command} &> {self.rpt_rkn}',
    # -     # can't do this because core filename is unknown at runtime:
    # -     # 'gdb -c core.atp.*.%s.* -e %s' % (self.core, self.executable),
    # -
    # -     '# stat-view atpMergedBT_line.dot'
    # }}}
    # }}}
    # {{{ sanity
    # TODO: self.sanity_patterns += ... ?
    if cs in {'pilatus', 'eiger'}:
        # No access to the slurm/eproxy config files on these systems,
        # so cfg.rpt checks are skipped there.
        self.sanity_patterns = sn.all([
            # check the job output:
            sn.assert_found(r'UpdateSmoothingLength: \S+s', self.stdout),
            # check the tool output:
            sn.assert_found(re_slm_1, ldd_rpt),
            #
            sn.assert_not_found(re_ver_1, version_rpt),
            sn.assert_not_found(re_ver_2, version_rpt),
            sn.assert_not_found(re_ver_3, version_rpt),
            sn.assert_not_found(re_ver_4, version_rpt),
            sn.assert_not_found(re_ver_5, version_rpt),
            sn.assert_not_found(re_which_1, which_rpt),
            #
            sn.assert_found(re_stderr_1, self.stderr),
            sn.assert_found(re_stderr_2, self.stderr),
            sn.assert_found(re_stderr_3, self.stderr),
            #
            sn.assert_found(re_dot_1, apt_dot_file),
            sn.assert_found(re_dot_2, apt_dot_file),
            sn.assert_found(re_core_1, self.stdout),
        ])
    else:
        self.sanity_patterns = sn.all([
            # check the job output:
            sn.assert_found(r'UpdateSmoothingLength: \S+s', self.stdout),
            # check the tool output:
            sn.assert_found(re_slm_1, ldd_rpt),
            sn.assert_found(re_slm_2, cfg_rpt),
            sn.assert_found(re_epr_ini1, cfg_rpt),
            sn.assert_found(re_epr_cfg1, cfg_rpt),
            sn.assert_found(re_epr_cfg2, cfg_rpt),
            sn.assert_found(re_hosts_1, cfg_rpt),
            #
            sn.assert_not_found(re_ver_1, version_rpt),
            sn.assert_not_found(re_ver_2, version_rpt),
            sn.assert_not_found(re_ver_3, version_rpt),
            sn.assert_not_found(re_ver_4, version_rpt),
            sn.assert_not_found(re_ver_5, version_rpt),
            sn.assert_not_found(re_which_1, which_rpt),
            #
            sn.assert_found(re_stderr_1, self.stderr),
            sn.assert_found(re_stderr_2, self.stderr),
            sn.assert_found(re_stderr_3, self.stderr),
            #
            sn.assert_found(re_dot_1, apt_dot_file),
            sn.assert_found(re_dot_2, apt_dot_file),
            sn.assert_found(re_core_1, self.stdout),
        ])
def __init__(self):
    """Echoes the test's own name and checks it appears in stdout."""
    self.valid_systems = ['sys0:p0', 'sys0:p1']
    self.valid_prog_environs = ['e0', 'e1']
    self.executable = 'echo'
    test_name = self.name
    self.executable_opts = [test_name]
    self.sanity_patterns = sn.assert_found(test_name, self.stdout)
def __init__(self):
    """STREAM memory-bandwidth benchmark; reports the triad figure."""
    self.descr = 'STREAM Benchmark'
    self.exclusive_access = True
    self.valid_systems = [
        'daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', 'kesch:cn',
        'kesch:pn', 'tiger:gpu', 'arolla:cn', 'arolla:pn', 'tsa:cn',
        'tsa:pn'
    ]
    self.valid_prog_environs = [
        'PrgEnv-cray', 'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi',
        'PrgEnv-cray_classic'
    ]
    self.use_multithreading = False
    # The OpenMP flag spelling differs per programming environment
    self.prgenv_flags = {
        'PrgEnv-cray_classic': ['-homp', '-O3'],
        'PrgEnv-cray': ['-fopenmp', '-O3'],
        'PrgEnv-gnu': ['-fopenmp', '-O3'],
        'PrgEnv-intel': ['-qopenmp', '-O3'],
        'PrgEnv-pgi': ['-mp', '-O3']
    }
    if self.current_system.name == 'kesch':
        self.exclusive_access = True
        self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-gnu']
        # kesch's PrgEnv-cray is the classic compiler: reuse its flags
        cray_flags = self.prgenv_flags['PrgEnv-cray_classic']
        self.prgenv_flags['PrgEnv-cray'] = cray_flags
    elif self.current_system.name in ['arolla', 'tsa']:
        self.exclusive_access = True
        self.valid_prog_environs = ['PrgEnv-gnu']
    self.sourcepath = 'stream.c'
    self.build_system = 'SingleSource'
    self.num_tasks = 1
    self.num_tasks_per_node = 1
    # Threads per task for each partition (consumed elsewhere in the test)
    self.stream_cpus_per_task = {
        'arolla:cn': 16,
        'arolla:pn': 16,
        'daint:gpu': 12,
        'daint:mc': 36,
        'dom:gpu': 12,
        'dom:mc': 36,
        'kesch:cn': 24,
        'kesch:pn': 24,
        'leone:normal': 16,
        'monch:compute': 20,
        'tsa:cn': 16,
        'tsa:pn': 16,
    }
    self.variables = {'OMP_PLACES': 'threads', 'OMP_PROC_BIND': 'spread'}
    self.sanity_patterns = sn.assert_found(
        r'Solution Validates: avg error less than', self.stdout)
    self.perf_patterns = {
        'triad': sn.extractsingle(r'Triad:\s+(?P<triad>\S+)\s+\S+',
                                  self.stdout, 'triad', float)
    }
    # Bandwidth references per programming environment and partition
    self.stream_bw_reference = {
        'PrgEnv-cray_classic': {
            'daint:gpu': {'triad': (57000, -0.05, None, 'MB/s')},
            'daint:mc': {'triad': (117000, -0.05, None, 'MB/s')},
            'dom:gpu': {'triad': (57000, -0.05, None, 'MB/s')},
            'dom:mc': {'triad': (117000, -0.05, None, 'MB/s')},
        },
        'PrgEnv-cray': {
            'daint:gpu': {'triad': (44000, -0.05, None, 'MB/s')},
            'daint:mc': {'triad': (89000, -0.05, None, 'MB/s')},
            'dom:gpu': {'triad': (44000, -0.05, None, 'MB/s')},
            'dom:mc': {'triad': (89000, -0.05, None, 'MB/s')},
            'kesch:cn': {'triad': (85000, -0.05, None, 'MB/s')},
            'kesch:pn': {'triad': (113000, -0.05, None, 'MB/s')},
        },
        'PrgEnv-gnu': {
            'daint:gpu': {'triad': (43800, -0.05, None, 'MB/s')},
            'daint:mc': {'triad': (88500, -0.05, None, 'MB/s')},
            'dom:gpu': {'triad': (43800, -0.05, None, 'MB/s')},
            'dom:mc': {'triad': (87500, -0.05, None, 'MB/s')},
            'kesch:cn': {'triad': (47000, -0.05, None, 'MB/s')},
            'kesch:pn': {'triad': (84400, -0.05, None, 'MB/s')},
        },
        'PrgEnv-intel': {
            'daint:gpu': {'triad': (59500, -0.05, None, 'MB/s')},
            'daint:mc': {'triad': (119000, -0.05, None, 'MB/s')},
            'dom:gpu': {'triad': (59500, -0.05, None, 'MB/s')},
            'dom:mc': {'triad': (119000, -0.05, None, 'MB/s')},
        },
        'PrgEnv-pgi': {
            'daint:gpu': {'triad': (44500, -0.05, None, 'MB/s')},
            'daint:mc': {'triad': (88500, -0.05, None, 'MB/s')},
            'dom:gpu': {'triad': (44500, -0.05, None, 'MB/s')},
            'dom:mc': {'triad': (88500, -0.05, None, 'MB/s')},
        }
    }
    self.tags = {'production', 'craype'}
    self.maintainers = ['RS', 'SK']
def validate(self):
    """Expect the classic greeting in the job's standard output."""
    greeting = r'Hello, World\!'
    return sn.assert_found(greeting, self.stdout)
def __init__(self, mpi_task):
    """Validate the DDT debugger on the SPH-EXA sqpatch mini-app.

    An MPI abort is injected into the source at build time; DDT's HTML
    report is converted to text and checked for tracepoints and the
    aborted rank range.

    :param mpi_task: number of MPI ranks to launch.
    """
    # {{{ pe
    self.descr = 'Tool validation'
    self.valid_prog_environs = [
        'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi', 'PrgEnv-cray'
    ]
    # self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_systems = ['*']
    self.maintainers = ['JG']
    self.tags = {'sph', 'hpctools', 'cpu'}
    # }}}
    # {{{ compile
    self.testname = 'sqpatch'
    self.tool = 'ddt'
    # need to set version to avoid module load to fail.
    self.tool_v = '20.1.1-Suse-15.0'
    tc_ver = '20.08'
    self.tool_modules = {
        'PrgEnv-gnu': [f'CrayGNU/.{tc_ver}', f'{self.tool}/{self.tool_v}'],
        'PrgEnv-intel': [f'CrayIntel/.{tc_ver}',
                         f'{self.tool}/{self.tool_v}'],
        'PrgEnv-cray': [f'CrayCCE/.{tc_ver}', f'{self.tool}/{self.tool_v}'],
        'PrgEnv-pgi': [f'CrayPGI/.{tc_ver}', f'{self.tool}/{self.tool_v}'],
    }
    self.prgenv_flags = {
        'PrgEnv-gnu': [
            '-I.', '-I./include', '-std=c++14', '-g', '-O0', '-DUSE_MPI',
            '-DNDEBUG'
        ],
        'PrgEnv-intel': [
            '-I.', '-I./include', '-std=c++14', '-g', '-O0', '-DUSE_MPI',
            '-DNDEBUG'
        ],
        'PrgEnv-cray': [
            '-I.', '-I./include', '-std=c++17', '-g', '-O0', '-DUSE_MPI',
            '-DNDEBUG'
        ],
        'PrgEnv-pgi': [
            '-I.', '-I./include', '-std=c++14', '-g', '-O0', '-DUSE_MPI',
            '-DNDEBUG'
        ],
    }
    self.build_system = 'SingleSource'
    self.sourcepath = f'{self.testname}.cpp'
    self.executable = f'./{self.testname}.exe'
    # make mpi ranks > 2 crash after second iteration:
    self.step_abort = 1
    insert_abort = (r'"/sph::computeMomentumAndEnergyIAD/a if (d.rank > 2 '
                    r'&& d.iteration > %s)'
                    r' { MPI::COMM_WORLD.Abort(0); }"' % self.step_abort)
    self.prebuild_cmds = [
        'module rm xalt',
        'module list -t',
        'sed -i %s %s' % (insert_abort, self.sourcepath),
    ]
    # }}}
    # {{{ run
    ompthread = 1
    self.num_tasks = mpi_task
    # cubeside_dict/steps_dict are module-level lookup tables
    self.cubeside = cubeside_dict[mpi_task]
    self.steps = steps_dict[mpi_task]
    self.name = 'sphexa_ddt_{}_{:03d}mpi_{:03d}omp_{}n_{}steps'.format(
        self.testname, mpi_task, ompthread, self.cubeside, self.steps)
    self.num_tasks_per_node = 24
    self.num_cpus_per_task = ompthread
    self.num_tasks_per_core = 2
    self.use_multithreading = True
    self.exclusive = True
    self.time_limit = '10m'
    self.variables = {
        'CRAYPE_LINK_TYPE': 'dynamic',
        'OMP_NUM_THREADS': str(self.num_cpus_per_task),
    }
    self.executable_opts = [
        f'-n {self.cubeside}', f'-s {self.steps}', '2>&1'
    ]
    self.version_rpt = 'version.rpt'
    self.which_rpt = 'which.rpt'
    self.prerun_cmds = [
        'module rm xalt',
        f'{self.tool} --version &> {self.version_rpt}',
        f'which {self.tool} &> {self.which_rpt}',
    ]
    self.prerun_cmds += ['echo starttime=`date +%s`']
    self.txt_rpt = 'rpt_ddt.txt'
    self.htm_rpt = 'rpt_ddt.html'
    self.postrun_cmds = [
        'echo stoptime=`date +%s`',
        # htm2txt is the same as running with --output=rpt.txt, see hook
        'w3m -dump %s > %s' % (self.htm_rpt, self.txt_rpt),
    ]
    # }}}
    # {{{ sanity
    # --- "0-23 Add tracepoint for sqpatch.cpp:75"
    regex_addtp = r' 0-\d+\s+Add tracepoint for \S+:\d+'
    # --- "* number of processes : 24"
    #                              ^^
    regex_np = r'\* number of processes\s+:\s(?P<nprocs>\d+)'
    res_np = sn.extractsingle(regex_np, self.txt_rpt, 'nprocs', int)
    # ---
    # Time Tracepoint Processes Values
    # main(int, domain.clist[0]: Sparkline from 0 to 19286 domain.clist:
    #                                              ^^^^^
    regex_tpoint = r'from 0 to (?P<maxi>\d+)'
    rest = sn.count(sn.extractall(regex_tpoint, self.txt_rpt, 'maxi', int))
    # ---
    # Time Tracepoint Processes Values
    # main(int, domain.clist[0]: Sparkline from 0 to 19286 domain.clist:
    # 1 0:08.098 char**) 0-23 std::vector of length 0,
    # capacity 1786
    #          ^^^^
    regex_cap = r'domain\.clist:\s*\n?.*capacity\s+(?P<cap>\d+)'
    # --- "3-23 MPI::Comm::Abort virtual void Abort" = 21/24 core crashed
    regex_abort = r'^\s+(?P<rk0>\d+)-(?P<rkn>\d+).*MPI::Comm::Abort\s+'
    res_rk0 = sn.extractsingle(regex_abort, self.txt_rpt, 'rk0', int)
    res_rkn = sn.extractsingle(regex_abort, self.txt_rpt, 'rkn', int)
    # Number of ranks in the reported aborted range (inclusive)
    res_rk = res_rkn - res_rk0 + 1
    # ---
    self.sanity_patterns = sn.all([
        sn.assert_found(regex_addtp, self.txt_rpt),
        sn.assert_eq(self.num_tasks, res_np),
        sn.assert_found(regex_cap, self.txt_rpt),
        sn.assert_eq(self.step_abort + 2, rest),
        sn.assert_eq(self.num_tasks - 3, res_rk),
    ])
def assert_version(self):
    """The tool must report version 1.0.6 on stderr."""
    expected = r'Version 1.0.6'
    return sn.assert_found(expected, self.stderr)
def set_sanity_patterns(self):
    """The output must contain a line starting with 8."""
    pattern = r'^8'
    self.sanity_patterns = sn.assert_found(pattern, self.stdout)
def set_sanity_patterns(self):
    """The nvcc banner must appear in the output."""
    nvcc_banner = r'nvcc:\s+NVIDIA'
    return sn.assert_found(nvcc_banner, self.stdout)
def set_sanity_patterns(self):
    """Pass if the job was killed for exceeding its memory limit."""
    oom_regex = r'(exceeded memory limit)|(Out Of Memory)'
    self.sanity_patterns = sn.assert_found(oom_regex, self.stderr)
def __init__(self, version, **kwargs):
    """Dot-product OpenACC example for the given *version* variant."""
    variant = version.replace('+', '_')
    super().__init__('dot_%s_example' % variant, **kwargs)
    self.sourcepath = 'shared/'
    self.executable = './shared/dot.%s' % version.replace('+', '.')
    self.sanity_patterns = sn.assert_found('success', self.stdout)
def assert_exec_exists(self):
    """The word 'affinity' must be present in the job output."""
    needle = r'affinity'
    return sn.assert_found(needle, self.stdout)
def __init__(self, lang):
    """Jacobi solver instrumented with perftools-lite.

    :param lang: language variant ('C', 'Cpp', 'F90' or 'Cuda'); selects
        the source directory, build options and task geometry.
    """
    super().__init__()
    self.name = 'jacobi_perftools_%s' % lang.replace('+', 'p')
    self.descr = '%s check' % lang
    if lang != 'Cuda':
        self.valid_systems = ['daint:gpu', 'dom:gpu', 'daint:mc', 'dom:mc']
    else:
        # The CUDA variant needs GPU partitions
        self.valid_systems = ['daint:gpu', 'dom:gpu']
    self.valid_prog_environs = [
        'PrgEnv-cray', 'PrgEnv-gnu', 'PrgEnv-intel', 'PrgEnv-pgi'
    ]
    if lang == 'Cpp':
        self.sourcesdir = os.path.join('src', 'C++')
    else:
        self.sourcesdir = os.path.join('src', lang)
    self.modules = ['craype-accel-nvidia60', 'perftools-lite']
    self.build_system = 'Make'
    # NOTE: Restrict concurrency to allow creation of Fortran modules
    if lang == 'F90':
        self.build_system.max_concurrency = 1
    self.prgenv_flags = {
        'PrgEnv-cray': ['-O2', '-g', '-h nomessage=3140', '-homp'],
        'PrgEnv-gnu': ['-O2', '-g', '-fopenmp'],
        'PrgEnv-intel': ['-O2', '-g', '-qopenmp'],
        'PrgEnv-pgi': ['-O2', '-g', '-mp']
    }
    self.num_iterations = 200
    if lang == 'Cuda':
        self.build_system.options = [
            'NVCCFLAGS="-arch=sm_60"',
            'DDTFLAGS="-DUSE_MPI -D_CSCS_ITMAX=%s"' % self.num_iterations,
            'LIB=-lstdc++'
        ]
    self.executable = 'jacobi'
    # NOTE: Reduce time limit because for PrgEnv-pgi even if the output
    # is correct, the batch job uses all the time.
    # Use the string form ('5m' == old tuple (0, 5, 0)) for consistency
    # with the other checks in this file; the tuple form is deprecated.
    self.time_limit = '5m'
    self.num_tasks = 3
    self.num_tasks_per_node = 3
    self.num_cpus_per_task = 4
    if lang == 'Cuda':
        self.num_gpus_per_node = 1
        self.num_tasks = 1
        self.num_tasks_per_node = 1
        self.num_cpus_per_task = 1
    self.variables = {
        'ITERATIONS': str(self.num_iterations),
        'OMP_NUM_THREADS': str(self.num_cpus_per_task),
        'OMP_PROC_BIND': 'true',
        'CRAYPE_LINK_TYPE': 'dynamic'
    }
    if self.num_tasks == 1:
        # will be fixed in perftools/7.1
        self.variables['PAT_RT_REPORT_METHOD'] = 'pe'
    self.sanity_patterns = sn.assert_found('Table 1: Profile by Function',
                                           self.stdout)
    self.maintainers = ['MK', 'JG']
    self.tags = {'production'}
def test_assert_found_encoding(utf16_file):
    """assert_found must honour an explicit file encoding."""
    result = sn.assert_found('Odyssey', utf16_file, encoding='utf-16')
    assert result
def assert_4GB(self):
    """The program must report the 4096 MB allocation."""
    marker = '4096 MB'
    return sn.assert_found(marker, self.stdout)
def __init__(self, variant):
    """Horovod + TensorFlow2 synthetic benchmark.

    :param variant: 'small' runs 8 tasks (also on dom); any other value
        runs the large 32-task configuration on daint only.
    """
    self.descr = 'Distributed training with TensorFlow2 and Horovod'
    self.valid_systems = ['daint:gpu']
    self.valid_prog_environs = ['builtin']
    self.modules = ['Horovod/0.18.1-CrayGNU-19.10-tf-2.0.0']
    self.sourcesdir = None
    self.num_tasks_per_node = 1
    self.num_cpus_per_task = 12
    if variant == 'small':
        self.valid_systems += ['dom:gpu']
        self.num_tasks = 8
        self.reference = {
            'dom:gpu': {
                'throughput': (2031.6, -0.05, None, 'images/s'),
                'throughput_per_gpu': (253.9, -0.05, None, 'images/s'),
            },
            'daint:gpu': {
                'throughput': (2031.6, -0.05, None, 'images/s'),
                'throughput_per_gpu': (253.9, -0.05, None, 'images/s')
            },
        }
    else:
        self.num_tasks = 32
        self.reference = {
            'daint:gpu': {
                'throughput': (7976.6, -0.05, None, 'images/s'),
                'throughput_per_gpu': (253.9, -0.05, None, 'images/s')
            },
        }
    self.perf_patterns = {
        'throughput': sn.extractsingle(
            r'Total img/sec on %s GPU\(s\): '
            r'(?P<throughput>\S+) \S+' % self.num_tasks,
            self.stdout, 'throughput', float),
        'throughput_per_gpu': sn.extractsingle(
            r'Img/sec per GPU: (?P<throughput_per_gpu>\S+) \S+',
            self.stdout, 'throughput_per_gpu', float)
    }
    model = 'InceptionV3'
    batch_size = 64
    self.sanity_patterns = sn.all([
        sn.assert_found(r'Model: %s' % model, self.stdout),
        sn.assert_found(r'Batch size: %s' % batch_size, self.stdout)
    ])
    self.variables = {
        'NCCL_DEBUG': 'INFO',
        'NCCL_IB_HCA': 'ipogif0',
        'NCCL_IB_CUDA_SUPPORT': '1',
        'OMP_NUM_THREADS': '$SLURM_CPUS_PER_TASK',
    }
    # NOTE(review): 'pre_run' is the old ReFrame attribute name; other
    # checks in this file use 'prerun_cmds' — confirm the ReFrame
    # version in use before renaming.
    self.pre_run = ['wget https://raw.githubusercontent.com/horovod/'
                    'horovod/26b55a7890f6923ca58cdb68a765ed0ec436ab0f/'
                    'examples/tensorflow2_synthetic_benchmark.py']
    self.executable = 'python'
    self.executable_opts = [
        'tensorflow2_synthetic_benchmark.py',
        '--model %s' % model,
        '--batch-size %s' % batch_size,
    ]
    self.tags = {'production'}
    self.maintainers = ['RS', 'TR']
def assert_release(self):
    """release.txt must identify an Ubuntu 18.04 (Bionic Beaver) OS."""
    return sn.assert_found(r'18.04.\d+ LTS \(Bionic Beaver\)',
                           'release.txt')
def assert_solution(self):
    """The Amesos solve step must have completed."""
    solved = r'After Amesos solution'
    return sn.assert_found(solved, self.stdout)
def dummy_sanity(self):
    """Look for the success marker in the externally defined sanity file."""
    marker = r'success'
    return sn.assert_found(marker, sanity_file)
def validate_test(self):
    """The output must contain a line starting with 8."""
    pattern = r'^8'
    return sn.assert_found(pattern, self.stdout)
def validate(self):
    """The configured message must appear in the job output."""
    expected = self.message
    return sn.assert_found(expected, self.stdout)
def validate_run(self):
    """The benchmark must report PASSED."""
    marker = r'PASSED'
    return sn.assert_found(marker, self.stdout)
def set_sanity(self):
    """Expect the classic greeting in the job output."""
    greeting = r'Hello, World\!'
    self.sanity_patterns = sn.assert_found(greeting, self.stdout)
def __init__(self, iw, repeat, cache):
    """GPP roofline kernel on a P100 GPU.

    :param iw: inner-loop size patched into GPUComplex.h (``nend``).
    :param repeat: repeat count encoded in the test/executable name.
    :param cache: cache level whose nvprof metrics are collected
        ('L1', 'L2', 'HBM' or 'PCIe/NVLINK').
    """
    super().__init__()
    self.name = 'roofline_gpp_P100_iw{}_repeat{}_{}cache'.format(
        iw, repeat, cache)
    self.valid_systems = ['dom:gpu']
    self.valid_prog_environs = ['PrgEnv-gnu']
    self.modules = ['craype-accel-nvidia60']
    self.prebuild_cmd = [
        'cd GPP/Volta',
        # Pascal P100 GPU:
        'sed -i "s-sm_70-sm_60-" Makefile',
        # fma (fmad=true) vs nofma (fmad=false):
        'sed -i "s/fmad=.*/fmad=true/g" Makefile',
        # iw (loop size):
        'sed -i "s/#define nend.*/#define nend %s/g" GPUComplex.h' % iw,
    ]
    self.executable = './fma_iw{}_rep{}_{}.exe'.format(iw, repeat, cache)
    self.build_system.options = ['EXE=../../%s' % self.executable]
    # 1: <n_bands> 2: <n_valence_bands> 3: <n_plane_waves>
    # 4: <nodes_per_mpi_group> 5: <stride>
    self.executable_opts = ['512', '2', '32768', '20', '0']
    self.exclusive = True
    self.num_tasks = 1
    self.num_tasks_per_node = 1
    self.num_cpus_per_task = 1
    self.num_tasks_per_core = 1
    self.use_multithreading = False
    self.variables = {
        'CRAYPE_LINK_TYPE': 'dynamic',
        'OMP_NUM_THREADS': str(self.num_cpus_per_task)
    }
    # nvprof metrics collected per cache level (flops + transactions)
    self.nvprof_metrics = {
        'L1': [
            'flop_count_dp', 'gld_transactions', 'gst_transactions',
            'atomic_transactions', 'local_load_transactions',
            'local_store_transactions', 'shared_load_transactions',
            'shared_store_transactions'
        ],
        'L2': ['flop_count_dp', 'l2_read_transactions',
               'l2_write_transactions'],
        'HBM': [
            'flop_count_dp', 'dram_read_transactions',
            'dram_write_transactions'
        ],
        'PCIe/NVLINK': [
            'flop_count_dp', 'system_read_transactions',
            'system_write_transactions'
        ]
    }
    sep = ' --metrics '
    nvmetrics = sep.join(self.nvprof_metrics[cache])
    self.post_run = [
        'nvprof --kernels "NumBandNgpown_kernel" --metrics %s %s %s'
        % (nvmetrics, self.executable, ' '.join(self.executable_opts))
    ]
    # References for Nvidia P100 (HBM, iw=6):
    gflops = 2796.6
    ai = 13.6
    self.sanity_patterns = sn.all([
        sn.assert_found('P100-PCIE-16GB', self.stderr),
        sn.assert_reference(self.gflops_per_seconds, gflops, -0.5, 0.5),
        sn.assert_reference(self.arithmetic_intensity, ai, -0.5, 0.5),
    ])
def set_sanity(self):
    """The classic greeting must be present in the job output."""
    greeting = r'Hello, World\!'
    return sn.assert_found(greeting, self.stdout)
def __init__(self, variant):
    """GridTools regression benchmark.

    :param variant: name of the GridTools test binary to build and run;
        a 'cuda' substring selects the GPU build/backends.
    """
    # Check if this is a device check
    self.descr = 'GridTools test base'
    self.valid_prog_environs = ['PrgEnv-gnu']
    self.modules = ['CMake', 'Boost']
    is_cuda_test = 'cuda' in variant
    if is_cuda_test:
        self.modules.append('craype-accel-nvidia60')
    self.sourcesdir = 'https://github.com/GridTools/gridtools.git'
    self.build_system = 'CMake'
    self.build_system.config_opts = [
        '-DBoost_NO_BOOST_CMAKE="true"',
        '-DCMAKE_BUILD_TYPE:STRING=Release',
        '-DBUILD_SHARED_LIBS:BOOL=ON',
        '-DGT_GCL_ONLY:BOOL=OFF',
        '-DCMAKE_CXX_COMPILER=CC',
        '-DGT_USE_MPI:BOOL=OFF',
        '-DGT_SINGLE_PRECISION:BOOL=OFF',
        '-DGT_ENABLE_PERFORMANCE_METERS:BOOL=ON',
        '-DGT_TESTS_ICOSAHEDRAL_GRID:BOOL=OFF',
        '-DCMAKE_EXPORT_COMPILE_COMMANDS=ON',
        '-DBOOST_ROOT=$BOOST_ROOT',
        '-DGT_ENABLE_PYUTILS=OFF',
        '-DGT_TESTS_REQUIRE_FORTRAN_COMPILER=ON',
        '-DGT_TESTS_REQUIRE_C_COMPILER=ON',
        '-DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON'
    ]
    # Backend selection: CUDA-only vs CPU (x86/naive/mc) builds
    if is_cuda_test:
        self.build_system.config_opts += [
            '-DGT_ENABLE_BACKEND_X86:BOOL=OFF',
            '-DGT_ENABLE_BACKEND_NAIVE:BOOL=OFF',
            '-DGT_ENABLE_BACKEND_MC=OFF',
            '-DGT_ENABLE_BACKEND_CUDA:BOOL=ON',
            '-DCUDA_ARCH:STRING=sm_60',
            '-DCMAKE_CUDA_HOST_COMPILER:STRING=CC'
        ]
    else:
        self.build_system.config_opts += [
            '-DGT_ENABLE_BACKEND_X86:BOOL=ON',
            '-DGT_ENABLE_BACKEND_NAIVE:BOOL=ON',
            '-DGT_ENABLE_BACKEND_MC=ON',
            '-DGT_ENABLE_BACKEND_CUDA:BOOL=OFF'
        ]
    self.valid_systems = ['daint:gpu', 'dom:gpu']
    if is_cuda_test:
        self.num_gpus_per_node = 1
        self.num_tasks = 1
    else:
        self.valid_systems += ['daint:mc', 'dom:mc']
        self.num_gpus_per_node = 0
        self.num_tasks = 1
    self.sanity_patterns = sn.assert_found(r'PASSED', self.stdout)
    self.perf_patterns = {
        'wall_time': sn.extractsingle(r'(?P<timer>\w+) ms total',
                                      self.stdout, 'timer', int)
    }
    self.build_system.max_concurrency = 2
    # Per-variant run options and wall-time references;
    # '*' is the catch-all entry for unlisted partitions.
    self.variant_data = {
        'vertical_advection_dycore_naive': {
            'executable_opts': ['150', '150', '150'],
            'reference': {
                'daint:mc': {'wall_time': (3400, None, 0.1, 'ms')},
                'daint:gpu': {'wall_time': (3800, None, 0.1, 'ms')},
                'dom:mc': {'wall_time': (3400, None, 0.1, 'ms')},
                'dom:gpu': {'wall_time': (3800, None, 0.1, 'ms')},
                '*': {'wall_time': (0, None, None, 'ms')}
            }
        },
        'vertical_advection_dycore_mc': {
            'executable_opts': ['150', '150', '150'],
            'reference': {
                'daint:mc': {'wall_time': (3500, None, 0.1, 'ms')},
                'daint:gpu': {'wall_time': (3700, None, 0.1, 'ms')},
                'dom:mc': {'wall_time': (3500, None, 0.1, 'ms')},
                'dom:gpu': {'wall_time': (3700, None, 0.1, 'ms')},
                '*': {'wall_time': (0, None, None, 'ms')}
            }
        },
        'simple_hori_diff_naive': {
            'executable_opts': ['100', '100', '100'],
            'reference': {
                'daint:mc': {'wall_time': (3200, None, 0.1, 'ms')},
                'daint:gpu': {'wall_time': (3700, None, 0.1, 'ms')},
                'dom:mc': {'wall_time': (3200, None, 0.1, 'ms')},
                'dom:gpu': {'wall_time': (3700, None, 0.1, 'ms')},
                '*': {'wall_time': (0, None, None, 'ms')}
            }
        },
        'simple_hori_diff_mc': {
            'executable_opts': ['100', '100', '100'],
            'reference': {
                'daint:mc': {'wall_time': (3300, None, 0.1, 'ms')},
                'daint:gpu': {'wall_time': (3700, None, 0.1, 'ms')},
                'dom:mc': {'wall_time': (3300, None, 0.1, 'ms')},
                'dom:gpu': {'wall_time': (3700, None, 0.1, 'ms')},
                '*': {'wall_time': (0, None, None, 'ms')}
            }
        },
        'vertical_advection_dycore_cuda': {
            'executable_opts': ['200', '200', '200'],
            'reference': {
                'daint:gpu': {'wall_time': (12000, None, 0.1, 'ms')},
                'dom:gpu': {'wall_time': (12000, None, 0.1, 'ms')},
                '*': {'wall_time': (0, None, None, 'ms')}
            }
        },
        'simple_hori_diff_cuda': {
            'executable_opts': ['150', '150', '150'],
            'reference': {
                'daint:gpu': {'wall_time': (19000, None, 0.1, 'ms')},
                'dom:gpu': {'wall_time': (19000, None, 0.1, 'ms')},
                '*': {'wall_time': (0, None, None, 'ms')}
            }
        }
    }
    self.build_system.make_opts = [variant]
    self.executable = os.path.join('regression', variant)
    self.executable_opts = self.variant_data[variant]['executable_opts']
    self.reference = self.variant_data[variant]['reference']
    self.tags = {'scs', 'benchmark'}
    self.maintainers = ['CB']
def validate(self):
    """A warning must have been emitted on stderr."""
    needle = r'warning'
    return sn.assert_found(needle, self.stderr)