Example #1
 def __init__(self, num_ranks, arraysize):
     super().__init__()
     ompthread = 36 // num_ranks
     self.valid_systems = ['dom:mc']
     self.valid_prog_environs = ['PrgEnv-intel']
     self.build_system.cppflags = [
         '-D_SDE',
         '-DSTREAM_ARRAY_SIZE=%s' % arraysize, '-DNTIMES=50'
     ]
     self.exclusive = True
     self.num_tasks = num_ranks
     self.num_tasks_per_node = num_ranks
     self.num_cpus_per_task = ompthread
     self.num_tasks_per_core = 1
     self.use_multithreading = False
     self.name = 'sde_n.{:010d}_MPI.{:03d}_OpenMP.{:03d}_j.{:01d}'.format(
         arraysize, num_ranks, ompthread, self.num_tasks_per_core)
     self.variables = {
         'CRAYPE_LINK_TYPE': 'dynamic',
         'OMP_NUM_THREADS': str(self.num_cpus_per_task)
     }
     self.sdeflags = [
         '%s -d -iform 1 -omix %s -i -top_blocks 500 '
         '-global_region -start_ssc_mark 111:repeat '
         '-stop_ssc_mark 222:repeat -- %s' %
         ('-bdw', self.sde, self.target_executable)
     ]
     # References for Intel Broadwell CPU (E5-2695 v4):
     ai = 0.0825
     gflops = 9.773
     self.sanity_patterns = sn.all([
         sn.assert_reference(self.gflops, gflops, -0.1, 0.3),
         sn.assert_reference(self.arithmetic_intensity, ai, -0.1, 0.3),
     ])
Example #2
 def setup(self, partition, environ, **job_opts):
     self.modules = self.ipm_modules[environ.name]
     super().setup(partition, environ, **job_opts)
     environ_name = self.current_environ.name
     prgenv_flags = self.prgenv_flags[environ_name]
     self.build_system.cflags = prgenv_flags
     self.build_system.cxxflags = prgenv_flags
     self.build_system.fflags = prgenv_flags
     self.build_system.ldflags = [
         '-lm', '`pkg-config --libs papi`', '`pkg-config --libs pfm`',
         '${IPM}'
     ]
     self.htmlrpt = 'index.html'
     self.sanity_patterns = sn.all([
         # check the job:
         sn.assert_found('SUCCESS', self.stdout),
         # check the txt report:
         sn.assert_reference(
             sn.extractsingle(
                 r'^#\sPAPI_L1_TCM\s+(?P<totalmissesL1>\S\.\S+)',
                 self.txtrpt, 'totalmissesL1', float), 91159658, -0.1, 0.1),
         # check the html report:
         sn.assert_reference(
             sn.extractsingle(
                 r'^<tr><td>\sPAPI_L1_TCM\s<\/td><td\salign=right>\s'
                 r'(?P<totalmissesL1>\d+)', self.htmlrpt, 'totalmissesL1',
                 float), 91159658, -0.1, 0.1),
     ])
Example #3
    def check_performance(self):
        """The performance checking phase of the regression test pipeline.

        :raises reframe.core.exceptions.SanityError: If the performance check
            fails.
        """
        if self.perf_patterns is None:
            return

        with os_ext.change_dir(self._stagedir):
            # We first evaluate and log all performance values and then we
            # check them against the reference. This way we always log them
            # even if they don't meet the reference.
            perf_values = []
            for tag, expr in self.perf_patterns.items():
                value = evaluate(expr)
                key = '%s:%s' % (self._current_partition.fullname, tag)
                if key not in self.reference:
                    raise SanityError(
                        "tag `%s' not resolved in references for `%s'" %
                        (tag, self._current_partition.fullname))

                perf_values.append((value, self.reference[key]))
                self._perf_logger.log_performance(logging.INFO, tag, value,
                                                  *self.reference[key])

            for val, reference in perf_values:
                ref, low_thres, high_thres, *_ = reference
                evaluate(assert_reference(val, ref, low_thres, high_thres))
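
The pattern above splits each entry of self.reference into a value plus lower/upper thresholds before handing them to assert_reference. As a minimal standalone sketch (with illustrative values, reusing only the sn module seen throughout these examples), the same check can be evaluated directly:

import reframe.utility.sanity as sn

# Hypothetical measured value and reference entry (ref, lower_thres, upper_thres).
measured = 151.2
ref, low_thres, high_thres = 151.6, -0.05, 0.05

# assert_reference() builds a deferred expression; sn.evaluate() forces it and
# raises SanityError if `measured` lies outside the tolerated interval.
sn.evaluate(sn.assert_reference(measured, ref, low_thres, high_thres))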
Example #4
    def __init__(self, output_file):
        super().__init__()

        self.valid_prog_environs = ['PrgEnv-gnu']
        self.executable = 'gmx_mpi'

        self.keep_files = [output_file]

        energy = sn.extractsingle(
            r'\s+Potential\s+Kinetic En\.\s+Total Energy'
            r'\s+Conserved En\.\s+Temperature\n'
            r'(\s+\S+){2}\s+(?P<energy>\S+)(\s+\S+){2}\n'
            r'\s+Pressure \(bar\)\s+Constr\. rmsd',
            output_file,
            'energy',
            float,
            item=-1)
        energy_reference = -12071400.0

        self.sanity_patterns = sn.all([
            sn.assert_found('Finished mdrun', output_file),
            sn.assert_reference(energy, energy_reference, -0.01, 0.01)
        ])

        self.perf_patterns = {
            'perf':
            sn.extractsingle(r'Performance:\s+(?P<perf>\S+)', output_file,
                             'perf', float)
        }

        self.maintainers = ['*****@*****.**']
        self.strict_check = False
        self.use_multithreading = False
        self.extra_resources = {'switches': {'num_switches': 1}}
        self.tags = {'applications', 'performance'}
Example #5
    def __init__(self):
        super().__init__('wide_deep')

        train_epochs = 10
        self.executable = 'python3 ./official/wide_deep/census_main.py'
        self.executable_opts = [
            '--data_dir', './official/wide_deep/', '--model_dir',
            './official/wide_deep/model_dir', '--train_epochs',
            str(train_epochs)
        ]

        self.sanity_patterns = sn.all([
            sn.assert_found(r'Finished evaluation at', self.stderr),
            sn.assert_reference(
                sn.extractsingle(
                    r"Results at epoch %s[\s\S]+accuracy:\s+(?P<accuracy>\S+)"
                    % train_epochs, self.stderr, 'accuracy', float, -1), 0.85,
                -0.05, None)
        ])

        self.pre_run += [
            'mkdir ./official/wide_deep/model_dir',
            'python3 ./official/wide_deep/census_dataset.py '
            '--data_dir ./official/wide_deep/'
        ]
Example #6
    def __init__(self, **kwargs):
        super().__init__('gpu_direct_cuda_check', os.path.dirname(__file__),
                         **kwargs)
        self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']
        self.valid_prog_environs = ['PrgEnv-gnu']
        if self.current_system.name in ['daint', 'dom']:
            self.variables = {'MPICH_RDMA_ENABLED_CUDA': '1'}
        elif self.current_system.name in ['kesch']:
            self.valid_prog_environs = ['PrgEnv-gnu-gdr']
            self.variables = {
                'MPICH_RDMA_ENABLED_CUDA': '1',
                'MV2_USE_CUDA': '1',
                'MV2_USE_GPUDIRECT': '1',
                'MPICH_G2G_PIPELINE': '1',
                'G2G': '1'
            }

        self.num_tasks = 2
        self.num_gpus_per_node = 1
        self.sourcepath = 'gpu_direct_cuda.cu'
        self.num_tasks_per_node = 1

        self.modules = ['cudatoolkit']

        result = sn.extractsingle(r'Result :\s+(?P<result>\d+\.?\d*)',
                                  self.stdout, 'result', float)
        self.sanity_patterns = sn.assert_reference(result, 1., -1e-5, 1e-5)

        self.maintainers = ['AJ', 'VK']
        self.tags = {'production'}
Example #7
    def __init__(self, lang, extension, **kwargs):
        super().__init__(lang, extension, **kwargs)
        self.valid_systems = [
            'daint:gpu', 'daint:mc', 'dom:gpu', 'dom:mc', 'kesch:cn'
        ]

        if self.current_system.name == 'kesch' and self.lang == 'C':
            self.flags += ' -lm '

        residual_pattern = '_jacobi.%s:%d,residual'
        self.ddt_options = [
            '--offline', '--output=ddtreport.txt', '--trace-at',
            residual_pattern %
            (self.extension, self.instrumented_linenum[self.lang])
        ]

        self.sanity_patterns = sn.all([
            sn.assert_found('MPI implementation', 'ddtreport.txt'),
            sn.assert_found(r'Debugging\s*:\s*srun\s+%s' % self.executable,
                            'ddtreport.txt'),
            sn.assert_reference(
                sn.extractsingle(
                    r'^tracepoint\s+.*\s+residual:\s+(?P<result>\S+)',
                    'ddtreport.txt', 'result', float), 2.572e-6, -1e-1,
                1.0e-1),
            sn.assert_found(r'Every process in your program has terminated\.',
                            'ddtreport.txt')
        ])
Example #8
    def __init__(self, output_file):
        super().__init__()

        self.valid_prog_environs = ['PrgEnv-gnu']
        self.executable = 'castep.mpi'

        self.keep_files = [output_file]

        energy = sn.extractsingle(r'Final energy, E\s+=\s+(?P<energy>\S+)',
                                  output_file, 'energy', float, item=-1)
        energy_reference = -77705.21093039

        self.sanity_patterns = sn.all([
            sn.assert_found('Total time', output_file),
            sn.assert_reference(energy, energy_reference, -0.01, 0.01)
        ])

        self.perf_patterns = {
            'runtime': sn.extractsingle(r'Total time\s+=\s+(?P<runtime>\S+)',
                                        output_file, 'runtime', float),
            'calctime': sn.extractsingle(r'Calculation time\s+=\s+'
                                         r'(?P<calctime>\S+)',
                                         output_file, 'calctime', float)
        }

        self.maintainers = ['*****@*****.**']
        self.strict_check = False
        self.use_multithreading = False
        self.extra_resources = {
            'switches': {
                'num_switches': 1
            }
        }
        self.tags = {'applications', 'performance'}
Example #9
    def __init__(self):
        self.descr = 'tests gpu-direct for CUDA'
        self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn', 'tiger:gpu']
        self.valid_prog_environs = ['PrgEnv-gnu']
        self.sourcepath = 'gpu_direct_cuda.cu'
        self.build_system = 'SingleSource'
        self.build_system.ldflags = ['-lcublas', '-lcudart']
        if self.current_system.name in ['daint', 'dom', 'tiger']:
            self.modules = ['craype-accel-nvidia60']
            self.variables = {'MPICH_RDMA_ENABLED_CUDA': '1'}
            self.build_system.cxxflags = ['-ccbin CC', '-arch=sm_60']
        elif self.current_system.name == 'kesch':
            self.exclusive_access = True
            self.valid_prog_environs = ['PrgEnv-gnu']
            self.modules = ['cudatoolkit/8.0.61']
            self.variables = {
                'MV2_USE_CUDA': '1',
                'G2G': '1',
            }
            self.build_system.cxxflags = ['-ccbin', 'mpicxx', '-arch=sm_37']

        self.num_tasks = 2
        self.num_gpus_per_node = 1
        self.num_tasks_per_node = 1
        result = sn.extractsingle(r'Result :\s+(?P<result>\d+\.?\d*)',
                                  self.stdout, 'result', float)
        self.sanity_patterns = sn.assert_reference(result, 1., -1e-5, 1e-5)
        self.maintainers = ['AJ', 'MKr']
        self.tags = {'production', 'mch', 'craype'}
Example #10
    def __init__(self, variant):
        if variant == 'nompi':
            self.num_tasks = 1
        else:
            self.num_tasks = 2

        self.valid_systems = ['daint:gpu', 'dom:gpu', 'arolla:cn', 'tsa:cn']
        self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi']
        self.sourcesdir = 'src/openacc'
        if self.num_tasks == 1:
            self.sourcepath = 'vecAdd_openacc_nompi.f90'
            if self.current_system.name in ['arolla', 'tsa']:
                self.valid_prog_environs = ['PrgEnv-pgi-nompi']
        else:
            self.sourcepath = 'vecAdd_openacc_mpi.f90'

        if self.current_system.name in ['daint', 'dom']:
            self.modules = ['craype-accel-nvidia60']
        elif self.current_system.name in ['arolla', 'tsa']:
            self.exclusive_access = True
            self.variables = {
                'CRAY_ACCEL_TARGET': 'nvidia70',
                'MV2_USE_CUDA': '1'
            }

        self.executable = self.name
        self.build_system = 'SingleSource'
        self.num_gpus_per_node = 1
        self.num_tasks_per_node = 1
        result = sn.extractsingle(r'final result:\s+(?P<result>\d+\.?\d*)',
                                  self.stdout, 'result', float)
        self.sanity_patterns = sn.assert_reference(result, 1., -1e-5, 1e-5)

        self.maintainers = ['TM', 'AJ']
        self.tags = {'production', 'craype'}
Example #11
    def __init__(self, output_file):
        super().__init__()

        self.valid_prog_environs = ['PrgEnv-gnu']
        self.executable = 'cp2k.psmp'

        self.keep_files = [output_file]

        energy = sn.extractsingle(
            r'ENERGY\| Total FORCE_EVAL \( QS \) energy \[a.u.\]:'
            r'\s+(?P<energy>\S+)', output_file, 'energy', float)
        energy_reference = -870.934788

        self.sanity_patterns = sn.all([
            sn.assert_found('CP2K   ', output_file),
            sn.assert_reference(energy, energy_reference, -1.E-06, +1.0E-06)
        ])

        self.perf_patterns = {
            'perf':
            sn.extractsingle(r'\s+CP2K   '
                             r'(\s+\S+){5}\s+(?P<perf>\S+)', output_file,
                             'perf', float)
        }

        self.maintainers = ['*****@*****.**']
        self.strict_check = False
        self.use_multithreading = False
        self.extra_resources = {'switches': {'num_switches': 1}}
        self.tags = {'applications', 'performance'}
Example #12
    def __init__(self, variant, **kwargs):
        super().__init__('quantum_espresso_%s_check' % variant,
                         os.path.dirname(__file__), **kwargs)
        self.descr = 'Quantum Espresso check (%s)' % variant
        self.valid_prog_environs = ['PrgEnv-intel']
        self.modules = ['QuantumESPRESSO']
        self.sourcesdir = os.path.join(self.current_system.resourcesdir,
                                       'Espresso')
        self.executable = 'pw.x'
        self.executable_opts = '-in ausurf.in'.split()
        energy = sn.extractsingle(r'!\s+total energy\s+=\s+(?P<energy>\S+) Ry',
                                  self.stdout, 'energy', float)
        self.sanity_patterns = sn.all([
            sn.assert_found(r'convergence has been achieved', self.stdout),
            sn.assert_reference(energy, -11427.08612278, -1e-10, 1e-10)
        ])
        self.perf_patterns = {
            'sec':
            sn.extractsingle(r'electrons    :\s+(?P<sec>\S+)s CPU ',
                             self.stdout, 'sec', float)
        }
        self.use_multithreading = True
        if self.current_system.name == 'dom':
            self.num_tasks = 216
            self.num_tasks_per_node = 36
        else:
            self.num_tasks = 576
            self.num_tasks_per_node = 36

        self.maintainers = ['AK', 'LM']
        self.strict_check = False
        self.extra_resources = {'switches': {'num_switches': 1}}
Example #13
    def __init__(self, **kwargs):
        super().__init__('wide_deep', **kwargs)

        train_epochs = 10
        self.executable = 'python3 ./official/wide_deep/wide_deep.py'
        self.executable_opts = [
            '--train_data', './official/wide_deep/adult.data', '--test_data',
            './official/wide_deep/adult.test', '--model_dir',
            './official/wide_deep/model_dir', '--train_epochs',
            str(train_epochs)
        ]

        self.sanity_patterns = sn.all([
            sn.assert_found(r'INFO:tensorflow:Finished evaluation at',
                            self.stderr),
            sn.assert_reference(
                sn.extractsingle(
                    r"Results at epoch %s[\s\S]+accuracy:\s+(?P<accuracy>\S+)"
                    % train_epochs, self.stdout, 'accuracy', float), 0.85,
                -0.05, None)
        ])

        self.pre_run += [
            'mkdir ./official/wide_deep/model_dir',
            'python3 ./official/wide_deep/data_download.py '
            '--data_dir ./official/wide_deep/'
        ]
Example #14
    def __init__(self):
        if self.current_system.name in ['eiger', 'pilatus']:
            self.valid_prog_environs = ['cpeIntel']
        else:
            self.valid_prog_environs = ['builtin']

        self.modules = ['VASP']
        force = sn.extractsingle(r'1 F=\s+(?P<result>\S+)',
                                 self.stdout, 'result', float)
        self.sanity_patterns = sn.assert_reference(
            force, -.85026214E+03, -1e-5, 1e-5
        )
        self.keep_files = ['OUTCAR']
        self.perf_patterns = {
            'time': sn.extractsingle(r'Total CPU time used \(sec\):'
                                     r'\s+(?P<time>\S+)', 'OUTCAR',
                                     'time', float)
        }
        self.maintainers = ['LM']
        self.tags = {'scs'}
        self.strict_check = False
        self.extra_resources = {
            'switches': {
                'num_switches': 1
            }
        }
Example #15
    def __init__(self, num_tasks, **kwargs):
        if num_tasks == 1:
            check_name = 'openacc_fortran_check'
        else:
            check_name = 'openacc_mpi_fortran_check'
        super().__init__(check_name, os.path.dirname(__file__), **kwargs)
        self.valid_systems = ['daint:gpu', 'dom:gpu', 'kesch:cn']
        self.valid_prog_environs = ['PrgEnv-cray', 'PrgEnv-pgi']
        if self.current_system.name in ['daint', 'dom']:
            self.modules = ['craype-accel-nvidia60']
            self._pgi_flags = '-acc -ta=tesla:cc60'
        elif self.current_system.name in ['kesch']:
            self.modules = ['craype-accel-nvidia35']
            self._pgi_flags = '-acc -ta=tesla:cc35'

        self.num_tasks = num_tasks
        if self.num_tasks == 1:
            self.sourcepath = 'vecAdd_openacc.f90'
        else:
            self.sourcepath = 'vecAdd_openacc_mpi.f90'
        self.num_gpus_per_node = 1
        self.executable = self.name
        self.num_tasks_per_node = 1

        result = sn.extractsingle(r'final result:\s+(?P<result>\d+\.?\d*)',
                                  self.stdout, 'result', float)
        self.sanity_patterns = sn.assert_reference(result, 1., -1e-5, 1e-5)

        self.maintainers = ['TM', 'VK']
        self.tags = {'production'}
Example #16
    def report_slow_nodes(self):
        '''Report the base perf metrics and also all the slow nodes.'''

        # Only report the nodes that don't meet the perf reference
        with osext.change_dir(self.stagedir):
            key = f'{self.current_partition.fullname}:min_perf'
            if key in self.reference:
                regex = r'\[(\S+)\] GPU\s+\d\(OK\): (\d+) GF/s'
                nids = set(sn.extractall(regex, self.stdout, 1))

                # Get the references
                ref, lt, ut, *_ = self.reference[key]

                # Flag the slow nodes
                for nid in nids:
                    try:
                        node_perf = self.min_perf(nid)
                        val = node_perf.evaluate(cache=True)
                        sn.assert_reference(val, ref, lt, ut).evaluate()
                    except SanityError:
                        self.perf_variables[nid] = node_perf
Example #17
    def __init__(self, scale):
        super().__init__()
        self.descr = 'Quantum Espresso CPU check'
        self.maintainers = ['AK', 'LM']
        self.tags = {'scs', 'production', 'external-resources'}
        self.sourcesdir = os.path.join(self.current_system.resourcesdir,
                                       'Espresso')

        self.valid_systems = ['daint:mc']
        self.valid_prog_environs = ['PrgEnv-intel']
        self.modules = ['QuantumESPRESSO']
        self.executable = 'pw.x'
        self.executable_opts = ['-in', 'ausurf.in']
        if scale == 'small':
            self.valid_systems += ['dom:mc']
            self.num_tasks = 216
            self.num_tasks_per_node = 36
            self.reference = {
                'dom:mc': {
                    'time': (159.0, None, 0.05, 's'),
                },
                'daint:mc': {
                    'time': (151.6, None, 0.05, 's')
                },
            }
        else:
            self.num_tasks = 576
            self.num_tasks_per_node = 36
            self.reference = {
                'daint:mc': {
                    'time': (157.0, None, 0.40, 's')
                },
            }

        self.use_multithreading = True
        self.extra_resources = {
            'switches': {
                'num_switches': 1
            }
        }

        self.strict_check = False
        energy = sn.extractsingle(r'!\s+total energy\s+=\s+(?P<energy>\S+) Ry',
                                  self.stdout, 'energy', float)
        self.sanity_patterns = sn.all([
            sn.assert_found(r'convergence has been achieved', self.stdout),
            sn.assert_reference(energy, -11427.09017162, -1e-10, 1e-10)
        ])
        self.perf_patterns = {
            'time': sn.extractsingle(r'electrons    :\s+(?P<sec>\S+)s CPU ',
                                     self.stdout, 'sec', float)
        }
Example #18
 def __init__(self):
     self.valid_systems = ['daint:gpu', 'dom:gpu']
     self.valid_prog_environs = ['PrgEnv-pgi']
     self.sourcepath = 'vecAdd_cuda.cuf'
     self.modules = ['craype-accel-nvidia60']
     self.build_system = 'SingleSource'
     self.build_system.fflags = ['-ta=tesla:cc60']
     self.num_gpus_per_node = 1
     result = sn.extractsingle(r'final result:\s+(?P<result>\d+\.?\d*)',
                               self.stdout, 'result', float)
     self.sanity_patterns = sn.assert_reference(result, 1., -1e-5, 1e-5)
     self.maintainers = ['TM', 'AJ']
     self.tags = {'production', 'craype'}
Example #19
 def __init__(self, lang, extension):
     super().__init__(lang, extension)
     self.valid_systems = ['dom:gpu', 'dom:mc', 'tiger:gpu']
     self.sanity_patterns = sn.all([
         sn.assert_reference(
             sn.extractsingle(r'^tst\{0\}:\s+(?P<result>\d+.\d+[eE]-\d+)',
                              'gdb4hpc.rpt', 'result', float), 2.572e-6,
             -1e-1, 1.0e-1),
         sn.assert_found(r'gdb4hpc \d\.\d - Cray Line Mode Parallel Debug',
                         'gdb4hpc.rpt'),
         sn.assert_found(r'Shutting down debugger and killing application',
                         'gdb4hpc.rpt')
     ])
Example #20
    def __init__(self, num_ranks, test_folder, input_file_name, use_sirius,
                 ref_energy, ref_time):
        super().__init__()
        self.descr = 'SCF check'
        self.valid_systems = ['osx', 'daint']
        self.valid_prog_environs = ['PrgEnv-gnu', 'PrgEnv-intel']

        self.num_tasks = num_ranks
        if self.current_system.name == 'daint':
            self.num_tasks_per_node = 1
            self.num_cpus_per_task = 12
            self.variables = {
                'OMP_NUM_THREADS': str(self.num_cpus_per_task),
                'MKL_NUM_THREADS': str(self.num_cpus_per_task)
            }

        self.executable = 'pw.x'
        self.sourcesdir = '../' + test_folder

        #self.sanity_patterns = sn.all([
        #    sn.assert_found(r'JOB DONE', self.stdout, msg="Calculation didn't converge"),
        #    #sn.assert_lt(energy_diff(fout, data_ref), 1e-5, msg="Total energy is different"),
        #    #sn.assert_lt(stress_diff(fout, data_ref), 1e-5, msg="Stress tensor is different"),
        #    #sn.assert_lt(forces_diff(fout, data_ref), 1e-5, msg="Atomic forces are different")
        #])

        self.executable_opts = [
            "-i %s" % input_file_name,
            "-npool %i" % num_ranks
        ]
        if use_sirius:
            self.executable_opts.append('-sirius')

        self.reference = {'daint:gpu': {'time': (ref_time, None, 0.02, 'sec')}}

        self.extra_resources = {'switches': {'num_switches': 1}}

        energy = sn.extractall(r'!\s+total energy\s+=\s+(?P<energy>\S+) Ry',
                               self.stdout, 'energy', float)[-1]

        self.sanity_patterns = sn.all([
            sn.assert_found(r'convergence has been achieved', self.stdout),
            sn.assert_reference(energy, ref_energy, -1e-9, 1e-9)
        ])

        self.perf_patterns = {
            'time':
            sn.extractsingle(
                r'global_timer\s+:\s+(?P<num>\S+)\s+(?P<time>\S+)',
                self.stdout, 'time', float)
        }
Example #21
    def assert_energy_readout(self):
        '''Assert that the obtained energy meets the benchmark tolerances.'''

        energy_fn_name = f'energy_{util.toalphanum(self.__bench).lower()}'
        energy_fn = getattr(self, energy_fn_name, None)
        sn.assert_true(
            energy_fn is not None,
            msg=(f"cannot extract energy for benchmark {self.__bench!r}: "
                 f"please define a member function '{energy_fn_name}()'")
        ).evaluate()
        energy = energy_fn()
        energy_diff = sn.abs(energy - self.energy_ref)
        return sn.all([
            sn.assert_found('Finished mdrun', 'md.log'),
            sn.assert_reference(energy, self.energy_ref,
                                -self.energy_tol, self.energy_tol)
        ])
Example #22
    def __init__(self):
        self.valid_systems = ['daint:gpu', 'dom:gpu']
        self.valid_prog_environs = ['PrgEnv-pgi']
        self.sourcepath = 'vecAdd_cuda.cuf'

        # FIXME: PGI 20.x does not support CUDA 11, see case #275674
        self.modules = [
            'craype-accel-nvidia60',
            'cudatoolkit/10.2.89_3.29-7.0.2.1_3.5__g67354b4'
        ]
        self.build_system = 'SingleSource'
        self.build_system.fflags = ['-ta=tesla:cc60']
        self.num_gpus_per_node = 1
        result = sn.extractsingle(r'final result:\s+(?P<result>\d+\.?\d*)',
                                  self.stdout, 'result', float)
        self.sanity_patterns = sn.assert_reference(result, 1., -1e-5, 1e-5)
        self.maintainers = ['TM', 'AJ']
        self.tags = {'production', 'craype'}
Example #23
    def add_sanity(self):
        assert_references = []
        for (perf_v, props) in sanity_config.assert_reference.items():
            assert_references.append(
                sn.assert_reference(
                    sn.extractsingle(props['pattern'],
                                     self.output_file,
                                     perf_v, float,
                                     item=-1),
                    props['ref_value'],
                    props['lower_thres'],
                    props['upper_thres']
                )
            )

        self.sanity_patterns = sn.all([
            *[sn.assert_found(af, self.output_file) for af in sanity_config.assert_found],
            *assert_references
        ])
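
The sanity_config object used above is defined elsewhere and is not part of this snippet. Judging only from the attributes the method accesses (assert_found, plus assert_reference entries carrying pattern, ref_value, lower_thres and upper_thres), a hypothetical minimal shape could look like this illustrative sketch:

from types import SimpleNamespace

# Purely illustrative: the real sanity_config lives outside this example.
sanity_config = SimpleNamespace(
    # Regexes that must be found in the output file.
    assert_found=[r'Run completed', r'No errors detected'],
    # Performance values checked with sn.assert_reference(): each entry maps a
    # capture-group name to its extraction pattern, reference value and
    # relative lower/upper thresholds.
    assert_reference={
        'bandwidth': {
            'pattern': r'Bandwidth:\s+(?P<bandwidth>\S+)\s+GB/s',
            'ref_value': 120.0,
            'lower_thres': -0.1,
            'upper_thres': 0.1,
        },
    },
)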
Example #24
    def __init__(self, output_file):
        if self.current_system.name in ['eiger', 'pilatus']:
            self.valid_prog_environs = ['cpeGNU']
        else:
            self.valid_prog_environs = ['builtin']

        self.executable = 'gmx_mpi'

        # Reset sources dir relative to the SCS apps prefix
        self.sourcesdir = os.path.join(self.current_system.resourcesdir,
                                       'Gromacs', 'herflat')
        self.keep_files = [output_file]

        energy = sn.extractsingle(
            r'\s+Potential\s+Kinetic En\.\s+Total Energy'
            r'\s+Conserved En\.\s+Temperature\n'
            r'(\s+\S+){2}\s+(?P<energy>\S+)(\s+\S+){2}\n'
            r'\s+Pressure \(bar\)\s+Constr\. rmsd',
            output_file,
            'energy',
            float,
            item=-1)
        energy_reference = -3270799.9

        self.sanity_patterns = sn.all([
            sn.assert_found('Finished mdrun', output_file),
            sn.assert_reference(energy, energy_reference, -0.001, 0.001)
        ])

        self.perf_patterns = {
            'perf':
            sn.extractsingle(r'Performance:\s+(?P<perf>\S+)', output_file,
                             'perf', float)
        }

        self.modules = ['GROMACS']
        self.maintainers = ['VH', 'SO']
        self.strict_check = False
        self.use_multithreading = False
        self.extra_resources = {'switches': {'num_switches': 1}}
        self.tags = {'scs', 'external-resources'}
Example #25
    def setting_variables(self):

        self.maintainers = ['*****@*****.**']
        self.tags = {'espresso'}
        self.valid_systems = ['ibex:batch_mpi']
        self.sourcesdir = '../src/espresso'

        self.valid_prog_environs = ['cpustack_builtin']
        self.modules = ['quantumespresso/6.4.1/openmpi3.0.0-intel17']
        self.executable = 'pw.x'
        self.executable_opts = ['-in', 'qe.scf.in']
        self.time_limit = '10m'
        energy = sn.extractsingle(r'!\s+total energy\s+=\s+(?P<energy>\S+) Ry',
                                  self.stdout, 'energy', float)
        self.sanity_patterns = sn.all([
            sn.assert_found(r'convergence has been achieved', self.stdout),
            sn.assert_reference(energy, -62.96497971)
        ])
        self.perf_patterns = {
            'time':
            sn.extractsingle(r'electrons    :\s+(?P<sec>\S+)s CPU ',
                             self.stdout, 'sec', float)
        }
        if self.variant == 'single':
            self.num_tasks = 32
            self.descr = 'Quantum Espresso CPU check on Single Node'
            self.num_tasks_per_node = 32
            self.reference = {
                'ibex:batch_mpi': {
                    'time': (0.77, None, 0.05, 's'),
                }
            }
        else:
            self.num_tasks = 32
            self.descr = 'Quantum Espresso CPU check on Multi Node'
            self.num_tasks_per_node = 16
            self.reference = {
                'ibex:batch_mpi': {
                    'time': (0.9, None, 0.05, 's')
                }
            }
Example #26
    def check_performance(self):
        """The performance checking phase of the regression test pipeline.

        :raises reframe.core.exceptions.SanityError: If the performance check
            fails.
        """
        if self.perf_patterns is None:
            return

        with os_ext.change_dir(self._stagedir):
            # We first evaluate and log all performance values and then we
            # check them against the reference. This way we always log them
            # even if they don't meet the reference.
            for tag, expr in self.perf_patterns.items():
                value = evaluate(expr)
                key = '%s:%s' % (self._current_partition.fullname, tag)
                if key not in self.reference:
                    raise SanityError(
                        "tag `%s' not resolved in references for `%s'" %
                        (tag, self._current_partition.fullname))

                self._perfvalues[key] = (tag, value, *self.reference[key])
                self._perf_logger.log_performance(logging.INFO, tag, value,
                                                  *self.reference[key])

            for values in self._perfvalues.values():
                tag, val, ref, low_thres, high_thres, *_ = values
                try:
                    evaluate(
                        assert_reference(
                            val,
                            ref,
                            low_thres,
                            high_thres,
                            msg=('failed to meet reference: %s={0}, '
                                 'expected {1} (l={2}, u={3})' % tag),
                        ))
                except SanityError as e:
                    raise PerformanceError(e)
Example #27
    def check_performance(self):
        """The performance checking phase of the regression test pipeline.

        :raises reframe.core.exceptions.SanityError: If the performance check
            fails.
        """
        if self.perf_patterns is None:
            return

        with os_ext.change_dir(self._stagedir):
            for tag, expr in self.perf_patterns.items():
                value = evaluate(expr)
                key = '%s:%s' % (self._current_partition.fullname, tag)
                try:
                    ref, low_thres, high_thres = self.reference[key]
                    self._perf_logger.info('value: %s, reference: %s' %
                                           (value, self.reference[key]))
                except KeyError:
                    raise SanityError(
                        "tag `%s' not resolved in references for `%s'" %
                        (tag, self._current_partition.fullname))
                evaluate(assert_reference(value, ref, low_thres, high_thres))
Example #28
 def __init__(self, repeat, toolsversion, datalayout):
     super().__init__()
     self.descr = 'Roofline Analysis test with Intel Advisor'
     self.valid_systems = ['daint:mc']
     # Reporting MFLOPS is not available on Intel Haswell cpus, see
     # https://www.intel.fr/content/dam/www/public/us/en/documents/manuals/
     # 64-ia-32-architectures-software-developer-vol-1-manual.pdf
     self.valid_prog_environs = ['PrgEnv-intel']
     # Using advisor/2019 because tests with advisor/2018 (build 551025)
     # raised failures:
     #    roof.dir/nid00753.000/trc000/trc000.advixe
     #    Application exit code: 139
     # advisor/2019 is currently broken on dom ("Exceeded job memory limit")
     self.modules = ['advisor/2019_update3']
     self.prgenv_flags = {
         'PrgEnv-intel': ['-O2', '-g', '-std=c++11'],
     }
     self.sourcesdir = os.path.join(self.current_system.resourcesdir,
                                    'roofline', 'intel_advisor')
     self.build_system = 'Make'
     self.prebuild_cmd = [
         'sed -e "s-XXXX-%s-" -e "s-YYYY-%s-" %s &> %s' %
         (repeat, datalayout, 'roofline_template.cpp', '_roofline.cpp')
     ]
     self.num_tasks = 1
     self.num_tasks_per_node = 1
     self.num_cpus_per_task = 1
     self.variables = {
         'OMP_NUM_THREADS': str(self.num_cpus_per_task),
         'CRAYPE_LINK_TYPE': 'dynamic',
     }
     self.pre_run = [
         'advixe-cl -help collect | head -20',
     ]
     self.executable = 'advixe-cl'
     self.target_executable = './roof.exe'
     self.roofdir = './roof.dir'
     self.executable_opts = [
         '--collect survey --project-dir=%s --search-dir src:rp=. '
         '--data-limit=0 --no-auto-finalize --trace-mpi -- %s ' %
         (self.roofdir, self.target_executable)
     ]
     self.version_rpt = 'Intel_Advisor_version.rpt'
     self.roofline_ref = 'Intel_Advisor_roofline_reference.rpt'
     self.roofline_rpt = 'Intel_Advisor_roofline_results.rpt'
     # Reference roofline boundaries for Intel Broadwell CPU (E5-2695 v4):
     L1bw = 293*1024**3
     L2bw = 79*1024**3
     L3bw = 33*1024**3
     DPfmabw = 45*1024**3
     DPaddbw = 12*1024**3
     ScalarAddbw = 3*1024**3
     self.sanity_patterns = sn.all([
         # check the job status:
         sn.assert_found('loop complete.', self.stdout),
         # check the tool's version (2019=591264, 2018=551025):
         sn.assert_eq(sn.extractsingle(
             r'I*.\(build\s(?P<toolsversion>\d+)\s*.',
             self.version_rpt, 'toolsversion'), toolsversion),
         # --- roofline boundaries:
         # check --report=roofs (L1 bandwidth):
         sn.assert_reference(sn.extractsingle(
             r'^L1\sbandwidth\s\(single-threaded\)\s+(?P<L1bw>\d+)\s+'
             r'memory$', self.roofline_ref, 'L1bw', int),
             L1bw, -0.08, 0.08),
         # check --report=roofs (L2 bandwidth):
         sn.assert_reference(sn.extractsingle(
             r'^L2\sbandwidth\s\(single-threaded\)\s+(?P<L2bw>\d+)\s+'
             r'memory$', self.roofline_ref, 'L2bw', int),
             L2bw, -0.08, 0.08),
         # check --report=roofs (L3 bandwidth):
         sn.assert_reference(sn.extractsingle(
             r'^L3\sbandwidth\s\(single-threaded\)\s+(?P<L3bw>\d+)\s+'
             r'memory$', self.roofline_ref, 'L3bw', int),
             L3bw, -0.08, 0.08),
         # check --report=roofs (DP FMA):
         sn.assert_reference(sn.extractsingle(
             r'^DP Vector FMA Peak\s\(single-threaded\)\s+'
             r'(?P<DPfmabw>\d+)\s+compute$', self.roofline_ref,
             'DPfmabw', int), DPfmabw, -0.08, 0.08),
         # check --report=roofs (DP Add):
         sn.assert_reference(sn.extractsingle(
             r'^DP Vector Add Peak\s\(single-threaded\)\s+'
             r'(?P<DPaddbw>\d+)\s+compute$', self.roofline_ref,
             'DPaddbw', int), DPaddbw, -0.08, 0.08),
         # check --report=roofs (Scalar Add):
         sn.assert_reference(sn.extractsingle(
             r'^Scalar Add Peak\s\(single-threaded\)\s+'
             r'(?P<ScalarAddbw>\d+)\s+compute$', self.roofline_ref,
             'ScalarAddbw', int), ScalarAddbw, -0.08, 0.08),
         # --- check Arithmetic_intensity:
         sn.assert_reference(sn.extractsingle(
             r'^returned\sAI\sgap\s=\s(?P<Intensity>.*)', self.roofline_rpt,
             'Intensity', float), 0.0, -0.01, 0.01),
         # --- check GFLOPS:
         sn.assert_reference(sn.extractsingle(
             r'^returned\sGFLOPS\sgap\s=\s(?P<Flops>.*)', self.roofline_rpt,
             'Flops', float), 0.0, -0.01, 0.01),
     ])
     self.maintainers = ['JG']
     self.tags = {'production'}
Example #29
    def test_assert_reference(self):
        self.assertTrue(sn.assert_reference(0.9, 1, -0.2, 0.1))
        self.assertTrue(sn.assert_reference(0.9, 1, upper_thres=0.1))
        self.assertTrue(sn.assert_reference(0.9, 1, lower_thres=-0.2))
        self.assertTrue(sn.assert_reference(0.9, 1))

        # Check negatives
        self.assertTrue(sn.assert_reference(-0.9, -1, -0.2, 0.1))
        self.assertTrue(sn.assert_reference(-0.9, -1, -0.2))
        self.assertTrue(sn.assert_reference(-0.9, -1, upper_thres=0.1))
        self.assertTrue(sn.assert_reference(-0.9, -1))

        # Check upper threshold values greater than 1
        self.assertTrue(sn.assert_reference(20.0, 10.0, None, 3.0))
        self.assertTrue(sn.assert_reference(-50.0, -20.0, -2.0, 0.5))

        self.assertRaisesRegex(
            SanityError, '0\.5 is beyond reference value 1 \(l=0\.8, u=1\.1\)',
            evaluate, sn.assert_reference(0.5, 1, -0.2, 0.1))
        self.assertRaisesRegex(
            SanityError, '0\.5 is beyond reference value 1 \(l=0\.8, u=inf\)',
            evaluate, sn.assert_reference(0.5, 1, -0.2))
        self.assertRaisesRegex(
            SanityError, '1\.5 is beyond reference value 1 \(l=0\.8, u=1\.1\)',
            evaluate, sn.assert_reference(1.5, 1, -0.2, 0.1))
        self.assertRaisesRegex(
            SanityError, '1\.5 is beyond reference value 1 \(l=-inf, u=1\.1\)',
            evaluate,
            sn.assert_reference(1.5, 1, lower_thres=None, upper_thres=0.1))
        self.assertRaisesRegex(
            SanityError,
            '-0\.8 is beyond reference value -1 \(l=-1\.2, u=-0\.9\)',
            evaluate, sn.assert_reference(-0.8, -1, -0.2, 0.1))

        # Check invalid thresholds
        self.assertRaisesRegex(SanityError,
                               'invalid high threshold value: -0\.1', evaluate,
                               sn.assert_reference(0.9, 1, -0.2, -0.1))
        self.assertRaisesRegex(SanityError,
                               'invalid low threshold value: 0\.2', evaluate,
                               sn.assert_reference(0.9, 1, 0.2, 0.1))
        self.assertRaisesRegex(SanityError,
                               'invalid low threshold value: 1\.2', evaluate,
                               sn.assert_reference(0.9, 1, 1.2, 0.1))

        # check invalid thresholds greater than 1
        self.assertRaisesRegex(SanityError,
                               'invalid low threshold value: -2\.0', evaluate,
                               sn.assert_reference(0.9, 1, -2.0, 0.1))
        self.assertRaisesRegex(SanityError,
                               'invalid high threshold value: 1\.5', evaluate,
                               sn.assert_reference(-1.5, -1, -0.5, 1.5))
Example #30
def test_assert_reference():
    assert sn.assert_reference(0.9, 1, -0.2, 0.1)
    assert sn.assert_reference(0.9, 1, upper_thres=0.1)
    assert sn.assert_reference(0.9, 1, lower_thres=-0.2)
    assert sn.assert_reference(0.9, 1)

    # Check negatives
    assert sn.assert_reference(-0.9, -1, -0.2, 0.1)
    assert sn.assert_reference(-0.9, -1, -0.2)
    assert sn.assert_reference(-0.9, -1, upper_thres=0.1)
    assert sn.assert_reference(-0.9, -1)

    # Check upper threshold values greater than 1
    assert sn.assert_reference(20.0, 10.0, None, 3.0)
    assert sn.assert_reference(-50.0, -20.0, -2.0, 0.5)
    with pytest.raises(SanityError, match=r'0\.5 is beyond reference value 1 '
                                          r'\(l=0\.8, u=1\.1\)'):
        sn.evaluate(sn.assert_reference(0.5, 1, -0.2, 0.1))

    with pytest.raises(SanityError, match=r'0\.5 is beyond reference value 1 '
                                          r'\(l=0\.8, u=inf\)'):
        sn.evaluate(sn.assert_reference(0.5, 1, -0.2))

    with pytest.raises(SanityError, match=r'1\.5 is beyond reference value 1 '
                                          r'\(l=0\.8, u=1\.1\)'):
        sn.evaluate(sn.assert_reference(1.5, 1, -0.2, 0.1))

    with pytest.raises(SanityError, match=r'1\.5 is beyond reference value 1 '
                                          r'\(l=-inf, u=1\.1\)'):
        sn.evaluate(sn.assert_reference(1.5, 1, lower_thres=None,
                                        upper_thres=0.1))

    with pytest.raises(SanityError,
                       match=r'-0\.8 is beyond reference value -1 '
                             r'\(l=-1\.2, u=-0\.9\)'):
        sn.evaluate(sn.assert_reference(-0.8, -1, -0.2, 0.1))

    # Check invalid thresholds
    with pytest.raises(SanityError,
                       match=r'invalid high threshold value: -0\.1'):
        sn.evaluate(sn.assert_reference(0.9, 1, -0.2, -0.1))

    with pytest.raises(SanityError,
                       match=r'invalid low threshold value: 0\.2'):
        sn.evaluate(sn.assert_reference(0.9, 1, 0.2, 0.1))

    with pytest.raises(SanityError,
                       match=r'invalid low threshold value: 1\.2'):
        sn.evaluate(sn.assert_reference(0.9, 1, 1.2, 0.1))

    # check invalid thresholds greater than 1
    with pytest.raises(SanityError,
                       match=r'invalid low threshold value: -2\.0'):
        sn.evaluate(sn.assert_reference(0.9, 1, -2.0, 0.1))

    with pytest.raises(SanityError,
                       match=r'invalid high threshold value: 1\.5'):
        sn.evaluate(sn.assert_reference(-1.5, -1, -0.5, 1.5))
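
Taken together, the two threshold test suites above pin down the semantics every example in this listing relies on: the thresholds are relative, so for a positive reference ref the accepted interval is [ref*(1 + lower_thres), ref*(1 + upper_thres)] (mirrored for negative references), and a threshold of None leaves that side unbounded. A minimal check of that behaviour, assuming only the sn module and the SanityError exception already imported by these tests:

import reframe.utility.sanity as sn
from reframe.core.exceptions import SanityError

# 0.9 lies inside [1*(1 - 0.2), 1*(1 + 0.1)] = [0.8, 1.1], so this passes.
sn.evaluate(sn.assert_reference(0.9, 1, -0.2, 0.1))

# 1.5 lies outside that interval, so forcing the deferred expression raises.
try:
    sn.evaluate(sn.assert_reference(1.5, 1, -0.2, 0.1))
except SanityError as err:
    print(err)  # e.g. "1.5 is beyond reference value 1 (l=0.8, u=1.1)"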