def consume_cpu_set(self):
    '''Check that consecutive tasks are round-robin pinned to sockets.

    Tasks are visited in order; the task with index ``i`` is expected on
    socket ``i % self.num_sockets``. Each task must be pinned to exactly
    one CPU, that CPU must belong to the expected socket and must not have
    been consumed by an earlier task on the same socket. For every valid
    task, the CPUs returned by ``self.get_sibling_cpus(cpu, by='core')``
    are recorded as consumed and finally removed from ``self.cpu_set``.

    If, after a full round over the sockets, the sockets have not all
    consumed the expected number of CPUs, the sentinel ``-1`` is added to
    ``self.cpu_set`` (presumably so that a later emptiness check on the
    set fails -- confirm against the caller).

    :raises SanityError: if an affinity set does not contain exactly one
        CPU, or its CPU is already consumed, or it lies on the wrong
        socket.
    '''
    # One set per socket to keep track of the CPUs consumed so far
    consumed = [set() for _ in range(self.num_sockets)]
    task_count = 0
    for task in range(self.num_tasks_per_socket):
        for s in range(self.num_sockets):
            # Get the list of CPUs with affinity
            affinity_set = self.aff_cpus[task_count]

            # Exactly 1 CPU per affinity set is allowed. Checking the
            # length first also guarantees that an empty set is reported
            # as a sanity failure rather than failing on the index below.
            if (len(affinity_set) != 1 or
                    affinity_set[0] in consumed[s] or
                    affinity_set[0] not in self.sockets[s]):
                raise SanityError(
                    f'incorrect affinity set for task {task_count}'
                )

            consumed[s].update(
                self.get_sibling_cpus(affinity_set[0], by='core')
            )
            task_count += 1

        # Check that all sockets have the same CPU count.
        # NOTE(review): the factor 2 presumably stands for two CPUs
        # (hardware threads) per core -- confirm.
        if not all(len(cpus) == (task + 1) * 2 for cpus in consumed):
            self.cpu_set.add(-1)

    # Decrement the consumed CPUs of every socket from the CPU set
    for cpus in consumed:
        self.cpu_set -= cpus
def consume_cpu_set(self):
    '''Check that each task lives in a different NUMA node.

    The number of affinity sets in ``self.aff_cpus`` must equal
    ``self.num_numa_nodes``. Each affinity set must contain
    ``self.num_cpus_per_task`` CPUs, all of which must be among the CPUs
    returned by ``self.get_sibling_cpus(first_cpu, by='node')``. For every
    valid affinity set, those sibling CPUs are removed from
    ``self.cpu_set``.

    :raises SanityError: if the number of tasks does not match the number
        of NUMA nodes, or if an affinity set is empty, has the wrong size
        or spans CPUs outside the NUMA node of its first CPU.
    '''
    if len(self.aff_cpus) != self.num_numa_nodes:
        raise SanityError(
            'number of tasks does not match the number of numa nodes')

    for numa_node, aff_set in enumerate(self.aff_cpus):
        # Validate the size before touching the first element, so that an
        # empty affinity set is a sanity failure and not an IndexError.
        if not aff_set or len(aff_set) != self.num_cpus_per_task:
            raise SanityError(
                f'incorrect affinity set for numa node {numa_node}')

        cpuset_by_numa = self.get_sibling_cpus(aff_set[0], by='node')
        if any(cpu not in cpuset_by_numa for cpu in aff_set):
            raise SanityError(
                f'incorrect affinity set for numa node {numa_node}')

        # Decrement the current NUMA node from the available CPU set
        self.cpu_set -= cpuset_by_numa
def consume_cpu_set(self):
    '''Check that every affinity set is confined to a single socket.

    Each affinity set in ``self.aff_cpus`` must contain exactly
    ``self.num_omp_threads`` CPUs, all of which must be among the CPUs
    returned by ``self.get_sibling_cpus(first_cpu, by='socket')``. The
    affinity sets are counted per socket, based on the socket that holds
    their first CPU. Every socket whose count equals
    ``self.num_omp_threads`` is then removed from ``self.cpu_set``.

    :raises SanityError: if an affinity set is empty, has the wrong size,
        spans more than one socket, or starts with a CPU that belongs to
        none of the known sockets.
    '''
    threads_in_socket = [0] * self.num_sockets

    def get_socket_id(cpuid):
        '''Return the index of the socket holding ``cpuid`` or ``None``.'''
        for i in range(self.num_sockets):
            if cpuid in self.sockets[i]:
                return i

        return None

    for affinity_set in self.aff_cpus:
        # An empty set cannot be indexed; report it as a sanity failure
        if not affinity_set:
            raise SanityError('incorrect affinity set')

        first_cpu = affinity_set[0]

        # A CPU outside every known socket would otherwise yield ``None``
        # and fail with a TypeError when used as a list index below.
        socket_id = get_socket_id(first_cpu)
        if socket_id is None:
            raise SanityError('incorrect affinity set')

        # Count the number of OMP threads that live on each socket
        threads_in_socket[socket_id] += 1

        # Get CPU siblings by socket
        cpu_siblings = self.get_sibling_cpus(first_cpu, by='socket')

        # The size of the affinity set must match the number of OMP
        # threads and all CPUs from the set must belong to the same socket.
        if (self.num_omp_threads != len(affinity_set) or
                not all(x in cpu_siblings for x in affinity_set)):
            raise SanityError('incorrect affinity set')

    # Remove the fully populated sockets from the CPU set
    for i, socket in enumerate(self.sockets):
        if threads_in_socket[i] == self.num_omp_threads:
            self.cpu_set -= socket
def check_performance(self):
    """Run the performance checking phase of the regression test pipeline.

    Nothing is done when ``self.perf_patterns`` is ``None``. Otherwise,
    from within the stage directory, every performance expression is
    evaluated and logged, and only afterwards are the collected values
    compared against their references.

    :raises reframe.core.exceptions.SanityError: If a performance tag has
        no reference for the current partition or if the performance check
        fails.
    """
    if self.perf_patterns is None:
        return

    with os_ext.change_dir(self._stagedir):
        # Evaluation and logging happen in a first pass and the reference
        # checks in a second one, so that all values get logged even when
        # some of them do not meet their reference.
        pending = []
        for tag, expr in self.perf_patterns.items():
            value = evaluate(expr)
            partname = self._current_partition.fullname
            key = '%s:%s' % (partname, tag)
            if key not in self.reference:
                raise SanityError(
                    "tag `%s' not resolved in references for `%s'" %
                    (tag, partname))

            ref_entry = self.reference[key]
            pending.append((value, ref_entry))
            self._perf_logger.log_performance(logging.INFO, tag, value,
                                              *ref_entry)

        # Only the first three fields of a reference take part in the check
        for value, (ref, low_thres, high_thres, *_) in pending:
            evaluate(assert_reference(value, ref, low_thres, high_thres))
def assert_found(patt, filename, msg=None, encoding='utf-8'):
    '''Assert that regex pattern ``patt`` is found in the file ``filename``.

    :arg patt: The regex pattern to search.
        Any standard Python `regular expression
        <https://docs.python.org/3/library/re.html#regular-expression-syntax>`_
        is accepted.
        The `re.MULTILINE
        <https://docs.python.org/3/library/re.html#re.MULTILINE>`_ flag is set
        for the pattern search.
    :arg filename: The name of the file to examine.
        Any :class:`OSError` raised while processing the file will be
        propagated as a :class:`reframe.core.exceptions.SanityError`.
    :arg msg: Optional error message template used instead of the default
        one; it is formatted with ``patt`` and ``filename``.
    :arg encoding: The name of the encoding used to decode the file.
    :returns: ``True`` on success.
    :raises reframe.core.exceptions.SanityError: if assertion fails.
    '''
    matches = finditer(patt, filename, encoding)
    num_matches = count(matches)
    try:
        # A zero match count is falsy and makes the assertion fail
        evaluate(assert_true(num_matches))
    except SanityError:
        template = msg if msg else "pattern `{0}' not found in `{1}'"
        raise SanityError(_format(template, patt, filename))

    return True
def assert_reference(val, ref, lower_thres=None, upper_thres=None, msg=None):
    """Assert that value ``val`` respects the reference value ``ref``.

    The accepted interval is ``[ref*(1 + lower_thres), ref*(1 +
    upper_thres)]``; for a negative ``ref`` the sign of each threshold is
    inverted before the bound is computed.

    :arg val: The value to check.
    :arg ref: The reference value.
    :arg lower_thres: The lower threshold value expressed as a negative
        decimal fraction of the reference value. Must be in [-1, 0].
        If ``None``, no lower thresholds is applied.
    :arg upper_thres: The upper threshold value expressed as a decimal
        fraction of the reference value. Must be in [0, 1].
        If ``None``, no upper thresholds is applied.
    :arg msg: Optional error message template used when ``val`` is out of
        bounds. It is formatted with ``val``, ``ref``, the lower bound and
        the upper bound, in this order (``{0}`` to ``{3}``).
    :returns: ``True`` on success.
    :raises reframe.core.exceptions.SanityError: if assertion fails or if
        the lower and upper thresholds do not have appropriate values.
    """
    if lower_thres is not None:
        try:
            evaluate(assert_bounded(lower_thres, -1, 0))
        except SanityError:
            raise SanityError('invalid low threshold value: %s' % lower_thres)

    if upper_thres is not None:
        try:
            evaluate(assert_bounded(upper_thres, 0, 1))
        except SanityError:
            raise SanityError('invalid high threshold value: %s' %
                              upper_thres)

    def calc_bound(thres, unbounded):
        '''Return the bound for ``thres`` or ``unbounded`` if it is None.'''
        if thres is None:
            return unbounded

        # Inverse threshold if ref < 0
        if ref < 0:
            thres = -thres

        return ref * (1 + thres)

    # The "no threshold" case is decided on ``None`` only: a computed bound
    # of 0 (e.g. lower_thres == -1 or ref == 0) is a real bound and must not
    # be replaced by infinity just because it is falsy.
    lower = calc_bound(lower_thres, float('-inf'))
    upper = calc_bound(upper_thres, float('inf'))
    try:
        evaluate(assert_bounded(val, lower, upper))
    except SanityError:
        error_msg = msg or '{0} is beyond reference value {1} (l={2}, u={3})'
        raise SanityError(_format(error_msg, val, ref, lower, upper))
    else:
        return True
def assert_lt(a, b, msg=None):
    """Assert that ``a < b``.

    :arg a: The left-hand side operand of the comparison.
    :arg b: The right-hand side operand of the comparison.
    :arg msg: Optional error message template used instead of the default
        ``'{0} >= {1}'``; it is formatted with ``a`` and ``b``.
    :returns: ``True`` on success.
    :raises reframe.core.exceptions.SanityError: if assertion fails.
    """
    # The assertion holds whenever ``a >= b`` does not
    if not (a >= b):
        return True

    raise SanityError(_format(msg or '{0} >= {1}', a, b))
def consume_cpu_set(self):
    '''Check that threads are bound to sockets.

    For every affinity set in ``self.aff_cpus``, all of its CPUs must
    still be present in ``self.cpu_set`` and must be among the CPUs
    returned by ``self.get_sibling_cpus(first_cpu, by='socket')``. After a
    successful check those sibling CPUs are removed from ``self.cpu_set``,
    so a later affinity set on the same socket is rejected.

    :raises SanityError: if an affinity set is empty, contains a CPU that
        is no longer available, or spans more than one socket.
    '''
    for affinity_set in self.aff_cpus:
        # An empty set cannot be indexed; report it as a sanity failure
        # instead of letting an IndexError escape.
        if not affinity_set:
            raise SanityError('incorrect affinity set')

        # Get CPU siblings by socket
        cpu_siblings = self.get_sibling_cpus(affinity_set[0], by='socket')

        # All CPUs in the affinity set must be available and must belong
        # to the same socket
        if not all(x in self.cpu_set and x in cpu_siblings
                   for x in affinity_set):
            raise SanityError('incorrect affinity set')

        # Decrement all the CPUs in this socket from the cpu set.
        self.cpu_set -= cpu_siblings
def check_performance(self):
    """Run the performance checking phase of the regression test pipeline.

    Nothing is done when ``self.perf_patterns`` is ``None``. Otherwise,
    from within the stage directory, each performance expression is
    evaluated, logged together with its reference and immediately checked
    against it.

    :raises reframe.core.exceptions.SanityError: If a performance tag has
        no reference for the current partition or if the performance check
        fails.
    """
    if self.perf_patterns is None:
        return

    with os_ext.change_dir(self._stagedir):
        for tag, expr in self.perf_patterns.items():
            value = evaluate(expr)
            partname = self._current_partition.fullname
            key = '%s:%s' % (partname, tag)
            try:
                ref_entry = self.reference[key]

                # A reference is expected to hold exactly three fields here
                ref, low_thres, high_thres = ref_entry
                self._perf_logger.info(
                    'value: %s, reference: %s' % (value, ref_entry)
                )
            except KeyError:
                raise SanityError(
                    "tag `%s' not resolved in references for `%s'" %
                    (tag, partname))

            evaluate(assert_reference(value, ref, low_thres, high_thres))
def check_performance(self):
    """Run the performance checking phase of the regression test pipeline.

    Nothing is done when ``self.perf_patterns`` is ``None``. Otherwise,
    from within the stage directory, every performance expression is
    evaluated, recorded in ``self._perfvalues`` and logged; only
    afterwards are the recorded values compared against their references.

    :raises reframe.core.exceptions.SanityError: If a performance tag has
        no reference for the current partition.
    :raises PerformanceError: If a recorded value does not meet its
        reference; it wraps the underlying sanity error.
    """
    if self.perf_patterns is None:
        return

    with os_ext.change_dir(self._stagedir):
        # Evaluation and logging happen in a first pass and the reference
        # checks in a second one, so that all values get logged even when
        # some of them do not meet their reference.
        for tag, expr in self.perf_patterns.items():
            value = evaluate(expr)
            partname = self._current_partition.fullname
            key = '%s:%s' % (partname, tag)
            if key not in self.reference:
                raise SanityError(
                    "tag `%s' not resolved in references for `%s'" %
                    (tag, partname))

            ref_entry = self.reference[key]
            self._perfvalues[key] = (tag, value, *ref_entry)
            self._perf_logger.log_performance(logging.INFO, tag, value,
                                              *ref_entry)

        for record in self._perfvalues.values():
            # Fields past the upper threshold are not needed for the check
            tag, val, ref, low_thres, high_thres, *_ = record
            failure_msg = ('failed to meet reference: %s={0}, '
                           'expected {1} (l={2}, u={3})' % tag)
            try:
                evaluate(assert_reference(val, ref, low_thres, high_thres,
                                          msg=failure_msg))
            except SanityError as e:
                raise PerformanceError(e)
def check_performance(self):
    """Run the performance checking phase of the regression test pipeline.

    Nothing is done when ``self.perf_patterns`` is ``None``. Otherwise,
    from within the stage directory:

    1. Unless ``self.reference`` already has an entry scoped to the ``*``
       system, a default reference ``(0, None, None[, unit])`` is added
       under ``*`` for the performance variables found.
    2. Every performance expression is evaluated, recorded in
       ``self._perfvalues`` and logged.
    3. The recorded values are compared against their references.

    :raises reframe.core.exceptions.SanityError: If a performance tag has
        no reference for the current partition.
    :raises PerformanceError: If a recorded value does not meet its
        reference; it wraps the underlying sanity error.
    """
    if self.perf_patterns is None:
        return

    with os_ext.change_dir(self._stagedir):
        # Gather the (variable, unit) pairs named by the references. The
        # scan stops at the first entry scoped to the '*' system: default
        # references exist then and the ``else`` branch is skipped.
        variables = set()
        for ref_key, ref_entry in self.reference.items():
            scopes = ref_key.split(self.reference.scope_separator)
            try:
                unit = ref_entry[3]
            except IndexError:
                unit = None

            variables.add((scopes[-1], unit))
            if scopes[0] == '*':
                break
        else:
            if not variables:
                # If empty, it means that self.reference was empty, so try
                # to infer the variable names from perf_patterns
                variables = {(name, None)
                             for name in self.perf_patterns.keys()}

            for name, unit in variables:
                default_ref = (0, None, None, unit) if unit else (0, None,
                                                                  None)
                self.reference.update({'*': {name: default_ref}})

        # Evaluation and logging happen in a first pass and the reference
        # checks in a second one, so that all values get logged even when
        # some of them do not meet their reference.
        for tag, expr in self.perf_patterns.items():
            value = evaluate(expr)
            partname = self._current_partition.fullname
            key = '%s:%s' % (partname, tag)
            if key not in self.reference:
                raise SanityError(
                    "tag `%s' not resolved in references for `%s'" %
                    (tag, partname))

            resolved_ref = self.reference[key]
            self._perfvalues[key] = (value, *resolved_ref)
            self._perf_logger.log_performance(logging.INFO, tag, value,
                                              *resolved_ref)

        for key, record in self._perfvalues.items():
            # Fields past the upper threshold are not needed for the check
            val, ref, low_thres, high_thres, *_ = record

            # The tag is the last ':'-separated component of the key
            tag = key.split(':')[-1]
            failure_msg = ('failed to meet reference: %s={0}, '
                           'expected {1} (l={2}, u={3})' % tag)
            try:
                evaluate(assert_reference(val, ref, low_thres, high_thres,
                                          msg=failure_msg))
            except SanityError as e:
                raise PerformanceError(e)