def make_stagedir(self, *dirs):
    wipeout = self.get_option('general/0/clean_stagedir')
    ret = self._makedir(self.stage_prefix,
                        *self._format_dirs(*dirs), wipeout=wipeout)
    getlogger().debug(
        f'Created stage directory {ret!r} [clean_stagedir: {wipeout}]'
    )
    return ret

def snooze(self):
    t_elapsed = time.time() - self._t_init
    self._num_polls += 1
    poll_rate = self._num_polls / t_elapsed if t_elapsed else math.inf
    getlogger().debug2(
        f'Poll rate control: sleeping for {self._sleep_duration}s '
        f'(current poll rate: {poll_rate} polls/s)'
    )
    time.sleep(self._sleep_duration)

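# A usage sketch for snooze() above (hypothetical names: `all_jobs_done`,
# `poll_scheduler`, `self._poller`; not taken from the original source).
# Each scheduler poll is followed by a fixed-length sleep, while the observed
# poll rate is logged for debugging:
#
#     while not all_jobs_done():
#         poll_scheduler()        # query the scheduler for job states
#         self._poller.snooze()   # throttle the loop and log the poll rate
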
def test_logging_context(default_exec_ctx, logfile):
    rlog.configure_logging(rt.runtime().site_config)
    with rlog.logging_context() as logger:
        assert logger is rlog.getlogger()
        assert logger is not rlog.null_logger
        rlog.getlogger().error('error from context')

    assert _found_in_logfile('reframe', logfile)
    assert _found_in_logfile('error from context', logfile)

def on_task_failure(self, task):
    msg = f'{task.check.info()} [{task.pipeline_timings_basic()}]'
    if task.failed_stage == 'cleanup':
        self.printer.status('ERROR', msg, just='right')
    else:
        self._remove_from_running(task)
        self.printer.status('FAIL', msg, just='right')

    getlogger().verbose(f"==> {task.pipeline_timings_all()}")

def on_task_success(self, task):
    msg = f'{task.check.info()} [{task.pipeline_timings_basic()}]'
    self.printer.status('OK', msg, just='right')
    getlogger().verbose(f"==> {task.pipeline_timings_all()}")

    # Update the reference count of the dependencies
    for c in task.testcase.deps:
        self._task_index[c].ref_count -= 1

    self._retired_tasks.append(task)

def _finalize_task(self, task):
    getlogger().debug2(f'Finalizing task {task.testcase}')
    if not self.skip_sanity_check:
        task.sanity()

    if not self.skip_performance_check:
        task.performance()

    task.finalize()

def cancel(self):
    getlogger().debug('cancelling job (id=%s)' % self._jobid)
    if self._jobid is None:
        raise ReframeError('no job is spawned yet')

    os_ext.run_command('scancel %s' % self._jobid,
                       check=True, timeout=settings.job_submit_timeout)
    self._is_cancelling = True

def _remove_from_running(self, task):
    getlogger().debug2(
        f'Removing task from the running list: {task.testcase}'
    )
    try:
        partname = task.check.current_partition.fullname
        self._running_tasks[partname].remove(task)
    except (ValueError, AttributeError, KeyError):
        getlogger().debug2('Task was not running')

def test_logging_context(self):
    rlog.configure_logging(self.logging_config)
    with rlog.logging_context() as logger:
        self.assertIs(logger, rlog.getlogger())
        self.assertIsNot(logger, rlog.null_logger)
        rlog.getlogger().error('error from context')

    self.assertTrue(self.found_in_logfile('reframe'))
    self.assertTrue(self.found_in_logfile('error from context'))

def _kill_all(self):
    """Send SIGKILL to all the processes of the spawned job."""
    try:
        os.killpg(self._jobid, signal.SIGKILL)
    except (ProcessLookupError, PermissionError):
        # The process group may already be dead or assigned to a different
        # group, so ignore this error
        getlogger().debug(
            'pid %s already dead or assigned elsewhere' % self._jobid
        )

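# A self-contained sketch of the process-group mechanics that _kill_all()
# relies on. The assumption (not shown above) is that the job is spawned as
# the leader of its own process group, e.g. with start_new_session=True, so
# that killpg() reaches the whole process tree:

import os
import signal
import subprocess

proc = subprocess.Popen(['sleep', '60'], start_new_session=True)
pgid = os.getpgid(proc.pid)   # equals proc.pid for a new session leader
try:
    os.killpg(pgid, signal.SIGKILL)
except (ProcessLookupError, PermissionError):
    # Group already gone or owned by another user; nothing to do
    pass
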
def test_logging_context(self):
    rlog.configure_logging(self.logging_config)
    with rlog.logging_context() as logger:
        assert logger is rlog.getlogger()
        assert logger is not rlog.null_logger
        rlog.getlogger().error('error from context')

    assert self.found_in_logfile('reframe')
    assert self.found_in_logfile('error from context')

def runcase(self, case):
    super().runcase(case)
    check, partition, environ = case
    self._partitions.add(partition)

    # Set partition-based counters, if not set already
    self._running_tasks.setdefault(partition.fullname, [])
    self._ready_tasks.setdefault(partition.fullname, [])
    self._max_jobs.setdefault(partition.fullname, partition.max_jobs)

    task = RegressionTask(case, self.task_listeners)
    self._task_index[case] = task
    self.stats.add_task(task)
    self.printer.status(
        'RUN', '%s on %s using %s' % (check.name, partition.fullname,
                                      environ.name)
    )
    try:
        partname = partition.fullname
        if not self._setup_task(task):
            if not task.failed:
                self.printer.status(
                    'DEP', '%s on %s using %s' % (check.name, partname,
                                                  environ.name),
                    just='right'
                )
                self._waiting_tasks.append(task)

            return

        if len(self._running_tasks[partname]) >= partition.max_jobs:
            # The job limit seems to have been reached; poll the running
            # tasks to see whether any have finished and freed up a slot
            getlogger().debug2(
                f'Reached concurrency limit for partition {partname!r}: '
                f'{partition.max_jobs} job(s)'
            )
            self._poll_tasks()

        if len(self._running_tasks[partname]) < partition.max_jobs:
            # Task was put in _ready_tasks during setup
            self._ready_tasks[partname].pop()
            self._reschedule(task)
        else:
            self.printer.status('HOLD', task.check.info(), just='right')
    except TaskExit:
        if not task.failed:
            with contextlib.suppress(TaskExit):
                self._reschedule(task)

        return
    except ABORT_REASONS as e:
        # If the abort was caused by a failure elsewhere, abort the current
        # task as well
        task.abort(e)
        self._failall(e)
        raise

def load_config(filename=None):
    if filename is None:
        filename = _find_config_file()

    if filename is None:
        # Return the generic configuration
        getlogger().debug('no configuration found; '
                          'falling back to a generic one')
        return _SiteConfig(settings.site_configuration, '<builtin>')

    return _SiteConfig.create(filename)

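# A usage sketch for load_config() (the explicit path below is hypothetical):
#
#     site_config = load_config()                           # auto-detect, or fall back to the builtin config
#     site_config = load_config('/path/to/settings.py')     # load a specific configuration file
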
def conditional_compile(self):
    build_dir = os.path.abspath(os.path.join('builds',
                                             self.current_system.name,
                                             self.current_partition.name,
                                             self.current_environ.name,
                                             self.name))
    build_path = os.path.join(build_dir, self.executable)
    if os.path.exists(build_path):
        self.build_path = build_path
        getlogger().info('found exe at %r', self.build_path)
        self.build_system = NoBuild()
        self.sourcesdir = build_dir  # means ReFrame will copy the exe back in
    else:
        self.build_path = None

def finished(self, job):
    try:
        self._update_state(job)
    except JobError as e:
        # We ignore these exceptions at this point and we simply mark the
        # job as unfinished.
        getlogger().debug('ignoring error during polling: %s' % e)
        return False
    else:
        return job.state == 'COMPLETED'

def cancel(self):
    super().cancel()

    # Recreate the full job id
    jobid = str(self._jobid)
    if self._pbs_server:
        jobid += '.' + self._pbs_server

    getlogger().debug('cancelling job (id=%s)' % jobid)
    self._run_command('qdel %s' % jobid, settings().job_submit_timeout)

def test_logging_context_check(self):
    rlog.configure_logging(self.logging_config)
    with rlog.logging_context(check=self.check):
        rlog.getlogger().error('error from context')

    rlog.getlogger().error('error outside context')
    assert self.found_in_logfile('_FakeCheck: %s: error from context' %
                                 sys.argv[0])
    assert self.found_in_logfile('reframe: %s: error outside context' %
                                 sys.argv[0])

def _save_info(filename, topo_info):
    if not topo_info:
        return

    os.makedirs(os.path.dirname(filename), exist_ok=True)
    try:
        with open(filename, 'w') as fp:
            json.dump(topo_info, fp, indent=2)
    except OSError as e:
        getlogger().warning(
            f'could not save topology file: {filename!r}: {e}'
        )

def _load_info(filename, schema=None):
    try:
        with open(filename) as fp:
            return _validate_info(json.load(fp), schema)
    except OSError as e:
        getlogger().warning(f'could not load file: {filename!r}: {e}')
        return {}
    except jsonschema.ValidationError as e:
        raise ConfigError(
            f'could not validate meta-config file {filename!r}'
        ) from e

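# A round-trip usage sketch for the two helpers above. The path and payload
# are hypothetical, and it assumes _validate_info() returns the loaded data
# unchanged when no schema is given:

topo = {'num_cpus': 8, 'num_sockets': 1}
_save_info('/tmp/rfm-topo/topology.json', topo)
assert _load_info('/tmp/rfm-topo/topology.json') == topo
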
def _remove_from_running(self, task):
    getlogger().debug('removing task: %s' % task.check.info())
    try:
        self._running_tasks.remove(task)
    except ValueError:
        getlogger().debug('not in running tasks')
    else:
        partname = task.check.current_partition.fullname
        self._running_tasks_counts[partname] -= 1

def snooze(self):
    from reframe.core.logging import getlogger

    t_elapsed = time.time() - self._t_init
    self._num_polls += 1
    getlogger().debug(
        f'poll rate control: sleeping for {self._sleep_duration}s '
        f'(current poll rate: {self._num_polls/t_elapsed} polls/s)'
    )
    time.sleep(self._sleep_duration)

def _reschedule(self, task, load_env=True):
    getlogger().debug('scheduling test case for running')

    # Restore the test case's environment and run it
    if load_env:
        task.resume()

    task.compile()
    task.compile_wait()
    task.run()

def copy_executable(self):
    if not self.build_path:
        # Copy the executable only if we actually compiled it in this run
        self.exes_dir = os.path.join('builds', self.current_system.name,
                                     self.current_partition.name,
                                     self.current_environ.name, self.name)
        exe_path = os.path.join(self.stagedir, self.executable)
        build_path = os.path.join(self.exes_dir, self.executable)

        # self.executable might include a directory component
        build_dir = os.path.dirname(build_path)
        if not os.path.exists(build_dir):
            os.makedirs(build_dir)

        shutil.copy(exe_path, build_path)
        getlogger().info('copied exe to %r', build_path)

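# A sketch of how conditional_compile() and copy_executable() above could be
# wired into a test as pipeline hooks so that a previously built executable is
# reused. The decorators follow ReFrame's run_before/run_after hook style; the
# phases chosen here are an assumption, not taken from the original source:
#
#     import reframe as rfm
#
#     class MyTest(rfm.RegressionTest):
#         @rfm.run_before('compile')
#         def conditional_compile(self): ...   # skip the build if cached
#
#         @rfm.run_after('compile')
#         def copy_executable(self): ...       # populate the build cache
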
def _merge_files(self, job):
    with os_ext.change_dir(job.workdir):
        out_glob = glob.glob(job.stdout + '_*')
        err_glob = glob.glob(job.stderr + '_*')
        getlogger().debug(
            'merging job array output files: %s' % ', '.join(out_glob)
        )
        os_ext.concat_files(job.stdout, *out_glob, overwrite=True)

        getlogger().debug(
            'merging job array error files: %s' % ', '.join(err_glob)
        )
        os_ext.concat_files(job.stderr, *err_glob, overwrite=True)

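# A plain-Python sketch of the merge step above, assuming that
# os_ext.concat_files(dst, *srcs, overwrite=True) concatenates the
# per-array-task files into a single file (the filenames below are
# hypothetical):

import glob


def _concat(dst, *srcs):
    with open(dst, 'w') as out:
        for src in srcs:
            with open(src) as fin:
                out.write(fin.read())


_concat('rfm_job.out', *sorted(glob.glob('rfm_job.out_*')))
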
def _remove_from_running(self, task):
    getlogger().debug(
        'removing task from running list: %s' % task.check.info()
    )
    try:
        partname = task.check.current_partition.fullname
        self._running_tasks[partname].remove(task)
    except (ValueError, AttributeError, KeyError):
        getlogger().debug('not in running tasks')

def test_logging_context_check(default_exec_ctx, logfile, fake_check):
    rlog.configure_logging(rt.runtime().site_config)
    with rlog.logging_context(check=fake_check):
        rlog.getlogger().error('error from context')

    rlog.getlogger().error('error outside context')
    assert _found_in_logfile(f'_FakeCheck: {sys.argv[0]}: error from context',
                             logfile)
    assert _found_in_logfile(f'reframe: {sys.argv[0]}: error outside context',
                             logfile)

def _reschedule(self, task, load_env=True):
    getlogger().debug('scheduling test case for running')
    partname = task.check.current_partition.fullname

    # Restore the test case's environment and run it
    if load_env:
        task.resume()

    task.compile()
    task.run()

def test_handler_append(default_exec_ctx, logfile):
    rlog.configure_logging(rt.runtime().site_config)
    rlog.getlogger().warning('foo')
    _close_handlers()

    # Reload logger
    rlog.configure_logging(rt.runtime().site_config)
    rlog.getlogger().warning('bar')
    assert _found_in_logfile('foo', logfile)
    assert _found_in_logfile('bar', logfile)

def _advance_ready_run(self, task):
    partname = _get_partition_name(task, phase='run')
    max_jobs = self._max_jobs[partname]
    if len(self._partition_tasks[partname]) < max_jobs:
        if self._exec_stage(task, [task.run]):
            self._partition_tasks[partname].add(task)

        return 1

    getlogger().debug2(f'Hit the max job limit of {partname}: {max_jobs}')
    return 0

def finalize(self):
    try:
        jsonfile = os.path.join(self.check.stagedir, '.rfm_testcase.json')
        with open(jsonfile, 'w') as fp:
            jsonext.dump(self.check, fp, indent=2)
    except OSError as e:
        logging.getlogger().warning(
            f'could not dump test case {self.case}: {e}'
        )

    self._current_stage = 'finalize'
    self._notify_listeners('on_task_success')
