def poll(self, *jobs):
    '''Update the status of the jobs.

    Issues a single ``sacct`` query covering all the given job IDs and
    updates each job's state, exit code, node list and completion time
    in place.  ``None`` entries in ``jobs`` are ignored.
    '''

    if jobs:
        # Filter out non-jobs
        jobs = [job for job in jobs if job is not None]

    if not jobs:
        return

    # SLURM_TIME_FORMAT='%s' makes sacct print times as Unix timestamps,
    # so the 'end' field can be parsed numerically downstream.
    with rt.temp_environment(variables={'SLURM_TIME_FORMAT': '%s'}):
        # Start the query window at the earliest submission time, so
        # that sacct's accounting window covers every polled job.
        t_start = time.strftime(
            '%F', time.localtime(min(job.submit_time for job in jobs)))
        completed = _run_strict(
            f'sacct -S {t_start} -P '
            f'-j {",".join(job.jobid for job in jobs)} '
            f'-o jobid,state,exitcode,end,nodelist')

    self._update_state_count += 1

    # We need the match objects, so we have to use finditer()
    state_match = list(
        re.finditer(
            fr'^(?P<jobid>{self._jobid_patt})\|(?P<state>\S+)([^\|]*)\|'
            fr'(?P<exitcode>\d+)\:(?P<signal>\d+)\|(?P<end>\S+)\|'
            fr'(?P<nodespec>.*)', completed.stdout, re.MULTILINE))
    if not state_match:
        self.log(
            f'Job state not matched (stdout follows)\n{completed.stdout}')
        return

    # Group the matched records by base job ID, so that records of job
    # arrays ('<id>_<idx>') and heterogeneous jobs ('<id>+<idx>')
    # collapse onto their parent job.
    job_info = {}
    for s in state_match:
        # Take into account both job arrays and heterogeneous jobs
        jobid = re.split(r'_|\+', s.group('jobid'))[0]
        job_info.setdefault(jobid, []).append(s)

    for job in jobs:
        try:
            jobarr_info = job_info[job.jobid]
        except KeyError:
            # sacct returned nothing for this job; leave it untouched
            continue

        # Join the states with ',' in case of job arrays|heterogeneous jobs
        job._state = ','.join(m.group('state') for m in jobarr_info)

        # The blocked-job check runs only every SACCT_SQUEUE_RATIO
        # updates, presumably to limit extra scheduler queries —
        # TODO confirm against _cancel_if_blocked()
        if not self._update_state_count % self.SACCT_SQUEUE_RATIO:
            self._cancel_if_blocked(job)

        self._cancel_if_pending_too_long(job)
        if slurm_state_completed(job.state):
            # Since Slurm exitcodes are positive take the maximum one
            job._exitcode = max(
                int(m.group('exitcode')) for m in jobarr_info)

        # Use ',' to join nodes to be consistent with Slurm syntax
        self._update_nodelist(
            job, ','.join(m.group('nodespec') for m in jobarr_info))
        self._update_completion_time(
            job, (m.group('end') for m in jobarr_info))
def test_temp_environment(base_environ, user_runtime, modules_system):
    '''The temporary environment is loaded only inside the context.'''
    modules = ['testmod_foo']
    variables = {'_var0': 'val2', '_var3': 'val3'}
    with rt.temp_environment(modules, variables) as env:
        assert rt.is_env_loaded(env)

    # Leaving the context must unload the environment again
    assert not rt.is_env_loaded(env)
def completion_time(self, job):
    '''Return the job's completion time, querying ``sacct`` if needed.

    The value is cached in ``self._completion_time``; for jobs that are
    not in a completed Slurm state, the cached value (possibly ``None``)
    is returned as-is.
    '''

    # Serve from the cache, and bail out early for jobs that have not
    # reached a completed state yet.
    if self._completion_time or not slurm_state_completed(job.state):
        return self._completion_time

    # SLURM_TIME_FORMAT='%s' makes sacct report Unix timestamps, so the
    # 'end' field can be parsed as a float below.
    with rt.temp_environment(variables={'SLURM_TIME_FORMAT': '%s'}):
        cmd = ('sacct -S %s -P -j %s -o jobid,end' %
               (self._submit_time.strftime('%F'), job.jobid))
        completed = os_ext.run_command(cmd, log=False)

    pattern = r'^(?P<jobid>%s)\|(?P<end>\S+)' % self._state_patt
    matches = list(re.finditer(pattern, completed.stdout, re.MULTILINE))
    if not matches:
        return None

    end_times = []
    for match in matches:
        # Non-numeric 'end' fields (e.g. still-running steps) are skipped
        with suppress(ValueError):
            end_times.append(float(match.group('end')))

    if end_times:
        # Job arrays/steps yield several records; the latest one wins
        self._completion_time = max(end_times)

    return self._completion_time
def test_option_envvar_conversion_error(default_exec_ctx, extended_parser):
    '''A non-convertible environment variable yields exactly one error.'''
    env = {'RFM_NON_DEFAULT_CRAYPE': 'foo'}
    with rt.temp_environment(variables=env):
        site_config = rt.runtime().site_config
        parsed = extended_parser.parse_args(['--nocolor'])
        conversion_errors = parsed.update_config(site_config)
        assert len(conversion_errors) == 1
def test_option_envvar_conversion_error(extended_parser):
    '''A non-convertible environment variable yields exactly one error.'''
    with rt.temp_runtime(fixtures.BUILTIN_CONFIG_FILE):
        env = {'RFM_NON_DEFAULT_CRAYPE': 'foo'}
        with rt.temp_environment(variables=env):
            config = rt.runtime().site_config
            parsed = extended_parser.parse_args(['--nocolor'])
            conversion_errors = parsed.update_config(config)
            assert len(conversion_errors) == 1
def test_temp_environment(self):
    '''The temporary environment is active only inside the context.'''
    self.setup_modules_system()
    variables = {'_var0': 'val2', '_var3': 'val3'}
    with rt.temp_environment(['testmod_foo'], variables) as env:
        assert rt.is_env_loaded(env)

    # The environment must be unloaded on exit
    assert not rt.is_env_loaded(env)
def test_option_precedence(default_exec_ctx, extended_parser):
    '''Command-line options take precedence over environment variables.'''
    env = {
        'RFM_TIMESTAMP': '%F',
        'RFM_NON_DEFAULT_CRAYPE': 'yes',
        'RFM_MODULES_PRELOAD': 'a,b,c',
        'RFM_CHECK_SEARCH_PATH': 'x:y:z',
    }
    with rt.temp_environment(variables=env):
        opts = extended_parser.parse_args(['--timestamp=%FT%T', '--nocolor'])

        # Options set neither on the command line nor in the environment
        assert opts.recursive is None
        assert opts.config_file is None
        assert opts.prefix is None

        # The command line wins over RFM_TIMESTAMP
        assert opts.timestamp == '%FT%T'

        # Values coming from the environment variables
        assert opts.non_default_craype is True
        assert opts.module == ['a', 'b', 'c']
        assert opts.check_path == ['x', 'y', 'z']

        # Parser default and explicit command-line flag
        assert opts.stagedir == '/foo'
        assert opts.colorize is False
def test_option_with_config(default_exec_ctx, extended_parser, tmp_path):
    '''Parsed options and environment variables update the site config.'''
    env = {
        'RFM_TIMESTAMP': '%F',
        'RFM_NON_DEFAULT_CRAYPE': 'yes',
        'RFM_MODULES_PRELOAD': 'a,b,c',
        'RFM_KEEP_STAGE_FILES': 'no',
    }
    with rt.temp_environment(variables=env):
        site_config = rt.runtime().site_config
        opts = extended_parser.parse_args(['--timestamp=%FT%T', '--nocolor'])
        opts.update_config(site_config)

        assert site_config.get('general/0/check_search_recursive') is False
        assert site_config.get('general/0/timestamp_dirs') == '%FT%T'
        assert site_config.get('general/0/non_default_craype') is True
        assert site_config.get('general/0/colorize') is False
        assert site_config.get('general/0/keep_stage_files') is False
        assert site_config.get('systems/0/prefix') == str(tmp_path)

        # Defaults specified in parser override those in configuration file
        assert site_config.get('systems/0/stagedir') == '/foo'
def detect_topology():
    '''Detect processor and device topology for every system partition.

    For each partition the topology information is taken, in order of
    preference, from the configuration, from a cached JSON file under
    ``$HOME/.reframe/topology/<system>-<partition>/`` or, failing both,
    it is auto-detected and cached.  Device auto-detection is not
    supported.
    '''
    rt = runtime.runtime()
    detect_remote_systems = rt.get_option('general/0/remote_detect')
    topo_prefix = os.path.join(os.getenv('HOME'), '.reframe/topology')
    for part in rt.system.partitions:
        getlogger().debug(f'detecting topology info for {part.fullname}')
        found_procinfo = False
        found_devinfo = False
        if part.processor.info != {}:
            # Processor info set up already in the configuration
            getlogger().debug(
                '> topology found in configuration file; skipping...')
            found_procinfo = True

        if part.devices:
            # Devices set up already in the configuration
            getlogger().debug(
                '> devices found in configuration file; skipping...')
            found_devinfo = True

        if found_procinfo and found_devinfo:
            continue

        topo_file = os.path.join(
            topo_prefix, f'{rt.system.name}-{part.name}', 'processor.json')
        dev_file = os.path.join(
            topo_prefix, f'{rt.system.name}-{part.name}', 'devices.json')
        if not found_procinfo and os.path.exists(topo_file):
            getlogger().debug(
                f'> found topology file {topo_file!r}; loading...')
            try:
                part._processor = ProcessorInfo(
                    _load_info(topo_file,
                               _subschema('#/defs/processor_info')))
                found_procinfo = True
            except json.decoder.JSONDecodeError as e:
                # A corrupt cache file is not fatal; fall back to detection
                getlogger().debug(
                    f'> could not load {topo_file!r}: {e}: ignoring...')

        if not found_devinfo and os.path.exists(dev_file):
            getlogger().debug(f'> found devices file {dev_file!r}; loading...')
            try:
                devices_info = _load_info(dev_file,
                                          _subschema('#/defs/devices'))
                part._devices = [DeviceInfo(d) for d in devices_info]
                found_devinfo = True
            except json.decoder.JSONDecodeError as e:
                getlogger().debug(
                    f'> could not load {dev_file!r}: {e}: ignoring...')

        if found_procinfo and found_devinfo:
            continue

        if not found_procinfo:
            # No topology found, try to auto-detect it
            getlogger().debug('> no topology file found; auto-detecting...')
            modules = list(rt.system.preload_environ.modules)
            # NOTE: named 'env_vars' instead of 'vars' so as not to
            # shadow the builtin
            env_vars = dict(rt.system.preload_environ.variables.items())
            if _is_part_local(part):
                modules += part.local_env.modules
                env_vars.update(part.local_env.variables)

                # Unconditionally detect the system for fully local
                # partitions
                with runtime.temp_environment(modules=modules,
                                              variables=env_vars):
                    part._processor = ProcessorInfo(cpuinfo())

                _save_info(topo_file, part.processor.info)
            elif detect_remote_systems:
                with runtime.temp_environment(modules=modules,
                                              variables=env_vars):
                    part._processor = ProcessorInfo(_remote_detect(part))

                # Remote detection may fail and yield no info; cache
                # only when something was actually detected
                if part.processor.info:
                    _save_info(topo_file, part.processor.info)

        if not found_devinfo:
            getlogger().debug('> device auto-detection is not supported')
def test_envvar_option(default_exec_ctx, extended_parser):
    '''An option can be initialized from its environment variable.'''
    with rt.temp_environment(variables={'RFM_ENV_OPT': 'BAR'}):
        parsed = extended_parser.parse_args([])
        assert parsed.env_option == 'BAR'