def test_exec_cmd(self):
    cmd = ["echo", "hello-world"]
    pid, ecode, output = HostRunner._exec_cmd(cmd, logging=False)
    self.assertGreater(pid, 0)
    self.assertEqual(ecode, 0)
    self.assertEqual(output, "hello-world")

    with LogCapture("popper") as logc:
        pid, ecode, output = HostRunner._exec_cmd(cmd)
        self.assertGreater(pid, 0)
        self.assertEqual(ecode, 0)
        self.assertEqual(output, "")
        logc.check_present(("popper", "STEP_INFO", "hello-world"))

    cmd = ["env"]
    pid, ecode, output = HostRunner._exec_cmd(
        cmd, env={"TESTACION": "test"}, cwd="/tmp", logging=False
    )
    self.assertGreater(pid, 0)
    self.assertEqual(ecode, 0)
    self.assertTrue("TESTACION" in output)

    _pids = set()
    _, _, _ = HostRunner._exec_cmd(["sleep", "2"], pids=_pids)
    self.assertEqual(len(_pids), 1)
def test_stop_running_tasks(self):
    with PodmanRunner() as pr:
        cmd = ["podman", "run", "-d", "-q"]
        _, _, c1 = HostRunner._exec_cmd(
            cmd + ["debian:buster-slim", "sleep", "20000"], logging=False,
        )
        _, _, c2 = HostRunner._exec_cmd(
            cmd + ["alpine:3.9", "sleep", "10000"], logging=False
        )
        c1 = c1.strip()
        c2 = c2.strip()

        pr._spawned_containers.add(c1)
        pr._spawned_containers.add(c2)
        pr.stop_running_tasks()

        status_cmd = [
            "podman",
            "container",
            "inspect",
            "-f",
            str("{{.State.Status}}"),
        ]
        c1_status_cmd = status_cmd + [c1]
        c2_status_cmd = status_cmd + [c2]

        _, _, c1_status = HostRunner._exec_cmd(c1_status_cmd, logging=False)
        _, _, c2_status = HostRunner._exec_cmd(c2_status_cmd, logging=False)

        self.assertEqual(c1_status, "exited")
        self.assertEqual(c2_status, "exited")
def _exec_srun(self, cmd, step, **kwargs):
    self._set_config_vars(step)
    _cmd = [
        "srun",
        "--nodes",
        f"{self._nodes}",
        "--ntasks",
        f"{self._ntasks}",
        "--ntasks-per-node",
        f"{self._ntasks_per_node}",
    ]

    if self._nodelist:
        _cmd.extend(["--nodelist", self._nodelist])

    _cmd.extend(self._get_resman_kwargs(step))
    _cmd.extend(cmd)

    log.debug(f"Command: {_cmd}")

    if self._config.dry_run:
        return 0

    _, ecode, _ = HostRunner._exec_cmd(_cmd, **kwargs)

    return ecode
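# Illustrative note (not part of the runner): assuming the defaults of one node
# and one task set by _set_config_vars() (not shown here), and no nodelist or
# extra resource-manager flags, the command built above would look roughly like
#
#   srun --nodes 1 --ntasks 1 --ntasks-per-node 1 <step command...>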
def test_find_container(self):
    config = ConfigLoader.load()
    step = Box(
        {
            "uses": "docker://alpine:3.9",
            "runs": ["echo hello"],
            "id": "kontainer_one",
        },
        default_box=True,
    )
    cid = pu.sanitized_name(step.id, config.wid)

    with PodmanRunner(init_podman_client=True, config=config) as pr:
        c = pr._find_container(cid)
        self.assertEqual(c, None)

    with PodmanRunner(init_podman_client=True, config=config) as pr:
        container = pr._create_container(cid, step)
        c = pr._find_container(cid)
        self.assertEqual(c, container)

    cmd = ["podman", "container", "rm", "-f", cid]
    HostRunner._exec_cmd(cmd, logging=False)
def run(self, step):
    self._setup_singularity_cache()
    cid = pu.sanitized_name(step.id, self._config.wid) + ".sif"
    self._container = os.path.join(self._singularity_cache, cid)

    build, img, build_ctx_path = self._get_build_info(step)

    HostRunner._exec_cmd(["rm", "-rf", self._container])

    if not self._config.dry_run:
        if build:
            recipefile = self._get_recipe_file(build_ctx_path, cid)
            HostRunner._exec_cmd(
                ["singularity", "build", "--fakeroot", self._container, recipefile],
                cwd=build_ctx_path,
            )
        else:
            HostRunner._exec_cmd(["singularity", "pull", self._container, img])

    cmd = [self._create_cmd(step, cid)]

    self._spawned_containers.add(cid)
    ecode = self._submit_batch_job(cmd, step)
    self._spawned_containers.remove(cid)

    return ecode
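# Illustrative note (not part of the runner): for a step that uses a registry
# image such as docker://alpine:3.9, the else-branch above would issue roughly
#
#   singularity pull <singularity-cache>/<sanitized-step-id>.sif docker://alpine:3.9
#
# whereas a step with a build context is built from a recipe file via
# "singularity build --fakeroot". The single command returned by _create_cmd()
# (not shown here) is then submitted through _submit_batch_job().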
def _exec_mpi(self, cmd, step, **kwargs):
    self._set_config_vars(step)
    job_name = pu.sanitized_name(step.id, self._config.wid)
    mpi_cmd = ["mpirun", f"{' '.join(cmd)}"]

    job_script = os.path.join(f"{job_name}.sh")
    out_file = os.path.join(f"{job_name}.out")

    with open(out_file, "w"):
        pass

    with open(job_script, "w") as f:
        f.write("#!/bin/bash\n")
        f.write(f"#SBATCH --job-name={job_name}\n")
        f.write(f"#SBATCH --output={out_file}\n")
        f.write(f"#SBATCH --nodes={self._nodes}\n")
        f.write(f"#SBATCH --ntasks={self._ntasks}\n")
        f.write(f"#SBATCH --ntasks-per-node={self._ntasks_per_node}\n")
        if self._nodelist:
            f.write(f"#SBATCH --nodelist={self._nodelist}\n")
        f.write(" ".join(mpi_cmd))

    sbatch_cmd = [
        "sbatch",
        "--wait",
    ]
    sbatch_cmd.extend(self._get_resman_kwargs(step))
    sbatch_cmd.extend([job_script])

    log.debug(f"Command: {sbatch_cmd}")

    if self._config.dry_run:
        return 0

    self._spawned_jobs.add(job_name)
    self._start_out_stream(out_file)

    _, ecode, _ = HostRunner._exec_cmd(sbatch_cmd, **kwargs)

    self._stop_out_stream()
    self._spawned_jobs.remove(job_name)

    return ecode
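# Illustrative note (not part of the runner): assuming a sanitized job name of
# "popper_step_wid", one node, one task, and no nodelist, the generated job
# script would contain roughly the following before being submitted with
# "sbatch --wait":
#
#   #!/bin/bash
#   #SBATCH --job-name=popper_step_wid
#   #SBATCH --output=popper_step_wid.out
#   #SBATCH --nodes=1
#   #SBATCH --ntasks=1
#   #SBATCH --ntasks-per-node=1
#   mpirun <step command...>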
def _submit_batch_job(self, cmd, step):
    job_name = pu.sanitized_name(step.id, self._config.wid)
    temp_dir = "/tmp/popper/slurm/"
    os.makedirs(temp_dir, exist_ok=True)

    job_script = os.path.join(temp_dir, f"{job_name}.sh")
    out_file = os.path.join(temp_dir, f"{job_name}.out")

    # create/truncate log
    with open(out_file, "w"):
        pass

    with open(job_script, "w") as f:
        f.write("#!/bin/bash\n")
        f.write("\n".join(cmd))

    sbatch_cmd = f"sbatch --wait --job-name {job_name} --output {out_file}"
    sbatch_cmd = sbatch_cmd.split()

    for k, v in self._config.resman_opts.get(step.id, {}).items():
        sbatch_cmd.append(pu.key_value_to_flag(k, v))

    sbatch_cmd.append(job_script)

    log.info(f'[{step.id}] {" ".join(sbatch_cmd)}')

    if self._config.dry_run:
        return 0

    self._spawned_jobs.add(job_name)

    # start a tail (background) process on the output file
    self._start_out_stream(out_file)

    # submit the job and wait
    _, ecode, output = HostRunner._exec_cmd(sbatch_cmd, logging=False)

    # kill the tail process
    self._stop_out_stream()

    self._spawned_jobs.remove(job_name)

    return ecode
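# Illustrative note (not part of the runner): for a step whose sanitized job
# name is "popper_step_wid" and which has no extra resource-manager options,
# the submission command logged above would look roughly like
#
#   sbatch --wait --job-name popper_step_wid \
#       --output /tmp/popper/slurm/popper_step_wid.out \
#       /tmp/popper/slurm/popper_step_wid.sh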
def test_create_container(self):
    config = ConfigLoader.load()
    step = Box(
        {
            "uses": "docker://alpine:3.9",
            "runs": ["echo hello"],
            "id": "kontainer_one",
        },
        default_box=True,
    )
    cid = pu.sanitized_name(step.id, config.wid)

    with PodmanRunner(init_podman_client=True, config=config) as pr:
        c = pr._create_container(cid, step)
        c_status_cmd = [
            "podman",
            "container",
            "inspect",
            "-f",
            str("{{.State.Status}}"),
            c,
        ]
        __, _, c_status = HostRunner._exec_cmd(c_status_cmd, logging=False)
        self.assertEqual(c_status, "configured")

    cmd = ["podman", "container", "rm", c]
    HostRunner._exec_cmd(cmd, logging=False)

    step = Box(
        {
            "uses": "docker://alpine:3.9",
            "runs": ["echo", "hello_world"],
            "id": "KoNtAiNeR tWo",
        },
        default_box=True,
    )
    cid = pu.sanitized_name(step.id, config.wid)

    with PodmanRunner(init_podman_client=True, config=config) as pr:
        c = pr._create_container(cid, step)
        c_status_cmd = [
            "podman",
            "container",
            "inspect",
            "-f",
            str("{{.State.Status}}"),
            c,
        ]
        __, _, c_status = HostRunner._exec_cmd(c_status_cmd, logging=False)
        self.assertEqual(c_status, "configured")

    cmd = ["podman", "container", "rm", c]
    HostRunner._exec_cmd(cmd, logging=False)
def _tail_output(self, out_file):
    self._out_stream_pid = set()
    _, ecode, _ = HostRunner._exec_cmd(
        ["tail", "-f", out_file], pids=self._out_stream_pid
    )
    return ecode
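# Illustrative note (not part of the runner): _exec_cmd() records the PID of the
# spawned "tail -f" process in self._out_stream_pid, which presumably allows
# _stop_out_stream() (not shown here) to terminate the tail once the batch job
# submitted via sbatch has finished.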
def stop_running_tasks(self):
    for job_name in self._spawned_jobs:
        log.info(f"Cancelling job {job_name}")
        _, ecode, _ = HostRunner._exec_cmd(["scancel", "--name", job_name])
        if ecode != 0:
            log.warning(f"Failed to cancel the job {job_name}.")