def add_active_task(self, task_id):
    """Start observing ``task_id`` and register it in ``self._active_tasks``.

    Nothing is registered (only a log line is emitted) when ``task_id`` is
    already present in ``self.finished_tasks`` or when the monitor's state
    has no header.

    Args:
        task_id: identifier of the task to begin monitoring.
    """
    if task_id in self.finished_tasks:
        log.error('Found an active task (%s) in finished tasks?' % task_id)
        return

    task_monitor = TaskMonitor(self._pathspec, task_id)
    # Read the state once so the header check and the sandbox lookup are
    # guaranteed to look at the same snapshot (and the state is not loaded
    # twice).
    header = task_monitor.get_state().header
    if not header:
        log.info('Unable to load task "%s"' % task_id)
        return

    resource_monitor = self._resource_monitor(task_monitor, header.sandbox)
    resource_monitor.start()
    self._active_tasks[task_id] = ActiveObservedTask(
        task_id=task_id,
        pathspec=self._pathspec,
        task_monitor=task_monitor,
        resource_monitor=resource_monitor,
    )
def test_basic_as_job(self):
    """Launch HELLO_WORLD wrapped in a MESOS_JOB and check its processes and status updates."""
    driver = ProxyDriver()
    with temporary_dir() as checkpoint_root:
        executor = ThermosExecutor(
            runner_provider=make_provider(checkpoint_root),
            sandbox_provider=DefaultTestSandboxProvider)
        job_task = make_task(MESOS_JOB(task=HELLO_WORLD), instanceId=0)
        executor.launchTask(driver, job_task)

        # Wait for the runner, then for the status manager to be assigned,
        # and finally for the executor to terminate.
        executor.runner_started.wait()
        while executor._status_manager is None:
            time.sleep(0.1)
        executor.terminated.wait()

        monitor = TaskMonitor(TaskPath(root=checkpoint_root), task_id=HELLO_WORLD_TASK_ID)
        runner_state = monitor.get_state()
        assert 'hello_world_hello_world-001' in runner_state.processes, (
            'Could not find processes, got: %s' % ' '.join(runner_state.processes))

        updates = driver.method_calls['sendStatusUpdate']
        assert len(updates) == 3
        # The status object is the first positional argument of each recorded call.
        first, second, third = [call[0][0] for call in updates]
        assert first.state == mesos_pb.TASK_STARTING
        assert second.state == mesos_pb.TASK_RUNNING
        assert third.state == mesos_pb.TASK_FINISHED
def test_coordinator_dead_kill(self):
    """SIGKILL runner, coordinator and process, force-kill the task, and expect one LOST run."""
    runner = self.start_runner()
    monitor = TaskMonitor(runner.pathspec, runner.task_id)
    self.wait_until_running(monitor)

    active = monitor.get_active_processes()
    process_state, run_number = active[0]
    assert process_state.process == "ignorant_process"
    assert run_number == 0

    # Kill order matters: runner first, then the coordinator, then the process.
    doomed_pids = (runner.po.pid, process_state.coordinator_pid, process_state.pid)
    for pid in doomed_pids:
        os.kill(pid, signal.SIGKILL)

    killer = TaskRunner.get(runner.task_id, runner.root)
    assert killer is not None
    killer.kill(force=True)

    runs = monitor.get_state().processes["ignorant_process"]
    assert len(runs) == 1
    assert runs[0].state == ProcessState.LOST
def test_coordinator_kill(self):
    """Kill the coordinator, then the process, then the runner, and check the run history.

    After the three kills the test expects 'ignorant_process' to have three
    recorded runs whose states are LOST, KILLED and RUNNING, in that order.
    The still-running third run is cleaned up at the end regardless of
    whether the assertions pass.
    """
    import errno  # local import: only needed by the cleanup in ``finally``

    runner = self.start_runner()
    tm = TaskMonitor(runner.pathspec, runner.task_id)
    self.wait_until_running(tm)

    process_state, run_number = tm.get_active_processes()[0]
    assert process_state.process == 'ignorant_process'
    assert run_number == 0
    os.kill(process_state.coordinator_pid, signal.SIGKILL)

    # Poll until the active process reports a run number above 0.
    while True:
        active_procs = tm.get_active_processes()
        if active_procs and active_procs[0][1] > 0:
            break
        time.sleep(0.2)
    self.wait_until_running(tm)

    process_state, run_number = tm.get_active_processes()[0]
    assert process_state.process == 'ignorant_process'
    assert run_number == 1
    os.kill(process_state.pid, signal.SIGKILL)

    # Poll until the active process reports a run number above 1.
    while True:
        active_procs = tm.get_active_processes()
        if active_procs and active_procs[0][1] > 1:
            break
        time.sleep(0.2)
    self.wait_until_running(tm)

    os.kill(runner.po.pid, signal.SIGKILL)

    # Load the state *before* entering ``try`` so that ``finally`` can never
    # reference an unbound name and mask the real failure.
    state = tm.get_state()
    runs = state.processes['ignorant_process']
    try:
        assert runs[0].state == ProcessState.LOST
        assert runs[1].state == ProcessState.KILLED
        assert runs[2].state == ProcessState.RUNNING
    finally:
        # Clean up the third run only if it exists; otherwise the failure
        # raised inside ``try`` is the one that should propagate.
        if len(runs) > 2:
            for pid in (runs[2].coordinator_pid, runs[2].pid):
                try:
                    os.kill(pid, signal.SIGKILL)
                except OSError as e:
                    # An already-dead process must not stop us from killing
                    # the remaining one; anything else is a real error.
                    if e.errno != errno.ESRCH:
                        raise
def test_coordinator_dead_kill(self):
    """With runner, coordinator and process all SIGKILLed, a forced kill records a single LOST run."""
    runner = self.start_runner()
    task_monitor = TaskMonitor(runner.pathspec, runner.task_id)
    self.wait_until_running(task_monitor)

    first_active = task_monitor.get_active_processes()[0]
    process_state, run_number = first_active
    assert process_state.process == 'ignorant_process'
    assert run_number == 0

    # Take down the runner, then the coordinator, then the process itself.
    sigkill = signal.SIGKILL
    os.kill(runner.po.pid, sigkill)
    os.kill(process_state.coordinator_pid, sigkill)
    os.kill(process_state.pid, sigkill)

    killer = TaskRunner.get(runner.task_id, runner.root)
    assert killer is not None
    killer.kill(force=True)

    final_state = task_monitor.get_state()
    history = final_state.processes['ignorant_process']
    assert len(history) == 1
    assert history[0].state == ProcessState.LOST
def test_basic(self):
    """Launch HELLO_WORLD_MTI and check its processes and the three status updates sent."""
    driver = ProxyDriver()
    with temporary_dir() as checkpoint_root:
        executor = ThermosExecutor(
            runner_provider=make_provider(checkpoint_root),
            sandbox_provider=DefaultTestSandboxProvider)
        hello_task = make_task(HELLO_WORLD_MTI)
        executor.launchTask(driver, hello_task)
        executor.terminated.wait()

        monitor = TaskMonitor(TaskPath(root=checkpoint_root), task_id=HELLO_WORLD_TASK_ID)
        runner_state = monitor.get_state()
        assert 'hello_world_hello_world-001' in runner_state.processes, (
            'Could not find processes, got: %s' % ' '.join(runner_state.processes))

        updates = driver.method_calls['sendStatusUpdate']
        assert len(updates) == 3
        # The status object is the first positional argument of each recorded call.
        first, second, third = [call[0][0] for call in updates]
        assert first.state == mesos_pb.TASK_STARTING
        assert second.state == mesos_pb.TASK_RUNNING
        assert third.state == mesos_pb.TASK_FINISHED
def test_pg_is_killed(self):
    """Check that the parent and child pids written by the task are gone once it succeeds.

    The task under test writes 'parent.txt' and 'child.txt' pid files into
    its sandbox directory; the test signals it to finish by creating
    'exit.txt', waits for the task to reach SUCCESS, and then verifies that
    neither pid is reported by the process provider any more.
    """
    runner = self.start_runner()
    tm = TaskMonitor(runner.pathspec, runner.task_id)
    self.wait_until_running(tm)

    process_state, run_number = tm.get_active_processes()[0]
    assert process_state.process == 'process'
    assert run_number == 0

    # Wait for both pid files to appear in the task's sandbox directory.
    child_pidfile = os.path.join(runner.sandbox, runner.task_id, 'child.txt')
    while not os.path.exists(child_pidfile):
        time.sleep(0.1)
    parent_pidfile = os.path.join(runner.sandbox, runner.task_id, 'parent.txt')
    while not os.path.exists(parent_pidfile):
        time.sleep(0.1)

    with open(child_pidfile) as fp:
        child_pid = int(fp.read().rstrip())
    with open(parent_pidfile) as fp:
        parent_pid = int(fp.read().rstrip())

    ps = ProcessProviderFactory.get()
    ps.collect_all()
    assert parent_pid in ps.pids()
    assert child_pid in ps.pids()
    assert child_pid in ps.children_of(parent_pid)

    # Creating exit.txt is the signal for the task to finish.
    with open(os.path.join(runner.sandbox, runner.task_id, 'exit.txt'), 'w') as fp:
        fp.write('go away!')

    # Compare by value, not identity: ``is not`` only works for enum-like
    # constants when both sides happen to be the very same object.
    while tm.task_state() != TaskState.SUCCESS:
        time.sleep(0.1)

    state = tm.get_state()
    assert state.processes['process'][0].state == ProcessState.SUCCESS

    # Collect again before checking that both pids have disappeared.
    ps.collect_all()
    assert parent_pid not in ps.pids()
    assert child_pid not in ps.pids()
def test_coordinator_kill(self):
    """Kill the coordinator, then the process, then the runner, and check the run history.

    After the three kills the test expects "ignorant_process" to have three
    recorded runs whose states are LOST, KILLED and RUNNING, in that order.
    The still-running third run is cleaned up at the end regardless of
    whether the assertions pass.
    """
    import errno  # local import: only needed by the cleanup in ``finally``

    runner = self.start_runner()
    tm = TaskMonitor(runner.pathspec, runner.task_id)
    self.wait_until_running(tm)

    process_state, run_number = tm.get_active_processes()[0]
    assert process_state.process == "ignorant_process"
    assert run_number == 0
    os.kill(process_state.coordinator_pid, signal.SIGKILL)

    # Poll until the active process reports a run number above 0.
    while True:
        active_procs = tm.get_active_processes()
        if active_procs and active_procs[0][1] > 0:
            break
        time.sleep(0.2)
    self.wait_until_running(tm)

    process_state, run_number = tm.get_active_processes()[0]
    assert process_state.process == "ignorant_process"
    assert run_number == 1
    os.kill(process_state.pid, signal.SIGKILL)

    # Poll until the active process reports a run number above 1.
    while True:
        active_procs = tm.get_active_processes()
        if active_procs and active_procs[0][1] > 1:
            break
        time.sleep(0.2)
    self.wait_until_running(tm)

    os.kill(runner.po.pid, signal.SIGKILL)

    # Load the state *before* entering ``try`` so that ``finally`` can never
    # reference an unbound name and mask the real failure.
    state = tm.get_state()
    runs = state.processes["ignorant_process"]
    try:
        assert runs[0].state == ProcessState.LOST
        assert runs[1].state == ProcessState.KILLED
        assert runs[2].state == ProcessState.RUNNING
    finally:
        # Clean up the third run only if it exists; otherwise the failure
        # raised inside ``try`` is the one that should propagate.
        if len(runs) > 2:
            for pid in (runs[2].coordinator_pid, runs[2].pid):
                try:
                    os.kill(pid, signal.SIGKILL)
                except OSError as e:
                    # An already-dead process must not stop us from killing
                    # the remaining one; anything else is a real error.
                    if e.errno != errno.ESRCH:
                        raise
def test_pg_is_killed(self):
    """Check that the parent and child pids written by the task are gone once it succeeds.

    The task under test writes "parent.txt" and "child.txt" pid files into
    its sandbox directory; the test signals it to finish by creating
    "exit.txt", waits for the task to reach SUCCESS, and then verifies that
    neither pid is reported by the process provider any more.
    """
    runner = self.start_runner()
    tm = TaskMonitor(runner.pathspec, runner.task_id)
    self.wait_until_running(tm)

    process_state, run_number = tm.get_active_processes()[0]
    assert process_state.process == "process"
    assert run_number == 0

    # Wait for both pid files to appear in the task's sandbox directory.
    child_pidfile = os.path.join(runner.sandbox, runner.task_id, "child.txt")
    while not os.path.exists(child_pidfile):
        time.sleep(0.1)
    parent_pidfile = os.path.join(runner.sandbox, runner.task_id, "parent.txt")
    while not os.path.exists(parent_pidfile):
        time.sleep(0.1)

    with open(child_pidfile) as fp:
        child_pid = int(fp.read().rstrip())
    with open(parent_pidfile) as fp:
        parent_pid = int(fp.read().rstrip())

    ps = ProcessProviderFactory.get()
    ps.collect_all()
    assert parent_pid in ps.pids()
    assert child_pid in ps.pids()
    assert child_pid in ps.children_of(parent_pid)

    # Creating exit.txt is the signal for the task to finish.
    with open(os.path.join(runner.sandbox, runner.task_id, "exit.txt"), "w") as fp:
        fp.write("go away!")

    # Compare by value, not identity: ``is not`` only works for enum-like
    # constants when both sides happen to be the very same object.
    while tm.task_state() != TaskState.SUCCESS:
        time.sleep(0.1)

    state = tm.get_state()
    assert state.processes["process"][0].state == ProcessState.SUCCESS

    # Collect again before checking that both pids have disappeared.
    ps.collect_all()
    assert parent_pid not in ps.pids()
    assert child_pid not in ps.pids()