Пример #1
0
 def add_active_task(self, task_id):
   """Begin observing an active task.

   Creates a TaskMonitor for ``task_id``, starts a resource monitor for the
   task's sandbox and stores both in ``self._active_tasks``. Nothing is
   registered (only a log line is emitted) when ``task_id`` is already in
   ``self.finished_tasks`` or when the monitor's state has no header.

   :param task_id: identifier of the task to start observing.
   """
   if task_id in self.finished_tasks:
     log.error('Found an active task (%s) in finished tasks?' % task_id)
     return
   task_monitor = TaskMonitor(self._pathspec, task_id)
   # Read the state once so that the "is there a header?" check and the
   # sandbox lookup operate on the same snapshot rather than on two
   # independent get_state() results.
   header = task_monitor.get_state().header
   if not header:
     log.info('Unable to load task "%s"' % task_id)
     return
   resource_monitor = self._resource_monitor(task_monitor, header.sandbox)
   resource_monitor.start()
   self._active_tasks[task_id] = ActiveObservedTask(
     task_id=task_id, pathspec=self._pathspec,
     task_monitor=task_monitor, resource_monitor=resource_monitor
   )
Пример #2
0
    def test_basic_as_job(self):
        """Launch HELLO_WORLD wrapped in MESOS_JOB and check the status updates.

        Once the executor has terminated, the runner state read through
        TaskMonitor must contain the 'hello_world_hello_world-001' entry and
        the driver must have received exactly three status updates:
        STARTING, RUNNING and FINISHED, in that order.
        """
        driver = ProxyDriver()

        with temporary_dir() as workdir:
            executor = ThermosExecutor(
                runner_provider=make_provider(workdir),
                sandbox_provider=DefaultTestSandboxProvider)
            job_task = make_task(MESOS_JOB(task=HELLO_WORLD), instanceId=0)
            executor.launchTask(driver, job_task)
            executor.runner_started.wait()
            # Poll until the executor has populated its status manager.
            while executor._status_manager is None:
                time.sleep(0.1)
            executor.terminated.wait()
            monitor = TaskMonitor(TaskPath(root=workdir),
                                  task_id=HELLO_WORLD_TASK_ID)
            runner_state = monitor.get_state()

        processes = runner_state.processes
        assert 'hello_world_hello_world-001' in processes, (
            'Could not find processes, got: %s' % ' '.join(processes))

        updates = driver.method_calls['sendStatusUpdate']
        assert len(updates) == 3
        # The status object is the first positional argument of each call.
        reported_states = [call_args[0][0].state for call_args in updates]
        expected_states = (
            mesos_pb.TASK_STARTING,
            mesos_pb.TASK_RUNNING,
            mesos_pb.TASK_FINISHED,
        )
        for reported, expected in zip(reported_states, expected_states):
            assert reported == expected
Пример #3
0
    def test_coordinator_dead_kill(self):
        """Force-kill a task after its runner, coordinator and process died.

        The runner, the coordinator and the process are each sent SIGKILL;
        a TaskRunner obtained for the same task then performs a forced kill.
        The resulting state must hold a single run, marked LOST.
        """
        runner = self.start_runner()
        monitor = TaskMonitor(runner.pathspec, runner.task_id)
        self.wait_until_running(monitor)

        active_process, run_number = monitor.get_active_processes()[0]
        assert active_process.process == "ignorant_process"
        assert run_number == 0

        # Order matters: runner first, then the coordinator, then the process.
        victims = (
            runner.po.pid,
            active_process.coordinator_pid,
            active_process.pid,
        )
        for pid in victims:
            os.kill(pid, signal.SIGKILL)

        killer = TaskRunner.get(runner.task_id, runner.root)
        assert killer is not None
        killer.kill(force=True)

        runs = monitor.get_state().processes["ignorant_process"]
        assert len(runs) == 1
        assert runs[0].state == ProcessState.LOST
Пример #4
0
    def test_coordinator_kill(self):
        """Check run bookkeeping after coordinator, process and runner kills.

        Run 0 has its coordinator SIGKILLed, run 1 has the process itself
        SIGKILLed, and finally the runner is SIGKILLed while run 2 is active.
        The recorded runs must be LOST, KILLED and RUNNING respectively.
        The third run is always cleaned up at the end when it exists.
        """
        runner = self.start_runner()
        tm = TaskMonitor(runner.pathspec, runner.task_id)

        def wait_for_run_after(previous_run):
            # Poll until the first active process reports a run number above
            # ``previous_run``, then wait for the task to be running again.
            while True:
                active_procs = tm.get_active_processes()
                if active_procs and active_procs[0][1] > previous_run:
                    break
                time.sleep(0.2)
            self.wait_until_running(tm)

        self.wait_until_running(tm)
        process_state, run_number = tm.get_active_processes()[0]
        assert process_state.process == 'ignorant_process'
        assert run_number == 0
        os.kill(process_state.coordinator_pid, signal.SIGKILL)
        wait_for_run_after(0)

        process_state, run_number = tm.get_active_processes()[0]
        assert process_state.process == 'ignorant_process'
        assert run_number == 1
        os.kill(process_state.pid, signal.SIGKILL)
        wait_for_run_after(1)

        os.kill(runner.po.pid, signal.SIGKILL)

        # Read the state before entering ``try``: if this fails, the real
        # error propagates instead of an unbound-name error from the cleanup.
        state = tm.get_state()
        runs = state.processes['ignorant_process']
        try:
            assert runs[0].state == ProcessState.LOST
            assert runs[1].state == ProcessState.KILLED
            assert runs[2].state == ProcessState.RUNNING
        finally:
            # Only clean up the third run when it exists, so that a short run
            # list does not replace the assertion failure with an IndexError.
            if len(runs) > 2:
                os.kill(runs[2].coordinator_pid, signal.SIGKILL)
                os.kill(runs[2].pid, signal.SIGKILL)
Пример #5
0
    def test_coordinator_dead_kill(self):
        """SIGKILL runner, coordinator and process, then force-kill the task.

        After a TaskRunner fetched for the same task id performs a forced
        kill, the state must contain exactly one run and it must be LOST.
        """
        runner = self.start_runner()
        monitor = TaskMonitor(runner.pathspec, runner.task_id)
        self.wait_until_running(monitor)

        first_active = monitor.get_active_processes()[0]
        active_process, run_number = first_active
        assert active_process.process == 'ignorant_process'
        assert run_number == 0

        # Kill order is runner, coordinator, process.
        sigkill = signal.SIGKILL
        os.kill(runner.po.pid, sigkill)
        os.kill(active_process.coordinator_pid, sigkill)
        os.kill(active_process.pid, sigkill)

        killer = TaskRunner.get(runner.task_id, runner.root)
        assert killer is not None
        killer.kill(force=True)

        final_state = monitor.get_state()
        recorded_runs = final_state.processes['ignorant_process']
        assert len(recorded_runs) == 1
        assert recorded_runs[0].state == ProcessState.LOST
  def test_basic(self):
    """Launch HELLO_WORLD_MTI through ThermosExecutor and check the updates.

    After the executor terminates, the runner state read through TaskMonitor
    must contain the 'hello_world_hello_world-001' entry, and the driver must
    have received exactly STARTING, RUNNING and FINISHED, in that order.
    """
    driver = ProxyDriver()

    with temporary_dir() as workdir:
      executor = ThermosExecutor(
          runner_provider=make_provider(workdir),
          sandbox_provider=DefaultTestSandboxProvider)
      launched_task = make_task(HELLO_WORLD_MTI)
      executor.launchTask(driver, launched_task)
      executor.terminated.wait()
      task_path = TaskPath(root=workdir)
      monitor = TaskMonitor(task_path, task_id=HELLO_WORLD_TASK_ID)
      runner_state = monitor.get_state()

    processes = runner_state.processes
    assert 'hello_world_hello_world-001' in processes, (
      'Could not find processes, got: %s' % ' '.join(processes))

    updates = driver.method_calls['sendStatusUpdate']
    assert len(updates) == 3
    # The status object is the first positional argument of each call.
    reported_states = [call_args[0][0].state for call_args in updates]
    expected_states = (
      mesos_pb.TASK_STARTING,
      mesos_pb.TASK_RUNNING,
      mesos_pb.TASK_FINISHED,
    )
    for reported, expected in zip(reported_states, expected_states):
      assert reported == expected
Пример #7
0
    def test_pg_is_killed(self):
        """Check that parent and child pids are gone once the task succeeds.

        The test reads the pids from 'parent.txt' and 'child.txt' inside the
        task's sandbox directory, confirms both are alive and that the child
        is a child of the parent, then writes 'exit.txt' (presumably the
        signal for the process to finish -- verify against the fixture) and
        waits for the task to reach SUCCESS. Afterwards neither pid may still
        be listed by the process provider.
        """
        runner = self.start_runner()
        tm = TaskMonitor(runner.pathspec, runner.task_id)
        self.wait_until_running(tm)
        process_state, run_number = tm.get_active_processes()[0]
        assert process_state.process == 'process'
        assert run_number == 0

        task_dir = os.path.join(runner.sandbox, runner.task_id)
        child_pidfile = os.path.join(task_dir, 'child.txt')
        parent_pidfile = os.path.join(task_dir, 'parent.txt')
        # Wait for the child pid file first, then the parent one.
        for pidfile in (child_pidfile, parent_pidfile):
            while not os.path.exists(pidfile):
                time.sleep(0.1)
        with open(child_pidfile) as fp:
            child_pid = int(fp.read().rstrip())
        with open(parent_pidfile) as fp:
            parent_pid = int(fp.read().rstrip())

        ps = ProcessProviderFactory.get()
        ps.collect_all()
        assert parent_pid in ps.pids()
        assert child_pid in ps.pids()
        assert child_pid in ps.children_of(parent_pid)

        with open(os.path.join(task_dir, 'exit.txt'), 'w') as fp:
            fp.write('go away!')

        # Compare by value, like the ProcessState check below; an identity
        # test only works if the state constants happen to be singletons.
        while tm.task_state() != TaskState.SUCCESS:
            time.sleep(0.1)

        state = tm.get_state()
        assert state.processes['process'][0].state == ProcessState.SUCCESS

        # Refresh the process table before checking that both pids are gone.
        ps.collect_all()
        assert parent_pid not in ps.pids()
        assert child_pid not in ps.pids()
Пример #8
0
    def test_coordinator_kill(self):
        """Check run bookkeeping after coordinator, process and runner kills.

        Run 0 has its coordinator SIGKILLed, run 1 has the process itself
        SIGKILLed, and finally the runner is SIGKILLed while run 2 is active.
        The recorded runs must be LOST, KILLED and RUNNING respectively.
        The third run is always cleaned up at the end when it exists.
        """
        runner = self.start_runner()
        tm = TaskMonitor(runner.pathspec, runner.task_id)

        def wait_for_run_after(previous_run):
            # Poll until the first active process reports a run number above
            # ``previous_run``, then wait for the task to be running again.
            while True:
                active_procs = tm.get_active_processes()
                if active_procs and active_procs[0][1] > previous_run:
                    break
                time.sleep(0.2)
            self.wait_until_running(tm)

        self.wait_until_running(tm)
        process_state, run_number = tm.get_active_processes()[0]
        assert process_state.process == "ignorant_process"
        assert run_number == 0
        os.kill(process_state.coordinator_pid, signal.SIGKILL)
        wait_for_run_after(0)

        process_state, run_number = tm.get_active_processes()[0]
        assert process_state.process == "ignorant_process"
        assert run_number == 1
        os.kill(process_state.pid, signal.SIGKILL)
        wait_for_run_after(1)

        os.kill(runner.po.pid, signal.SIGKILL)

        # Read the state before entering ``try``: if this fails, the real
        # error propagates instead of an unbound-name error from the cleanup.
        state = tm.get_state()
        runs = state.processes["ignorant_process"]
        try:
            assert runs[0].state == ProcessState.LOST
            assert runs[1].state == ProcessState.KILLED
            assert runs[2].state == ProcessState.RUNNING
        finally:
            # Only clean up the third run when it exists, so that a short run
            # list does not replace the assertion failure with an IndexError.
            if len(runs) > 2:
                os.kill(runs[2].coordinator_pid, signal.SIGKILL)
                os.kill(runs[2].pid, signal.SIGKILL)
Пример #9
0
    def test_pg_is_killed(self):
        """Check that parent and child pids are gone once the task succeeds.

        The test reads the pids from "parent.txt" and "child.txt" inside the
        task's sandbox directory, confirms both are alive and that the child
        is a child of the parent, then writes "exit.txt" (presumably the
        signal for the process to finish -- verify against the fixture) and
        waits for the task to reach SUCCESS. Afterwards neither pid may still
        be listed by the process provider.
        """
        runner = self.start_runner()
        tm = TaskMonitor(runner.pathspec, runner.task_id)
        self.wait_until_running(tm)
        process_state, run_number = tm.get_active_processes()[0]
        assert process_state.process == "process"
        assert run_number == 0

        task_dir = os.path.join(runner.sandbox, runner.task_id)
        child_pidfile = os.path.join(task_dir, "child.txt")
        parent_pidfile = os.path.join(task_dir, "parent.txt")
        # Wait for the child pid file first, then the parent one.
        for pidfile in (child_pidfile, parent_pidfile):
            while not os.path.exists(pidfile):
                time.sleep(0.1)
        with open(child_pidfile) as fp:
            child_pid = int(fp.read().rstrip())
        with open(parent_pidfile) as fp:
            parent_pid = int(fp.read().rstrip())

        ps = ProcessProviderFactory.get()
        ps.collect_all()
        assert parent_pid in ps.pids()
        assert child_pid in ps.pids()
        assert child_pid in ps.children_of(parent_pid)

        with open(os.path.join(task_dir, "exit.txt"), "w") as fp:
            fp.write("go away!")

        # Compare by value, like the ProcessState check below; an identity
        # test only works if the state constants happen to be singletons.
        while tm.task_state() != TaskState.SUCCESS:
            time.sleep(0.1)

        state = tm.get_state()
        assert state.processes["process"][0].state == ProcessState.SUCCESS

        # Refresh the process table before checking that both pids are gone.
        ps.collect_all()
        assert parent_pid not in ps.pids()
        assert child_pid not in ps.pids()