Example #1
def test_child_main_thread():
    """
    The main thread in the child should be called "MainThread".
    """
    q = mp_context.Queue()
    proc = AsyncProcess(target=threads_info, args=(q,))
    yield proc.start()
    yield proc.join()
    n_threads = q.get()
    main_name = q.get()
    assert n_threads <= 3
    assert main_name == "MainThread"
    q.close()
    q._reader.close()
    q._writer.close()
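
The threads_info target used in this example is defined elsewhere in the test module. A minimal sketch consistent with the two q.get() calls (thread count first, then the main thread's name) might look like this; the real helper may differ:

import threading

def threads_info(q):
    # Report the number of live threads and the name of the main thread
    # back to the parent through the queue.
    q.put(len(threading.enumerate()))
    q.put(threading.main_thread().name)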
Example #2
    def parent_process_coroutine():
        worker_ready = mp_context.Event()

        worker = AsyncProcess(target=_worker_process,
                              args=(worker_ready, child_pipe))

        yield worker.start()

        # Wait for the child process to have started.
        worker_ready.wait()

        # Exit immediately, without doing any process teardown (including atexit
        # and 'finally:' blocks) as if by SIGKILL. This should cause
        # worker_process to also exit.
        os._exit(255)
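
_worker_process and child_pipe come from the surrounding test and are not shown here. A rough, assumed sketch of the worker side (signal readiness, then idle until AsyncProcess tears the worker down after the parent dies) could be:

from time import sleep

def _worker_process(worker_ready, child_pipe):
    # Tell the parent coroutine that the worker has started, then idle.
    # The full test also uses child_pipe to check on the worker later;
    # it is left untouched in this sketch.
    worker_ready.set()
    while True:
        sleep(1)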
Example #3
def test_signal():
    proc = AsyncProcess(target=exit_with_signal, args=(signal.SIGINT,))
    proc.daemon = True
    assert not proc.is_alive()
    assert proc.exitcode is None

    yield proc.start()
    yield proc.join(timeout=3.0)

    assert not proc.is_alive()
    # Can be 255 with forkserver, see https://bugs.python.org/issue30589
    assert proc.exitcode in (-signal.SIGINT, 255)

    proc = AsyncProcess(target=wait)
    yield proc.start()
    os.kill(proc.pid, signal.SIGTERM)
    yield proc.join(timeout=3.0)

    assert not proc.is_alive()
    assert proc.exitcode in (-signal.SIGTERM, 255)
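
The exit_with_signal and wait targets are not shown in this excerpt. Minimal sketches consistent with the assertions (the child kills itself with the given signal; wait simply blocks until it is signalled from outside) might be:

import os
import signal
from time import sleep

def exit_with_signal(signum):
    # Install the default handler so the signal really terminates the
    # process, then deliver it to ourselves.
    signal.signal(signum, signal.SIG_DFL)
    os.kill(os.getpid(), signum)

def wait():
    # Block until killed from outside (e.g. by os.kill or terminate()).
    while True:
        sleep(60)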
Example #4
def test_terminate():
    proc = AsyncProcess(target=wait)
    proc.daemon = True
    yield proc.start()
    yield proc.terminate()

    yield proc.join(timeout=3.0)
    assert not proc.is_alive()
    assert proc.exitcode in (-signal.SIGTERM, 255)
Example #5
def test_exitcode():
    q = mp_context.Queue()

    proc = AsyncProcess(target=exit, kwargs={'q': q})
    proc.daemon = True
    assert not proc.is_alive()
    assert proc.exitcode is None

    yield proc.start()
    assert proc.is_alive()
    assert proc.exitcode is None

    q.put(5)
    yield proc.join(timeout=3.0)
    assert not proc.is_alive()
    assert proc.exitcode == 5
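
The exit target is another small helper; the parent puts 5 on the queue and then expects exitcode 5, so a plausible sketch (the actual helper may differ) is:

import sys

def exit(q):
    # Block until the parent sends an exit code, then exit with it.
    sys.exit(q.get())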
Example #6
def test_num_fds():
    psutil = pytest.importorskip('psutil')

    # Warm up
    proc = AsyncProcess(target=exit_now)
    proc.daemon = True
    yield proc.start()
    yield proc.join()

    p = psutil.Process()
    before = p.num_fds()

    proc = AsyncProcess(target=exit_now)
    proc.daemon = True
    yield proc.start()
    yield proc.join()
    assert not proc.is_alive()
    assert proc.exitcode == 0

    start = time()
    while p.num_fds() > before:
        yield gen.sleep(0.1)
        print("fds:", before, p.num_fds())
        assert time() < start + 10
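
exit_now is assumed to be a trivial target that exits immediately with status 0, which is what the exitcode assertion above relies on; a sketch:

import sys

def exit_now(rc=0):
    sys.exit(rc)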
Example #7
def test_close():
    proc = AsyncProcess(target=exit_now)
    proc.close()
    with pytest.raises(ValueError):
        yield proc.start()

    proc = AsyncProcess(target=exit_now)
    yield proc.start()
    proc.close()
    with pytest.raises(ValueError):
        yield proc.terminate()

    proc = AsyncProcess(target=exit_now)
    yield proc.start()
    yield proc.join()
    proc.close()
    with pytest.raises(ValueError):
        yield proc.join()
    proc.close()
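
These yield-based bodies are Tornado-style coroutines; in the distributed test suite they are normally wrapped in a coroutine test decorator. A hedged sketch of how one of them could be wired up as a runnable test (assuming distributed.utils_test.gen_test is available and the exit_now helper sketched earlier) is:

import pytest
from distributed.process import AsyncProcess
from distributed.utils_test import gen_test

@gen_test()
def test_start_after_close_raises():
    # close() before start() should make start() fail with ValueError.
    proc = AsyncProcess(target=exit_now)
    proc.close()
    with pytest.raises(ValueError):
        yield proc.start()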
Example #8
def test_signal():
    proc = AsyncProcess(target=exit_with_signal, args=(signal.SIGINT, ))
    proc.daemon = True
    assert not proc.is_alive()
    assert proc.exitcode is None

    yield proc.start()
    yield proc.join(timeout=3.0)

    assert not proc.is_alive()
    # Can be 255 with forkserver, see https://bugs.python.org/issue30589
    assert proc.exitcode in (-signal.SIGINT, 255)

    proc = AsyncProcess(target=wait)
    yield proc.start()
    os.kill(proc.pid, signal.SIGTERM)
    yield proc.join(timeout=3.0)

    assert not proc.is_alive()
    assert proc.exitcode in (-signal.SIGTERM, 255)
Example #9
def test_simple():
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()

    proc = AsyncProcess(target=feed, args=(to_child, from_child))
    assert not proc.is_alive()
    assert proc.pid is None
    assert proc.exitcode is None
    assert not proc.daemon
    proc.daemon = True
    assert proc.daemon

    wr1 = weakref.ref(proc)
    wr2 = weakref.ref(proc._process)

    # join() before start()
    with pytest.raises(AssertionError):
        yield proc.join()

    yield proc.start()
    assert proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode is None

    t1 = time()
    yield proc.join(timeout=0.02)
    dt = time() - t1
    assert 0.2 >= dt >= 0.01
    assert proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode is None

    # setting daemon attribute after start()
    with pytest.raises(AssertionError):
        proc.daemon = False

    to_child.put(5)
    assert from_child.get() == 5

    # child should be stopping now
    t1 = time()
    yield proc.join(timeout=10)
    dt = time() - t1
    assert dt <= 1.0
    assert not proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode == 0

    # join() again
    t1 = time()
    yield proc.join()
    dt = time() - t1
    assert dt <= 0.6

    del proc
    gc.collect()
    start = time()
    while wr1() is not None and time() < start + 1:
        # Perhaps the GIL switched before _watch_process() exit,
        # help it a little
        sleep(0.001)
        gc.collect()
    if wr1() is not None:
        # Help diagnosing
        from types import FrameType
        p = wr1()
        if p is not None:
            rc = sys.getrefcount(p)
            refs = gc.get_referrers(p)
            del p
            print("refs to proc:", rc, refs)
            frames = [r for r in refs if isinstance(r, FrameType)]
            for i, f in enumerate(frames):
                print("frames #%d:" % i, f.f_code.co_name,
                      f.f_code.co_filename, sorted(f.f_locals))
        pytest.fail("AsyncProcess should have been destroyed")
    t1 = time()
    while wr2() is not None:
        yield gen.sleep(0.01)
        gc.collect()
        dt = time() - t1
        assert dt < 2.0
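
The feed target is the child's entry point here: the parent sends 5, reads 5 back, and then expects the child to stop, so a one-shot echo is a plausible sketch (the actual helper may differ):

def feed(in_q, out_q):
    # Echo one object from the parent back to it, then return so the
    # child process exits cleanly with status 0.
    obj = in_q.get(timeout=5)
    out_q.put(obj)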
Example #10
def test_terminate_after_stop():
    proc = AsyncProcess(target=sleep, args=(0, ))
    yield proc.start()
    yield gen.sleep(0.1)
    yield proc.terminate()
Example #11
async def test_terminate_after_stop():
    proc = AsyncProcess(target=sleep, args=(0, ))
    await proc.start()
    await asyncio.sleep(0.1)
    await proc.terminate()
Example #12
class Scheduler(ProcessInterface):
    def __init__(self, env=None, *args, **kwargs):
        super().__init__()

        self.args = args
        self.kwargs = kwargs
        self.proc_cls = _Scheduler
        self.process = None
        self.env = env or {}

    def __repr__(self):
        self.child_info_stop_q.put({"op": "info"})
        try:
            msg = self.parent_info_q.get(timeout=3000)
        except Empty:
            # The child did not respond in time; fall back to the default repr
            # instead of implicitly returning None (which repr() rejects).
            return object.__repr__(self)
        else:
            assert msg.pop("op") == "info"
            return "<Scheduler: '%s' processes: %d cores: %d>" % (
                self.address,
                msg.pop("workers"),
                msg.pop("total_nthreads"),
            )

    async def _wait_until_started(self):
        delay = 0.05
        while True:
            if self.status != "starting":
                return
            try:
                msg = self.init_result_q.get_nowait()
            except Empty:
                await gen.sleep(delay)
                continue

            if "exception" in msg:
                logger.error(
                    "Failed while trying to start scheduler process: %s",
                    msg["exception"],
                )
                await self.process.join()
                raise msg["exception"]
            else:
                return msg

    async def start(self):
        if self.status == "running":
            return self.status
        if self.status == "starting":
            await self.running.wait()
            return self.status

        self.init_result_q = init_q = mp_context.Queue()
        self.child_info_stop_q = mp_context.Queue()
        self.parent_info_q = mp_context.Queue()

        self.process = AsyncProcess(
            target=self._run,
            name="Dask CUDA Scheduler process",
            kwargs=dict(
                proc_cls=self.proc_cls,
                kwargs=self.kwargs,
                silence_logs=False,
                init_result_q=self.init_result_q,
                child_info_stop_q=self.child_info_stop_q,
                parent_info_q=self.parent_info_q,
                env=self.env,
            ),
        )
        # self.process.daemon = dask.config.get("distributed.worker.daemon", default=True)
        self.process.set_exit_callback(self._on_exit)
        self.running = Event()
        self.stopped = Event()
        self.status = "starting"
        try:
            await self.process.start()
        except OSError:
            logger.exception("Failed to start CUDA Scheduler process",
                             exc_info=True)
            self.process.terminate()
            return

        msg = await self._wait_until_started()
        if not msg:
            return self.status
        self.address = msg["address"]
        assert self.address
        self.status = "running"
        self.running.set()

        init_q.close()

        await super().start()

    def _on_exit(self, proc):
        if proc is not self.process:
            return
        self.mark_stopped()

    def _death_message(self, pid, exitcode):
        assert exitcode is not None
        if exitcode == 255:
            return "Scheduler process %d was killed by unknown signal" % (
                pid, )
        elif exitcode >= 0:
            return "Scheduler process %d exited with status %d" % (pid,
                                                                   exitcode)
        else:
            return "Scheduler process %d was killed by signal %d" % (pid,
                                                                     -exitcode)

    def mark_stopped(self):
        if self.status != "stopped":
            r = self.process.exitcode
            assert r is not None
            if r != 0:
                msg = self._death_message(self.process.pid, r)
                logger.info(msg)
            self.status = "stopped"
            self.stopped.set()
            # Release resources
            self.process.close()
            self.init_result_q = None
            self.child_info_stop_q = None
            self.parent_info_q = None
            self.process = None

    async def close(self):
        timeout = 2
        loop = IOLoop.current()
        deadline = loop.time() + timeout
        if self.status == "closing":
            await self.finished()
            assert self.status == "closed"

        if self.status == "closed":
            return

        try:
            if self.process is not None:
                #await self.kill()
                process = self.process
                self.child_info_stop_q.put({
                    "op": "stop",
                    "timeout": max(0, deadline - loop.time()) * 0.8,
                })
                self.child_info_stop_q.close()
                self.parent_info_q.close()

                while process.is_alive() and loop.time() < deadline:
                    await gen.sleep(0.05)

                if process.is_alive():
                    logger.warning(
                        "Scheduler process still alive after %d seconds, killing",
                        timeout)
                    try:
                        await process.terminate()
                    except Exception as e:
                        logger.error("Failed to kill scheduler process: %s", e)
        except Exception:
            pass
        self.process = None
        self.status = "closed"
        await super().close()

    @classmethod
    def _run(
        cls,
        silence_logs,
        init_result_q,
        child_info_stop_q,
        parent_info_q,
        proc_cls,
        kwargs,
        env,
    ):  # pragma: no cover
        os.environ.update(env)

        if silence_logs:
            logger.setLevel(silence_logs)

        IOLoop.clear_instance()
        loop = IOLoop()
        loop.make_current()
        scheduler = proc_cls(**kwargs)

        async def do_stop(timeout=5):
            try:
                await scheduler.close(comm=None,
                                      fast=False,
                                      close_workers=False)
            finally:
                loop.stop()

        def watch_stop_q():
            """
            Wait for an incoming stop message and then stop the
            scheduler cleanly.
            """
            while True:
                try:
                    msg = child_info_stop_q.get(timeout=1000)
                except Empty:
                    pass
                else:
                    op = msg.pop("op")
                    assert op == "stop" or op == "info"
                    if op == "stop":
                        child_info_stop_q.close()
                        loop.add_callback(do_stop, **msg)
                        break
                    elif op == "info":
                        parent_info_q.put({
                            "op": "info",
                            "workers": len(scheduler.workers),
                            "total_nthreads": scheduler.total_nthreads,
                        })

        t = threading.Thread(target=watch_stop_q,
                             name="Scheduler stop queue watch")
        t.daemon = True
        t.start()

        async def run():
            """
            Try to start scheduler and inform parent of outcome.
            """
            try:
                await scheduler.start()
            except Exception as e:
                logger.exception("Failed to start scheduler")
                init_result_q.put({"exception": e})
                init_result_q.close()
            else:
                try:
                    assert scheduler.address
                except ValueError:
                    pass
                else:
                    init_result_q.put({"address": scheduler.address})
                    init_result_q.close()
                    await scheduler.finished()
                    logger.info("Scheduler closed")

        try:
            loop.run_sync(run)
        except TimeoutError:
            # Loop was stopped before wait_until_closed() returned, ignore
            pass
        except KeyboardInterrupt:
            pass
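
A rough usage sketch for this process-backed scheduler, assuming _Scheduler and the ProcessInterface machinery from the surrounding module are importable (the constructor arguments shown are purely illustrative):

import asyncio

async def main():
    s = Scheduler(env={"MY_VAR": "1"})
    await s.start()                 # spawn the child process and wait for its address
    print("scheduler listening at", s.address)
    await s.close()                 # ask the child to stop, terminating it if needed

asyncio.run(main())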
Example #13
def test_terminate_after_stop():
    proc = AsyncProcess(target=sleep, args=(0,))
    yield proc.start()
    yield gen.sleep(0.1)
    yield proc.terminate()
Example #14
def test_exit_callback():
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()
    evt = Event()

    @gen.coroutine
    def on_stop(_proc):
        assert _proc is proc
        yield gen.moment
        evt.set()

    # Normal process exit
    proc = AsyncProcess(target=feed, args=(to_child, from_child))
    evt.clear()
    proc.set_exit_callback(on_stop)
    proc.daemon = True

    yield proc.start()
    yield gen.sleep(0.05)
    assert proc.is_alive()
    assert not evt.is_set()

    to_child.put(None)
    yield evt.wait(timedelta(seconds=3))
    assert evt.is_set()
    assert not proc.is_alive()

    # Process terminated
    proc = AsyncProcess(target=wait)
    evt.clear()
    proc.set_exit_callback(on_stop)
    proc.daemon = True

    yield proc.start()
    yield gen.sleep(0.05)
    assert proc.is_alive()
    assert not evt.is_set()

    yield proc.terminate()
    yield evt.wait(timedelta(seconds=3))
    assert evt.is_set()
Example #15
def test_close():
    proc = AsyncProcess(target=exit_now)
    proc.close()
    with pytest.raises(ValueError):
        yield proc.start()

    proc = AsyncProcess(target=exit_now)
    yield proc.start()
    proc.close()
    with pytest.raises(ValueError):
        yield proc.terminate()

    proc = AsyncProcess(target=exit_now)
    yield proc.start()
    yield proc.join()
    proc.close()
    with pytest.raises(ValueError):
        yield proc.join()
    proc.close()
Example #16
def test_exit_callback():
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()
    evt = Event()

    @gen.coroutine
    def on_stop(_proc):
        assert _proc is proc
        yield gen.moment
        evt.set()

    # Normal process exit
    proc = AsyncProcess(target=feed, args=(to_child, from_child))
    evt.clear()
    proc.set_exit_callback(on_stop)
    proc.daemon = True

    yield proc.start()
    yield gen.sleep(0.05)
    assert proc.is_alive()
    assert not evt.is_set()

    to_child.put(None)
    yield evt.wait(timedelta(seconds=3))
    assert evt.is_set()
    assert not proc.is_alive()

    # Process terminated
    proc = AsyncProcess(target=wait)
    evt.clear()
    proc.set_exit_callback(on_stop)
    proc.daemon = True

    yield proc.start()
    yield gen.sleep(0.05)
    assert proc.is_alive()
    assert not evt.is_set()

    yield proc.terminate()
    yield evt.wait(timedelta(seconds=3))
    assert evt.is_set()
Example #17
def test_num_fds():
    psutil = pytest.importorskip("psutil")

    # Warm up
    proc = AsyncProcess(target=exit_now)
    proc.daemon = True
    yield proc.start()
    yield proc.join()

    p = psutil.Process()
    before = p.num_fds()

    proc = AsyncProcess(target=exit_now)
    proc.daemon = True
    yield proc.start()
    yield proc.join()
    assert not proc.is_alive()
    assert proc.exitcode == 0

    start = time()
    while p.num_fds() > before:
        yield gen.sleep(0.1)
        print("fds:", before, p.num_fds())
        assert time() < start + 10
Example #18
def test_simple():
    to_child = mp_context.Queue()
    from_child = mp_context.Queue()

    proc = AsyncProcess(target=feed, args=(to_child, from_child))
    assert not proc.is_alive()
    assert proc.pid is None
    assert proc.exitcode is None
    assert not proc.daemon
    proc.daemon = True
    assert proc.daemon

    wr1 = weakref.ref(proc)
    wr2 = weakref.ref(proc._process)

    # join() before start()
    with pytest.raises(AssertionError):
        yield proc.join()

    yield proc.start()
    assert proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode is None

    t1 = time()
    yield proc.join(timeout=0.02)
    dt = time() - t1
    assert 0.2 >= dt >= 0.01
    assert proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode is None

    # setting daemon attribute after start()
    with pytest.raises(AssertionError):
        proc.daemon = False

    to_child.put(5)
    assert from_child.get() == 5

    # child should be stopping now
    t1 = time()
    yield proc.join(timeout=10)
    dt = time() - t1
    assert dt <= 1.0
    assert not proc.is_alive()
    assert proc.pid is not None
    assert proc.exitcode == 0

    # join() again
    t1 = time()
    yield proc.join()
    dt = time() - t1
    assert dt <= 0.6

    del proc
    gc.collect()
    start = time()
    while wr1() is not None and time() < start + 1:
        # Perhaps the GIL switched before _watch_process() exit,
        # help it a little
        sleep(0.001)
        gc.collect()
    if wr1() is not None:
        # Help diagnosing
        from types import FrameType
        p = wr1()
        if p is not None:
            rc = sys.getrefcount(p)
            refs = gc.get_referrers(p)
            del p
            print("refs to proc:", rc, refs)
            frames = [r for r in refs if isinstance(r, FrameType)]
            for i, f in enumerate(frames):
                print("frames #%d:" % i,
                      f.f_code.co_name, f.f_code.co_filename, sorted(f.f_locals))
        pytest.fail("AsyncProcess should have been destroyed")
    t1 = time()
    while wr2() is not None:
        yield gen.sleep(0.01)
        gc.collect()
        dt = time() - t1
        assert dt < 2.0