class JudgeManager(object):
    """Fork and supervise the judge, file-monitor and API-server child processes.

    The manager forks one child per configured judge (plus an optional monitor
    and an optional API server), forwards selected signals to all of them and
    respawns children that die for as long as ``_try_respawn`` is true.

    NOTE: this revision uses Python 2 syntax (``print >>`` statements,
    ``dict.iteritems`` and the ``BaseHTTPServer`` module).
    """

    # Signal number -> signal name, used for log messages.  Names are visited in
    # reverse alphabetical order, so when several names share one number the
    # alphabetically first name is the one that ends up in the map.
    signal_map = {
        number: name
        for name, number in sorted(signal.__dict__.items(), reverse=True)
        if name.startswith('SIG') and not name.startswith('SIG_')
    }

    def __init__(self, judges):
        """Prepare the manager; no process is spawned until :meth:`run`.

        :param judges: iterable of entries exposing ``id`` and ``key`` attributes.
        """
        self.libc = self.__get_libc()
        self.prctl = self.libc.prctl
        self._try_respawn = True
        self.auth = {entry.id: entry.key for entry in judges}
        # Handlers that were installed before ours; children restore them.
        self.orig_signal = {}
        self.master_pid = os.getpid()
        # pid -> judge id for every live judge child.
        self.pids = {}
        self.monitor_pid = None
        self.api_pid = None
        self.monitor = Monitor()
        # The monitor notifies the manager process by sending it SIGUSR2.
        self.monitor.callback = lambda: os.kill(self.master_pid, signal.SIGUSR2)

    def __get_libc(self):
        """Load the C library through ctypes so that ``prctl`` can be called."""
        from ctypes.util import find_library
        from ctypes import CDLL
        return CDLL(find_library('c'))

    def _forward_signal(self, sig, respawn=False):
        """Install a handler for ``sig`` that relays the signal to every child.

        :param sig: signal number to intercept.
        :param respawn: when false, receiving the signal also disables
            respawning of dead children.
        """
        def handler(signum, frame):
            print >> sys.stderr, 'judgepm: Received signal (%s), forwarding...' % self.signal_map.get(signum, signum)
            if not respawn:
                print >> sys.stderr, 'judgepm: Will no longer respawn judges.'
                self._try_respawn = False
            self.signal_all(signum)

        # Remember the previous handler so forked children can restore it.
        self.orig_signal[sig] = signal.signal(sig, handler)

    def _spawn_child(self, func, *args, **kwargs):
        """Fork and run ``func(*args, **kwargs)`` in the child.

        The child never returns from this method: it exits with the return
        value of ``func`` (or 0 when that value is falsy).

        :return: the child's pid in the parent, or ``None`` if the fork failed.
        """
        # Flush buffered output so that it is not duplicated into the child.
        sys.stdout.flush()
        sys.stderr.flush()
        ppid = os.getpid()
        try:
            pid = os.fork()
        except OSError as e:
            # The original message printed the builtin ``id`` function here,
            # which carried no information; report the actual error instead.
            print >> sys.stderr, 'judgepm: Failed to spawn child process:', e
            return None
        if pid == 0:
            # In child. Scary business.
            self.prctl(PR_SET_PDEATHSIG, signal.SIGTERM)
            # If the parent is already gone (our parent pid changed), terminate.
            if ppid != os.getppid():
                os.kill(os.getpid(), signal.SIGTERM)
                os._exit(2)
            sys.stdin.close()
            # Undo the manager's signal forwarding inside the child.
            for sig, handler in self.orig_signal.iteritems():
                signal.signal(sig, handler)

            # How could we possibly return to top level?
            try:
                os._exit(func(*args, **kwargs) or 0)
            finally:
                os._exit(1)  # If that os._exit fails because ret is a truthy non-int, then this will ensure death.
        return pid

    def _judge_proc(self, id):
        """Child entry point for a judge; returns the process exit status."""
        env['id'] = id
        env['key'] = self.auth[id]
        try:
            return judge_proc(False)
        except BaseException:
            # Any failure in the judge is reported as exit status 1.
            return 1
        finally:
            sys.stdout.flush()
            sys.stderr.flush()
            logging.shutdown()

    def _spawn_judge(self, id):
        """Fork a judge child for ``id`` and record its pid.

        If the fork fails nothing is recorded: storing a ``None`` pid would keep
        ``self.pids`` non-empty forever and prevent ``_monitor`` from finishing.
        """
        pid = self._spawn_child(self._judge_proc, id)
        if pid is None:
            return
        self.pids[pid] = id

    def _spawn_monitor(self):
        """Fork the monitor child and remember its pid (``None`` on failure)."""
        def monitor_proc():
            # The monitor must not react to the SIGUSR2 it triggers itself.
            signal.signal(signal.SIGUSR2, signal.SIG_IGN)
            self.monitor.start()
            try:
                self.monitor.join()
            except KeyboardInterrupt:
                self.monitor.stop()

        self.monitor_pid = self._spawn_child(monitor_proc)

    def _spawn_api(self):
        """Fork the API server child and remember its pid (``None`` on failure)."""
        from dmoj import judgeenv
        from BaseHTTPServer import HTTPServer
        master_pid = self.master_pid

        class Handler(JudgeControlRequestHandler):
            def update_problems(self):
                os.kill(master_pid, signal.SIGUSR2)

        # Created before forking, so the child inherits the listening socket.
        server = HTTPServer(judgeenv.api_listen, Handler)

        def api_proc():
            signal.signal(signal.SIGUSR2, signal.SIG_IGN)
            server.serve_forever()

        try:
            self.api_pid = self._spawn_child(api_proc)
        finally:
            # Only the child serves requests.  Close the parent's copy of the
            # listening socket so it is not leaked on every respawn and does
            # not keep the address bound after the child has died.
            server.server_close()

    def _spawn_all(self):
        """Spawn every configured judge, then the monitor and API server if enabled."""
        from dmoj import judgeenv

        for judge_id in self.auth:
            print >> sys.stderr, 'judgepm: Spawning judge:', judge_id
            self._spawn_judge(judge_id)
        if self.monitor.is_real:
            print >> sys.stderr, 'judgepm: Spawning monitor'
            self._spawn_monitor()
        if judgeenv.api_listen is not None:
            print >> sys.stderr, 'judgepm: Spawning API server'
            self._spawn_api()

    def _monitor(self):
        """Reap children, respawning them while respawning is enabled.

        Loops until respawning has been disabled and no judge is left.
        """
        while self._try_respawn or self.pids:
            try:
                pid, status = os.wait()
            except (OSError, IOError) as e:
                # A signal handler interrupted the wait; just try again.
                if e.errno == errno.EINTR:
                    continue
                raise
            # Ignore status changes that are not an actual termination.
            if not os.WIFSIGNALED(status) and not os.WIFEXITED(status):
                continue
            if pid in self.pids:
                # A child just died.
                judge = self.pids[pid]
                del self.pids[pid]
                if self._try_respawn:
                    print >> sys.stderr, 'judgepm: Judge died, respawning: %s (0x%08X)' % (judge, status)
                    self._spawn_judge(judge)
                else:
                    print >> sys.stderr, 'judgepm: Judge exited: %s (0x%08X)' % (judge, status)
            elif pid == self.monitor_pid:
                if self._try_respawn:
                    print >> sys.stderr, 'judgepm: Monitor died, respawning (0x%08X)' % status
                    self._spawn_monitor()
                else:
                    print >> sys.stderr, 'judgepm: Monitor exited: (0x%08X)' % status
            elif pid == self.api_pid:
                if self._try_respawn:
                    print >> sys.stderr, 'judgepm: API server died, respawning (0x%08X)' % status
                    self._spawn_api()
                else:
                    print >> sys.stderr, 'judgepm: API server exited: (0x%08X)' % status
            else:
                print >> sys.stderr, 'judgepm: I am not your father, %d (0x%08X)!' % (pid, status)

    def run(self):
        """Install signal forwarding, spawn all children and supervise them."""
        print >> sys.stderr, 'judgepm: Starting process manager: %d.' % os.getpid()
        # SIGUSR2 is forwarded without disabling respawning; the other signals
        # also switch respawning off.
        self._forward_signal(signal.SIGUSR2, respawn=True)
        self._forward_signal(signal.SIGINT)
        self._forward_signal(signal.SIGHUP)
        self._forward_signal(signal.SIGQUIT)
        self._forward_signal(signal.SIGTERM)
        self._spawn_all()
        try:
            self._monitor()
        except KeyboardInterrupt:
            # Interrupted while supervising: stop respawning, interrupt the
            # children and wait for them to exit.
            self._try_respawn = False
            self.signal_all(signal.SIGINT)
            self._monitor()
        print >> sys.stderr, 'judgepm: Exited gracefully: %d.' % os.getpid()

    def signal_all(self, signum):
        """Send ``signum`` to every judge, the monitor and the API server.

        Children that were never spawned (``None`` pid) are skipped and pids
        that no longer exist (``ESRCH``) are ignored; other errors propagate.
        """
        for pid in chain(self.pids, [self.monitor_pid, self.api_pid]):
            if pid is None:
                continue
            try:
                os.kill(pid, signum)
            except OSError as e:
                if e.errno != errno.ESRCH:
                    raise
class JudgeManager(object):
    """Fork and supervise the judge, file-monitor and API-server child processes.

    The manager forks one child per configured judge (plus an optional monitor
    and an optional API server), forwards selected signals to all of them and
    respawns children that die for as long as ``_try_respawn`` is true.
    """

    # Signal number -> signal name, used for log messages.  Names are visited in
    # reverse alphabetical order, so when several names share one number the
    # alphabetically first name is the one that ends up in the map.
    signal_map = {
        number: name
        for name, number in sorted(signal.__dict__.items(), reverse=True)
        if name.startswith('SIG') and not name.startswith('SIG_')
    }

    def __init__(self, judges):
        """Prepare the manager; no process is spawned until :meth:`run`.

        :param judges: iterable of entries exposing ``id`` and ``key`` attributes.
        """
        self.libc = self.__get_libc()
        self.prctl = self.libc.prctl
        self._try_respawn = True
        self.auth = {entry.id: entry.key for entry in judges}
        # Handlers that were installed before ours; children restore them.
        self.orig_signal = {}
        self.master_pid = os.getpid()
        # pid -> judge id for every live judge child.
        self.pids = {}
        self.monitor_pid = None
        self.api_pid = None
        self.monitor = Monitor()

    def __get_libc(self):
        """Load the C library through ctypes so that ``prctl`` can be called."""
        from ctypes.util import find_library
        from ctypes import CDLL
        return CDLL(find_library('c'))

    def _forward_signal(self, sig, respawn=False):
        """Install a handler for ``sig`` that relays the signal to every child.

        :param sig: signal number to intercept.
        :param respawn: when false, receiving the signal also disables
            respawning of dead children.
        """
        def handler(signum, frame):
            # SIGUSR2, the signal for file updates, may be triggered very quickly.
            # Due to processing delays, it may cause reentrancy issues when logging.
            # Band-aid fix is to avoid logging SIGUSR2.
            if signum not in (signal.SIGUSR2,):
                logpm.info('Received signal (%s), forwarding...', self.signal_map.get(signum, signum))
            if not respawn:
                logpm.info('Will no longer respawn judges.')
                self._try_respawn = False
            self.signal_all(signum)

        # Remember the previous handler so forked children can restore it.
        self.orig_signal[sig] = signal.signal(sig, handler)

    def _spawn_child(self, func, *args, **kwargs):
        """Fork and run ``func(*args, **kwargs)`` in the child.

        The child never returns from this method: it exits with the return
        value of ``func`` (or 0 when that value is falsy).  The parent blocks
        until the child has restored its signal handlers.

        :return: the child's pid in the parent, or ``None`` if the fork failed.
        """
        # Flush buffered output so that it is not duplicated into the child.
        sys.stdout.flush()
        sys.stderr.flush()
        ppid = os.getpid()

        # Pipe to signal signal handler initialization.
        pr, pw = os.pipe()
        try:
            pid = os.fork()
        except OSError:
            logpm.exception('Failed to spawn child process.')
            # No child exists to use the pipe; close both ends so the
            # descriptors are not leaked on every failed attempt.
            os.close(pr)
            os.close(pw)
            return None

        if pid == 0:
            # In child. Scary business.
            self.prctl(PR_SET_PDEATHSIG, signal.SIGTERM)
            # If the parent is already gone (our parent pid changed), terminate.
            if ppid != os.getppid():
                os.kill(os.getpid(), signal.SIGTERM)
                os._exit(2)
            sys.stdin.close()
            os.close(pr)
            # Undo the manager's signal forwarding inside the child.
            for sig, handler in self.orig_signal.items():
                signal.signal(sig, handler)
            # Closing the write end unblocks the parent's read below.
            os.close(pw)

            # How could we possibly return to top level?
            try:
                os._exit(func(*args, **kwargs) or 0)
            finally:
                os._exit(1)  # If that os._exit fails because ret is a truthy non-int, then this will ensure death.

        # In parent.
        os.close(pw)

        # Block until child initializes signals before we register this child to receive signals.
        try:
            while True:
                try:
                    os.read(pr, 1)
                except OSError as e:
                    # Retry only when the read was interrupted by a signal.
                    if e.errno != errno.EINTR:
                        raise
                else:
                    break
        finally:
            # Release the read end even if the read failed unexpectedly.
            os.close(pr)
        return pid

    def _judge_proc(self, id):
        """Child entry point for a judge; returns the process exit status."""
        env['id'] = id
        env['key'] = self.auth[id]
        try:
            return judge_proc(False)
        except BaseException:
            # Any failure in the judge is reported as exit status 1.
            return 1
        finally:
            sys.stdout.flush()
            sys.stderr.flush()
            logging.shutdown()

    def _spawn_judge(self, id):
        """Fork a judge child for ``id`` and record its pid.

        If the fork fails nothing is recorded (``_spawn_child`` has already
        logged the error): storing a ``None`` pid would keep ``self.pids``
        non-empty forever and prevent ``_monitor`` from finishing.
        """
        pid = self._spawn_child(self._judge_proc, id)
        if pid is None:
            return
        self.pids[pid] = id
        logpm.info('Judge %s is pid %d', id, pid)

    def _spawn_monitor(self):
        """Fork the monitor child and remember its pid (``None`` on failure)."""
        def monitor_proc():
            setproctitle('DMOJ Judge: File monitor')
            # The monitor must not react to the SIGUSR2 it triggers itself.
            signal.signal(signal.SIGUSR2, signal.SIG_IGN)

            event = threading.Event()
            stop = False

            def worker():
                # Coalesce bursts of monitor callbacks: SIGUSR2 is sent to the
                # manager only after one second without a further callback.
                while True:
                    event.wait()
                    event.clear()
                    if stop:
                        return
                    event.wait(1)
                    if event.is_set():
                        continue
                    os.kill(self.master_pid, signal.SIGUSR2)

            threading.Thread(target=worker).start()
            self.monitor.callback = event.set
            self.monitor.start()
            try:
                self.monitor.join()
            except KeyboardInterrupt:
                self.monitor.stop()
                # Wake the worker so that it sees the stop flag and returns.
                stop = True
                event.set()

        self.monitor_pid = self._spawn_child(monitor_proc)
        # ``%d`` cannot format ``None``, so only log a pid that exists.
        if self.monitor_pid is not None:
            logpm.info('Monitor is pid %d', self.monitor_pid)

    def _spawn_api(self):
        """Fork the API server child and remember its pid (``None`` on failure)."""
        from dmoj import judgeenv

        master_pid = self.master_pid

        class Handler(JudgeControlRequestHandler):
            def update_problems(self):
                os.kill(master_pid, signal.SIGUSR2)

        # Created before forking, so the child inherits the listening socket.
        server = HTTPServer(judgeenv.api_listen, Handler)

        def api_proc():
            setproctitle('DMOJ Judge: API server')
            signal.signal(signal.SIGUSR2, signal.SIG_IGN)
            server.serve_forever()

        try:
            self.api_pid = self._spawn_child(api_proc)
        finally:
            # Only the child serves requests.  Close the parent's copy of the
            # listening socket so it is not leaked on every respawn and does
            # not keep the address bound after the child has died.
            server.server_close()
        # ``%d`` cannot format ``None``, so only log a pid that exists.
        if self.api_pid is not None:
            logpm.info('API server is pid %d', self.api_pid)

    def _spawn_all(self):
        """Spawn every configured judge, then the monitor and API server if enabled."""
        from dmoj import judgeenv

        for judge_id in self.auth:
            logpm.info('Spawning judge: %s', judge_id)
            self._spawn_judge(judge_id)
        if self.monitor.is_real:
            logpm.info('Spawning monitor')
            self._spawn_monitor()
        if judgeenv.api_listen is not None:
            logpm.info('Spawning API server')
            self._spawn_api()

    def _monitor(self):
        """Reap children, respawning them while respawning is enabled.

        Loops until respawning has been disabled and no judge is left.
        """
        while self._try_respawn or self.pids:
            try:
                pid, status = os.wait()
            except (OSError, IOError) as e:
                # A signal handler interrupted the wait; just try again.
                if e.errno == errno.EINTR:
                    continue
                raise
            # Ignore status changes that are not an actual termination.
            if not os.WIFSIGNALED(status) and not os.WIFEXITED(status):
                continue
            if pid in self.pids:
                # A child just died.
                judge = self.pids[pid]
                del self.pids[pid]
                if self._try_respawn:
                    logpm.warning('Judge died, respawning: %s (pid %d, 0x%08X)', judge, pid, status)
                    self._spawn_judge(judge)
                else:
                    logpm.info('Judge exited: %s (pid %d, 0x%08X)', judge, pid, status)
            elif pid == self.monitor_pid:
                if self._try_respawn:
                    logpm.warning('Monitor died, respawning (0x%08X)', status)
                    self._spawn_monitor()
                else:
                    logpm.info('Monitor exited: (0x%08X)', status)
            elif pid == self.api_pid:
                if self._try_respawn:
                    logpm.warning('API server died, respawning (0x%08X)', status)
                    self._spawn_api()
                else:
                    logpm.info('API server exited: (0x%08X)', status)
            else:
                logpm.error('I am not your father, %d (0x%08X)!', pid, status)

    def _respawn_judges(self, signum, frame):
        """Signal handler: SIGTERM every child.

        Respawning is left enabled, so ``_monitor`` starts replacements as the
        children exit.
        """
        logpm.info('Received signal (%s), murderizing all children.', self.signal_map.get(signum, signum))
        self.signal_all(signal.SIGTERM)

    def run(self):
        """Install signal handling, spawn all children and supervise them."""
        logpm.info('Starting process manager: %d.', os.getpid())

        from dmoj import judgeenv
        setproctitle('DMOJ Judge: Process manager on %s' % (make_host_port(judgeenv),))

        # SIGUSR2 is forwarded without disabling respawning; the other
        # forwarded signals also switch respawning off.
        self._forward_signal(signal.SIGUSR2, respawn=True)
        self._forward_signal(signal.SIGINT)
        self._forward_signal(signal.SIGQUIT)
        self._forward_signal(signal.SIGTERM)
        # NOTE(review): this handler is installed directly, so the previous
        # SIGHUP handler is not recorded in ``orig_signal`` and is therefore not
        # restored by ``_spawn_child`` in the children - confirm this is intended.
        signal.signal(signal.SIGHUP, self._respawn_judges)
        self._spawn_all()
        try:
            self._monitor()
        except KeyboardInterrupt:
            # Interrupted while supervising: stop respawning, interrupt the
            # children and wait for them to exit.
            self._try_respawn = False
            self.signal_all(signal.SIGINT)
            self._monitor()
        logpm.info('Exited gracefully: %d.', os.getpid())

    def signal_all(self, signum):
        """Send ``signum`` to every judge, the monitor and the API server.

        Children that were never spawned (``None`` pid) are skipped and pids
        that no longer exist (``ESRCH``) are ignored; other errors propagate.
        """
        for pid in chain(self.pids, [self.monitor_pid, self.api_pid]):
            if pid is None:
                continue
            try:
                os.kill(pid, signum)
            except OSError as e:
                if e.errno != errno.ESRCH:
                    raise
class JudgeManager(object):
    """Supervisor for the judge, file-monitor and API-server child processes.

    One child is forked per configured judge, plus an optional monitor and an
    optional API server.  Selected signals are relayed to all children, and
    children that die are respawned while ``_try_respawn`` is true.
    """

    # Signal number -> signal name, used for log messages.  Names are visited in
    # reverse alphabetical order, so when several names share one number the
    # alphabetically first name is the one that ends up in the map.
    signal_map = {
        number: name
        for name, number in sorted(signal.__dict__.items(), reverse=True)
        if name.startswith('SIG') and not name.startswith('SIG_')
    }

    def __init__(self, judges):
        """Set up bookkeeping; nothing is spawned until :meth:`run` is called.

        :param judges: iterable of entries exposing ``id`` and ``key`` attributes.
        """
        self.libc = self.__get_libc()
        self.prctl = self.libc.prctl
        self._try_respawn = True
        self.auth = {entry.id: entry.key for entry in judges}
        # Handlers that were installed before ours; children restore them.
        self.orig_signal = {}
        self.master_pid = os.getpid()
        # pid -> judge id for every live judge child.
        self.pids = {}
        self.monitor_pid = None
        self.api_pid = None
        self.monitor = Monitor()

    def __get_libc(self):
        """Load the C library through ctypes so that ``prctl`` can be called."""
        from ctypes.util import find_library
        from ctypes import CDLL
        return CDLL(find_library('c'))

    def _forward_signal(self, sig, respawn=False):
        """Install a handler for ``sig`` that relays the signal to every child.

        :param sig: signal number to intercept.
        :param respawn: when false, receiving the signal also disables
            respawning of dead children.
        """
        def handler(signum, frame):
            # SIGUSR2, the signal for file updates, may be triggered very quickly.
            # Due to processing delays, it may cause reentrancy issues when logging.
            # Band-aid fix is to avoid logging SIGUSR2.
            if signum not in (signal.SIGUSR2,):
                logpm.info('Received signal (%s), forwarding...', self.signal_map.get(signum, signum))
            if not respawn:
                logpm.info('Will no longer respawn judges.')
                self._try_respawn = False
            self.signal_all(signum)

        # Remember the previous handler so forked children can restore it.
        self.orig_signal[sig] = signal.signal(sig, handler)

    def _spawn_child(self, func, *args, **kwargs):
        """Fork and run ``func(*args, **kwargs)`` in the child.

        The child never returns from this method: it exits with the return
        value of ``func`` (or 0 when that value is falsy).  The parent blocks
        until the child has restored its signal handlers.

        :return: the child's pid in the parent, or ``None`` if the fork failed.
        """
        # Flush buffered output so that it is not duplicated into the child.
        sys.stdout.flush()
        sys.stderr.flush()
        ppid = os.getpid()

        # Pipe to signal signal handler initialization.
        pr, pw = os.pipe()
        try:
            pid = os.fork()
        except OSError:
            logpm.exception('Failed to spawn child process.')
            # No child exists to use the pipe; close both ends so the
            # descriptors are not leaked on every failed attempt.
            os.close(pr)
            os.close(pw)
            return None

        if pid == 0:
            # In child. Scary business.
            self.prctl(PR_SET_PDEATHSIG, signal.SIGTERM)
            # If the parent is already gone (our parent pid changed), terminate.
            if ppid != os.getppid():
                os.kill(os.getpid(), signal.SIGTERM)
                os._exit(2)
            sys.stdin.close()
            os.close(pr)
            # Undo the manager's signal forwarding inside the child.
            for sig, handler in self.orig_signal.items():
                signal.signal(sig, handler)
            # Closing the write end unblocks the parent's read below.
            os.close(pw)

            # How could we possibly return to top level?
            try:
                os._exit(func(*args, **kwargs) or 0)
            finally:
                os._exit(1)  # If that os._exit fails because ret is a truthy non-int, then this will ensure death.

        # In parent.
        os.close(pw)

        # Block until child initializes signals before we register this child to receive signals.
        try:
            while True:
                try:
                    os.read(pr, 1)
                except OSError as e:
                    # Retry only when the read was interrupted by a signal.
                    if e.errno != errno.EINTR:
                        raise
                else:
                    break
        finally:
            # Release the read end even if the read failed unexpectedly.
            os.close(pr)
        return pid

    def _judge_proc(self, id):
        """Child entry point for a judge; returns the process exit status."""
        env['id'] = id
        env['key'] = self.auth[id]
        try:
            return judge_proc(False)
        except BaseException:
            # Any failure in the judge is reported as exit status 1.
            return 1
        finally:
            sys.stdout.flush()
            sys.stderr.flush()
            logging.shutdown()

    def _spawn_judge(self, id):
        """Fork a judge child for ``id`` and record its pid.

        If the fork fails nothing is recorded (``_spawn_child`` has already
        logged the error): storing a ``None`` pid would keep ``self.pids``
        non-empty forever and prevent ``_monitor`` from finishing.
        """
        pid = self._spawn_child(self._judge_proc, id)
        if pid is None:
            return
        self.pids[pid] = id
        logpm.info('Judge %s is pid %d', id, pid)

    def _spawn_monitor(self):
        """Fork the monitor child and remember its pid (``None`` on failure)."""
        def monitor_proc():
            setproctitle('DMOJ Judge: File monitor')
            # The monitor must not react to the SIGUSR2 it triggers itself.
            signal.signal(signal.SIGUSR2, signal.SIG_IGN)

            event = threading.Event()
            stop = False

            def worker():
                # Coalesce bursts of monitor callbacks: SIGUSR2 is sent to the
                # manager only after one second without a further callback.
                while True:
                    event.wait()
                    event.clear()
                    if stop:
                        return
                    event.wait(1)
                    if event.is_set():
                        continue
                    os.kill(self.master_pid, signal.SIGUSR2)

            threading.Thread(target=worker).start()
            self.monitor.callback = event.set
            self.monitor.start()
            try:
                self.monitor.join()
            except KeyboardInterrupt:
                self.monitor.stop()
                # Wake the worker so that it sees the stop flag and returns.
                stop = True
                event.set()

        self.monitor_pid = self._spawn_child(monitor_proc)
        # ``%d`` cannot format ``None``, so only log a pid that exists.
        if self.monitor_pid is not None:
            logpm.info('Monitor is pid %d', self.monitor_pid)

    def _spawn_api(self):
        """Fork the API server child and remember its pid (``None`` on failure)."""
        from dmoj import judgeenv

        master_pid = self.master_pid

        class Handler(JudgeControlRequestHandler):
            def update_problems(self):
                os.kill(master_pid, signal.SIGUSR2)

        # Created before forking, so the child inherits the listening socket.
        server = HTTPServer(judgeenv.api_listen, Handler)

        def api_proc():
            setproctitle('DMOJ Judge: API server')
            signal.signal(signal.SIGUSR2, signal.SIG_IGN)
            server.serve_forever()

        try:
            self.api_pid = self._spawn_child(api_proc)
        finally:
            # Only the child serves requests.  Close the parent's copy of the
            # listening socket so it is not leaked on every respawn and does
            # not keep the address bound after the child has died.
            server.server_close()
        # ``%d`` cannot format ``None``, so only log a pid that exists.
        if self.api_pid is not None:
            logpm.info('API server is pid %d', self.api_pid)

    def _spawn_all(self):
        """Spawn every configured judge, then the monitor and API server if enabled."""
        from dmoj import judgeenv

        for judge_id in self.auth:
            logpm.info('Spawning judge: %s', judge_id)
            self._spawn_judge(judge_id)
        if self.monitor.is_real:
            logpm.info('Spawning monitor')
            self._spawn_monitor()
        if judgeenv.api_listen is not None:
            logpm.info('Spawning API server')
            self._spawn_api()

    def _monitor(self):
        """Reap children, respawning them while respawning is enabled.

        Loops until respawning has been disabled and no judge is left.
        """
        while self._try_respawn or self.pids:
            try:
                pid, status = os.wait()
            except (OSError, IOError) as e:
                # A signal handler interrupted the wait; just try again.
                if e.errno == errno.EINTR:
                    continue
                raise
            # Ignore status changes that are not an actual termination.
            if not os.WIFSIGNALED(status) and not os.WIFEXITED(status):
                continue
            if pid in self.pids:
                # A child just died.
                judge = self.pids[pid]
                del self.pids[pid]
                if self._try_respawn:
                    logpm.warning('Judge died, respawning: %s (pid %d, 0x%08X)', judge, pid, status)
                    self._spawn_judge(judge)
                else:
                    logpm.info('Judge exited: %s (pid %d, 0x%08X)', judge, pid, status)
            elif pid == self.monitor_pid:
                if self._try_respawn:
                    logpm.warning('Monitor died, respawning (0x%08X)', status)
                    self._spawn_monitor()
                else:
                    logpm.info('Monitor exited: (0x%08X)', status)
            elif pid == self.api_pid:
                if self._try_respawn:
                    logpm.warning('API server died, respawning (0x%08X)', status)
                    self._spawn_api()
                else:
                    logpm.info('API server exited: (0x%08X)', status)
            else:
                logpm.error('I am not your father, %d (0x%08X)!', pid, status)

    def _respawn_judges(self, signum, frame):
        """Signal handler: SIGTERM every child.

        Respawning is left enabled, so ``_monitor`` starts replacements as the
        children exit.
        """
        logpm.info('Received signal (%s), murderizing all children.', self.signal_map.get(signum, signum))
        self.signal_all(signal.SIGTERM)

    def run(self):
        """Install signal handling, spawn all children and supervise them."""
        logpm.info('Starting process manager: %d.', os.getpid())

        from dmoj import judgeenv
        setproctitle('DMOJ Judge: Process manager on %s' % (make_host_port(judgeenv),))

        # SIGUSR2 is forwarded without disabling respawning; the other
        # forwarded signals also switch respawning off.
        self._forward_signal(signal.SIGUSR2, respawn=True)
        self._forward_signal(signal.SIGINT)
        self._forward_signal(signal.SIGQUIT)
        self._forward_signal(signal.SIGTERM)
        # NOTE(review): this handler is installed directly, so the previous
        # SIGHUP handler is not recorded in ``orig_signal`` and is therefore not
        # restored by ``_spawn_child`` in the children - confirm this is intended.
        signal.signal(signal.SIGHUP, self._respawn_judges)
        self._spawn_all()
        try:
            self._monitor()
        except KeyboardInterrupt:
            # Interrupted while supervising: stop respawning, interrupt the
            # children and wait for them to exit.
            self._try_respawn = False
            self.signal_all(signal.SIGINT)
            self._monitor()
        logpm.info('Exited gracefully: %d.', os.getpid())

    def signal_all(self, signum):
        """Send ``signum`` to every judge, the monitor and the API server.

        Children that were never spawned (``None`` pid) are skipped and pids
        that no longer exist (``ESRCH``) are ignored; other errors propagate.
        """
        for pid in chain(self.pids, [self.monitor_pid, self.api_pid]):
            if pid is None:
                continue
            try:
                os.kill(pid, signum)
            except OSError as e:
                if e.errno != errno.ESRCH:
                    raise