def reap_workers(self):
    """Collect exited child processes so they do not linger as zombies.

    Polls ``os.waitpid`` in non-blocking mode until no exited child is
    left. The re-exec child only has its recorded pid reset; any other
    child is dropped from ``self.WORKERS`` and its ``tmp`` is closed.

    Raises:
        HaltServer: a child exited with ``WORKER_BOOT_ERROR`` or
            ``APP_LOAD_ERROR``. Halting avoids an endless
            start/stop cycle.
        OSError: ``waitpid`` failed for a reason other than ``ECHILD``.
    """
    try:
        while True:
            pid, wait_status = os.waitpid(-1, os.WNOHANG)
            if not pid:
                # With WNOHANG a zero pid means no child has exited yet.
                break

            if pid == self.reexec_pid:
                self.reexec_pid = 0
                continue

            # A worker said it cannot boot. We'll shutdown
            # to avoid infinite start/stop cycles.
            code = wait_status >> 8
            if code == self.WORKER_BOOT_ERROR:
                raise HaltServer(
                    "Worker failed to boot.", self.WORKER_BOOT_ERROR
                )
            if code == self.APP_LOAD_ERROR:
                raise HaltServer(
                    "App failed to load.", self.APP_LOAD_ERROR
                )

            dead = self.WORKERS.pop(pid, None)
            if dead:
                dead.tmp.close()
    except OSError as err:
        # ECHILD: there are no child processes to wait for.
        if err.errno == errno.ECHILD:
            return
        raise
def reap_workers(self):
    """Reap exited children so no zombie processes are left behind.

    Repeatedly calls ``os.waitpid`` without blocking. For the re-exec
    child only ``self.reexec_pid`` is cleared. For any other child the
    matching entry is removed from ``self.WORKERS``, its ``tmp`` is
    closed and ``self.cfg.child_exit(self, worker)`` is invoked.

    Raises:
        HaltServer: a child exited with ``WORKER_BOOT_ERROR`` or
            ``APP_LOAD_ERROR``.
        OSError: ``waitpid`` failed for a reason other than ``ECHILD``.
    """
    try:
        while True:
            child_pid, raw_status = os.waitpid(-1, os.WNOHANG)
            if not child_pid:
                # Nothing has exited yet; stop polling.
                break

            if child_pid == self.reexec_pid:
                self.reexec_pid = 0
                continue

            # A worker was terminated. If the termination reason was
            # that it could not boot, we'll shut it down to avoid
            # infinite start/stop cycles.
            exit_status = raw_status >> 8
            if exit_status == self.WORKER_BOOT_ERROR:
                raise HaltServer(
                    "Worker failed to boot.", self.WORKER_BOOT_ERROR
                )
            if exit_status == self.APP_LOAD_ERROR:
                raise HaltServer(
                    "app failed to load.", self.APP_LOAD_ERROR
                )

            gone = self.WORKERS.pop(child_pid, None)
            if gone:
                gone.tmp.close()
                self.cfg.child_exit(self, gone)
    except OSError as err:
        # ECHILD: there are no child processes to wait for.
        if err.errno == errno.ECHILD:
            return
        raise
def loadModel():
    """Load the Keras model inside a fresh, globally registered session.

    The model path is resolved in this order:

    1. a command-line argument containing ``.h5`` (if several match, the
       last one is used),
    2. the ``FF_MODEL`` environment variable.

    Side effects:
        Rebinds the module-level ``SESSION`` to a new ``tf.Session`` and
        registers it with Keras through ``K.set_session``.

    Returns:
        The model object returned by ``load_model``.

    Raises:
        HaltServer: neither ``sys.argv`` nor ``FF_MODEL`` names a model.
    """
    global SESSION
    SESSION = tf.Session()
    K.set_session(SESSION)

    with SESSION.as_default():
        # Resolve the path first, then load exactly once. Previously the
        # "not found" branch was the ``else`` of the FF_MODEL check, so a
        # model loaded from argv still ended in HaltServer.
        model_path = None
        for arg in sys.argv[1:]:
            if '.h5' in arg:
                model_path = arg
        if model_path is None:
            model_path = os.environ.get('FF_MODEL', None)

        if model_path is None:
            msg = 'cannot find model file in sys.argv or in environment var "FF_MODEL"'
            logger.error(msg)
            raise HaltServer(msg)

        logger.info('loading keras model from %s' % model_path)
        model = load_model(model_path)

        # necessary for multi-threads, for some reason
        model._make_predict_function()
        #GRAPH.finalize() # avoid modifications
    return model
def reap_workers(self, abc=''):
    """Reap exited children so no zombie processes are left behind.

    Logs one line on entry and one per reaped child through
    ``self._log``. For the re-exec child only ``self.reexec_pid`` is
    cleared. For any other child the matching entry is removed from
    ``self.WORKERS``, its ``tmp`` is closed and
    ``self.cfg.child_exit(self, worker)`` is invoked.

    Args:
        abc: value formatted at the start of every log line this call
            emits.

    Raises:
        HaltServer: a child exited with ``WORKER_BOOT_ERROR`` or
            ``APP_LOAD_ERROR``.
        OSError: ``waitpid`` failed for a reason other than ``ECHILD``.
    """
    self._log('%s reap_workers' % abc)
    try:
        while True:
            # -1 means "wait for any child process". The child is not
            # necessarily a worker; it may also be the process forked
            # during reexec.
            child_pid, raw_status = os.waitpid(-1, os.WNOHANG)
            if not child_pid:
                break

            self._log('%s reap_workers wpid=%s reexec_pid=%s master_pid=%s'
                      % (abc, child_pid, self.reexec_pid, self.master_pid))

            # The child process forked during reexec.
            if child_pid == self.reexec_pid:
                self.reexec_pid = 0
                continue

            # A worker was terminated. If the termination reason was
            # that it could not boot, we'll shut it down to avoid
            # infinite start/stop cycles.
            exit_status = raw_status >> 8
            if exit_status == self.WORKER_BOOT_ERROR:
                raise HaltServer(
                    "Worker failed to boot.", self.WORKER_BOOT_ERROR
                )
            if exit_status == self.APP_LOAD_ERROR:
                raise HaltServer(
                    "App failed to load.", self.APP_LOAD_ERROR
                )

            gone = self.WORKERS.pop(child_pid, None)
            if gone:
                gone.tmp.close()
                self.cfg.child_exit(self, gone)
    except OSError as err:
        # ECHILD: there are no child processes to wait for.
        if err.errno == errno.ECHILD:
            return
        raise