def wait(self):
    """Block until every program is healthy, polling with backoff.

    Calls self.count_unhealthy() repeatedly. Between two consecutive
    polls it sleeps for 0.2 * 1.2**attempt seconds, so the delay grows
    with each failed attempt.

    raise (FrameworkException): if some program is still unhealthy
        after the last poll; the message lists the offending programs
        as "service_name/shard".

    """
    # NOTE(review): the original loop condition was
    # `attempts <= _MAX_ATTEMPTS`, i.e. _MAX_ATTEMPTS + 1 polls. That
    # count is kept here on purpose; confirm whether exactly
    # _MAX_ATTEMPTS polls was intended.
    last_attempt = _MAX_ATTEMPTS + 1
    for attempt in range(1, last_attempt + 1):
        unhealthy = self.count_unhealthy()
        if unhealthy == 0:
            logger.info("All healthy! Continuing.")
            return
        logger.info("Still %s unhealthy.", unhealthy)
        # Only back off when another poll follows: sleeping after the
        # final failed poll would just delay the error below.
        if attempt < last_attempt:
            time.sleep(0.2 * (1.2 ** attempt))

    # values() (not the Python-2-only itervalues()) so that building the
    # error message cannot itself fail on Python 3.
    # Presumably self._programs is a dict of program objects - verify.
    still_down = ", ".join(
        "%s/%s" % (p.service_name, p.shard)
        for p in self._programs.values() if not p.healthy)
    raise FrameworkException(
        "Failed to bring up services: %s" % still_down)
def _check_with_backoff(self):
    """Check and wait that the service is healthy.

    Runs self._check() up to _MAX_ATTEMPTS times, sleeping for
    0.2 * 1.2**attempt seconds between consecutive attempts, and
    returns as soon as self.healthy is set.

    raise (FrameworkException): if the service is still not healthy
        after the last attempt. Exceptions raised by self._check()
        itself are not caught here and propagate unchanged.

    """
    self.healthy = False
    for attempt in range(1, _MAX_ATTEMPTS + 1):
        self._check()
        if self.healthy:
            return
        # Only back off when another attempt follows: sleeping after
        # the final failed attempt would just delay the error below.
        if attempt < _MAX_ATTEMPTS:
            time.sleep(0.2 * (1.2 ** attempt))

    # Service did not start.
    raise FrameworkException("Failed to bring up service %s/%s" %
                             (self.service_name, self.shard))
def _check(self):
    """Run one health probe and record the outcome in self.healthy.

    The probe is self._check_ranking_web_server() when the service is
    "RankingWebServer" and self._check_service() otherwise. If the
    probe completes without a socket error, self.healthy becomes True.
    A refused connection (ECONNREFUSED) sets self.healthy to False and
    returns normally.

    raise (FrameworkException): on any other socket error (with
        self.healthy left False).

    """
    if self.service_name == "RankingWebServer":
        probe = self._check_ranking_web_server
    else:
        probe = self._check_service

    try:
        probe()
    except socket.error as exc:
        self.healthy = False
        if exc.errno == errno.ECONNREFUSED:
            # Connection refused: reported as "not healthy", not as an
            # error.
            return
        raise FrameworkException("Weird connection state.")

    self.healthy = True
def _check_service(self):
    """Health checker for services and servers.

    Sends an "echo" RPC through RemoteService and verifies that the
    reply carries the same string back. For AdminWebServer and
    ContestWebServer it additionally opens (and immediately closes) a
    TCP connection to the configured listen port on 127.0.0.1.

    raise (FrameworkException): if the echo reply is not "hello".
    raise (socket.error): if the TCP connection to the web server port
        fails; this is not caught here.

    """
    rs = RemoteService(self.service_name, self.shard)
    reply = rs.call("echo", {"string": "hello"})
    if reply["__data"] != "hello":
        raise FrameworkException("Strange response from service.")

    # In case it is a server, we also check HTTP is serving.
    if self.service_name == "AdminWebServer":
        port = get_cms_config()["admin_listen_port"]
    elif self.service_name == "ContestWebServer":
        port = get_cms_config()["contest_listen_port"][self.shard]
    else:
        return

    sock = socket.socket()
    try:
        sock.connect(("127.0.0.1", port))
    finally:
        # Always release the descriptor: connect() failing is a normal
        # outcome when the server is not listening, and without this
        # the socket would stay open on that path.
        sock.close()