def network_check_health(net_id, retries=60, period=5):
    """
    Check the health of a running chain and reset it if it stays unhealthy.

    Chains that cannot be found, or whose "status" is not
    NETWORK_STATUS_RUNNING, are skipped. Otherwise the health is refreshed
    up to `retries` times, sleeping `period` seconds after each failed
    attempt. When every attempt fails, the chain is reset only if it is
    still free (its "user_id" is the empty string).

    :param net_id: id of the chain
    :param retries: how many failed checks before treating it as unhealthy
    :param period: seconds to wait between two retries
    :return: None
    """
    net = cluster_handler.get_by_id(net_id)
    if not net:
        logger.warning("Not find chain {}".format(net_id))
        return
    if net.get("status") != NETWORK_STATUS_RUNNING:  # check running one
        return

    net_name = net.get("name")
    logger.debug("Chain {}/{}: checking health".format(net_name, net_id))

    # free or used by user, then check its health
    for i in range(retries):
        if cluster_handler.refresh_health(net_id):  # chain is healthy
            return
        logger.debug("Health Check {}: cluster {}/{} is unhealthy!".format(
            i, net_name, net_id))
        time.sleep(period)
    logger.warning("Chain {}/{} is unhealthy!".format(net_name, net_id))

    # Re-read the record: ownership may have changed while we were retrying,
    # and the chain may even have been removed in the meantime.
    latest = cluster_handler.get_by_id(net_id)
    if not latest:
        logger.warning("Chain {}/{} no longer exists, skip resetting".format(
            net_name, net_id))
        return

    # only reset free chains
    if latest.get("user_id") == "":
        logger.info("Timeout....resetting free unhealthy chain {}/{}".format(
            net_name, net_id))
        cluster_handler.reset_free_one(net_id)
def chain_check_health(chain_id, retries=3, period=5):
    """
    Check the chain health.

    Chains owned by a system user (user_id starting with SYS_USER) are left
    alone, except those being deleted (user_id starting with SYS_DELETER):
    if such a chain keeps the same user_id for `retries` waits of `period`
    seconds, it is deleted here. Any other chain has its health refreshed up
    to `retries` times; if it stays unhealthy and is still free (its
    "user_id" is the empty string), it is reset.

    NOTE(review): another definition of chain_check_health appears later in
    this file; if both live in the same scope, the later one replaces this
    one.

    :param chain_id: id of the chain
    :param retries: how many retries before thinking not health
    :param period: seconds to wait between two retries
    :return: None
    """
    chain = cluster_handler.get_by_id(chain_id)
    if not chain:
        logger.warning("Not find chain with id = {}".format(chain_id))
        return

    chain_user_id = chain.get("user_id")
    chain_name = chain.get("name")
    logger.debug("Chain {}/{}: checking health".format(chain_name, chain_id))

    # we should never process in-processing chains unless deleting one
    if chain_user_id.startswith(SYS_USER):
        if chain_user_id.startswith(SYS_DELETER):  # in system processing, TBD
            for _ in range(retries):
                time.sleep(period)
                current = cluster_handler.get_by_id(chain_id)
                # The chain vanishing or changing owner both mean someone
                # else has handled it; nothing left to do here.
                if not current or current.get("user_id") != chain_user_id:
                    return
            logger.info("Delete in-deleting chain {}/{}".format(
                chain_name, chain_id))
            cluster_handler.delete(chain_id)
        return

    # free or used by user, then check its health
    for _ in range(retries):
        if cluster_handler.refresh_health(chain_id):  # chain is healthy
            return
        time.sleep(period)
    logger.warning("Chain {}/{} is unhealthy!".format(chain_name, chain_id))

    # Re-read the record: the chain may have been taken or removed while we
    # were retrying.
    latest = cluster_handler.get_by_id(chain_id)
    if not latest:
        logger.warning("Chain {}/{} no longer exists, skip resetting".format(
            chain_name, chain_id))
        return

    # only reset free chains
    if latest.get("user_id") == "":
        logger.info("Resetting free unhealthy chain {}/{}".format(
            chain_name, chain_id))
        cluster_handler.reset_free_one(chain_id)
def chain_check_health(chain_id, retries=3, period=5):
    """
    Check the chain health.

    Chains owned by a system user (user_id starting with SYS_USER) are left
    alone, except those being deleted (user_id starting with SYS_DELETER):
    if such a chain keeps the same user_id for `retries` waits of `period`
    seconds, it is deleted here. Any other chain has its health refreshed up
    to `retries` times; if it stays unhealthy and is still free (its
    "user_id" is the empty string), it is reset.

    :param chain_id: id of the chain
    :param retries: how many retries before thinking not health
    :param period: seconds to wait between two retries
    :return: None
    """
    chain = cluster_handler.get_by_id(chain_id)
    if not chain:
        logger.warning("Not find chain {}".format(chain_id))
        return

    chain_user_id = chain.get("user_id")
    chain_name = chain.get("name")
    logger.debug("Chain {}/{}: checking health".format(chain_name, chain_id))

    # we should never process in-processing chains unless deleting one
    if chain_user_id.startswith(SYS_USER):
        if chain_user_id.startswith(SYS_DELETER):  # in system processing, TBD
            for _ in range(retries):
                time.sleep(period)
                current = cluster_handler.get_by_id(chain_id)
                # The chain vanishing or changing owner both mean someone
                # else has handled it; nothing left to do here.
                if not current or current.get("user_id") != chain_user_id:
                    return
            logger.info("Delete in-deleting chain {}/{}".format(
                chain_name, chain_id))
            cluster_handler.delete(chain_id)
        return

    logger.info("will refresh health")
    # free or used by user, then check its health
    for _ in range(retries):
        if cluster_handler.refresh_health(chain_id):  # chain is healthy
            return
        time.sleep(period)
    logger.warning("Chain {}/{} is unhealthy!".format(chain_name, chain_id))

    # Re-read the record: the chain may have been taken or removed while we
    # were retrying.
    latest = cluster_handler.get_by_id(chain_id)
    if not latest:
        logger.warning("Chain {}/{} no longer exists, skip resetting".format(
            chain_name, chain_id))
        return

    # only reset free chains
    if latest.get("user_id") == "":
        logger.info("Resetting free unhealthy chain {}/{}".format(
            chain_name, chain_id))
        cluster_handler.reset_free_one(chain_id)