示例#1
0
def network_check_health(net_id, retries=60, period=5):
    """
    Check the health of a running chain; reset it if it is free and unhealthy.

    The chain is polled through ``cluster_handler.refresh_health`` up to
    ``retries`` times, sleeping ``period`` seconds after every failed
    attempt. If no attempt succeeds, the chain record is read again and,
    only when its ``user_id`` is the empty string (a free chain),
    ``cluster_handler.reset_free_one`` is called on it.

    Nothing is done when the chain cannot be found or when its status is
    not ``NETWORK_STATUS_RUNNING``.

    :param net_id: id of the chain
    :param retries: how many health checks to try before treating the
        chain as unhealthy
    :param period: seconds to sleep after each failed health check
    :return: None
    """
    net = cluster_handler.get_by_id(net_id)
    if not net:
        logger.warning("Not find chain {}".format(net_id))
        return
    if net.get("status") != NETWORK_STATUS_RUNNING:  # check running one
        return
    net_name = net.get("name")
    logger.debug("Chain {}/{}: checking health".format(net_name, net_id))

    # free or used by user, then check its health
    for i in range(retries):
        if cluster_handler.refresh_health(net_id):  # chain is healthy
            return
        logger.debug("Health Check {}: cluster {}/{} is unhealthy!".format(
            i, net_name, net_id))
        time.sleep(period)
    logger.warning("Chain {}/{} is unhealthy!".format(net_name, net_id))

    # Polling can last retries * period seconds, so re-read the record: the
    # chain may have been taken by a user or removed in the meantime.
    latest = cluster_handler.get_by_id(net_id)
    if not latest:
        logger.warning("Chain {}/{} no longer exists, skip resetting".format(
            net_name, net_id))
        return
    # only reset free chains
    if latest.get("user_id") == "":
        logger.info("Timeout....resetting free unhealthy chain {}/{}".format(
            net_name, net_id))
        cluster_handler.reset_free_one(net_id)
示例#2
0
def chain_check_health(chain_id, retries=3, period=5):
    """
    Check the chain health.

    Behaviour depends on the chain's ``user_id``:

    * If it starts with ``SYS_DELETER`` (and ``SYS_USER``), wait up to
      ``retries * period`` seconds for the record to change or disappear;
      if it does neither, call ``cluster_handler.delete`` on the chain.
    * If it starts with ``SYS_USER`` otherwise, do nothing.
    * Otherwise poll ``cluster_handler.refresh_health`` up to ``retries``
      times, sleeping ``period`` seconds after each failed attempt. When
      every attempt fails and the re-read ``user_id`` is the empty string
      (a free chain), call ``cluster_handler.reset_free_one``.

    :param chain_id: id of the chain
    :param retries: how many retries before thinking not health
    :param period: seconds to wait between two retries
    :return: None
    """
    chain = cluster_handler.get_by_id(chain_id)
    if not chain:
        logger.warning("Not find chain with id = {}".format(chain_id))
        return
    # A missing/None user_id is treated like an empty one so that the
    # prefix checks below cannot fail on a non-string value.
    chain_user_id = chain.get("user_id") or ""
    chain_name = chain.get("name")
    logger.debug("Chain {}/{}: checking health".format(chain_name, chain_id))

    # we should never process in-processing chains unless deleting one
    if chain_user_id.startswith(SYS_USER):
        if chain_user_id.startswith(SYS_DELETER):  # in system processing, TBD
            for _ in range(retries):
                time.sleep(period)
                latest = cluster_handler.get_by_id(chain_id)
                # The record vanishing or changing owner both mean some
                # other actor made progress; nothing left to do here.
                if not latest or latest.get("user_id") != chain_user_id:
                    return
            logger.info("Delete in-deleting chain {}/{}".format(
                chain_name, chain_id))
            cluster_handler.delete(chain_id)
        return

    # free or used by user, then check its health
    for _ in range(retries):
        if cluster_handler.refresh_health(chain_id):  # chain is healthy
            return
        time.sleep(period)
    logger.warning("Chain {}/{} is unhealthy!".format(chain_name, chain_id))

    # Re-read the record: the chain may have been taken by a user or
    # removed while we were polling.
    latest = cluster_handler.get_by_id(chain_id)
    if not latest:
        logger.warning("Not find chain with id = {}".format(chain_id))
        return
    # only reset free chains
    if latest.get("user_id") == "":
        # The chain is reset, not deleted, so the message says so.
        logger.info("Resetting free unhealthy chain {}/{}".format(
            chain_name, chain_id))
        cluster_handler.reset_free_one(chain_id)
示例#3
0
def chain_check_health(chain_id, retries=3, period=5):
    """
    Check the chain health.

    Behaviour depends on the chain's ``user_id``:

    * If it starts with ``SYS_DELETER`` (and ``SYS_USER``), wait up to
      ``retries * period`` seconds for the record to change or disappear;
      if it does neither, call ``cluster_handler.delete`` on the chain.
    * If it starts with ``SYS_USER`` otherwise, do nothing.
    * Otherwise poll ``cluster_handler.refresh_health`` up to ``retries``
      times, sleeping ``period`` seconds after each failed attempt. When
      every attempt fails and the re-read ``user_id`` is the empty string
      (a free chain), call ``cluster_handler.reset_free_one``.

    :param chain_id: id of the chain
    :param retries: how many retries before thinking not health
    :param period: seconds to wait between two retries
    :return: None
    """
    chain = cluster_handler.get_by_id(chain_id)
    if not chain:
        logger.warning("Not find chain {}".format(chain_id))
        return
    # A missing/None user_id is treated like an empty one so that the
    # prefix checks below cannot fail on a non-string value.
    chain_user_id = chain.get("user_id") or ""
    chain_name = chain.get("name")
    logger.debug("Chain {}/{}: checking health".format(chain_name, chain_id))

    # we should never process in-processing chains unless deleting one
    if chain_user_id.startswith(SYS_USER):
        if chain_user_id.startswith(SYS_DELETER):  # in system processing, TBD
            for _ in range(retries):
                time.sleep(period)
                latest = cluster_handler.get_by_id(chain_id)
                # The record vanishing or changing owner both mean some
                # other actor made progress; nothing left to do here.
                if not latest or latest.get("user_id") != chain_user_id:
                    return
            logger.info("Delete in-deleting chain {}/{}".format(
                chain_name, chain_id))
            cluster_handler.delete(chain_id)
        return

    logger.info("will refresh health")
    # free or used by user, then check its health
    for _ in range(retries):
        if cluster_handler.refresh_health(chain_id):  # chain is healthy
            return
        time.sleep(period)
    logger.warning("Chain {}/{} is unhealthy!".format(chain_name, chain_id))

    # Re-read the record: the chain may have been taken by a user or
    # removed while we were polling.
    latest = cluster_handler.get_by_id(chain_id)
    if not latest:
        logger.warning("Not find chain {}".format(chain_id))
        return
    # only reset free chains
    if latest.get("user_id") == "":
        logger.info("Resetting free unhealthy chain {}/{}".format(
            chain_name, chain_id))
        cluster_handler.reset_free_one(chain_id)