示例#1
0
def main():
    """
    Run one pass of the abaco health checks.

    Steps, in order: clean up the IPC directories, read the worker ttl from
    the config, launch a spawner container if container_running reports none
    for name='spawner*', and finally call check_workers for every id returned
    by get_actor_ids.

    Failures in the clean-up, the config lookup and the spawner launch are
    logged and do not abort the run. Exceptions raised by get_actor_ids or
    check_workers are not handled here and propagate to the caller.
    :return:
    """
    logger.info("Running abaco health checks. Now: {}".format(time.time()))
    try:
        clean_up_ipc_dirs()
    except Exception as e:
        logger.error("Got exception from clean_up_ipc_dirs: {}".format(e))
    # Bind ttl before the lookup so that a failed Config.get leaves a defined
    # value behind; the int() conversion below then falls back to -1 through
    # a TypeError instead of relying on an UnboundLocalError being swallowed.
    ttl = None
    try:
        ttl = Config.get('workers', 'worker_ttl')
    except Exception as e:
        logger.error(
            "Could not get worker_ttl config. Exception: {}".format(e))
    if not container_running(name='spawner*'):
        logger.critical("No spawners running! Launching new spawner..")
        command = 'python3 -u /actors/spawner.py'
        # The spawner is started without a log file (log_file=None) and with
        # no mounts. Only the part of AE_IMAGE before the first ':' is passed
        # in the environment -- presumably the image name without its tag.
        try:
            run_container_with_docker(
                AE_IMAGE,
                command,
                name='abaco_spawner_0',
                environment={'AE_IMAGE': AE_IMAGE.split(':')[0]},
                mounts=[],
                log_file=None)
        except Exception as e:
            logger.critical(
                "Could not restart spawner. Exception: {}".format(e))
    # A missing (None) or non-numeric ttl falls back to -1.
    try:
        ttl = int(ttl)
    except (TypeError, ValueError) as e:
        logger.error("Invalid ttl config: {}. Setting to -1.".format(e))
        ttl = -1
    actor_ids = get_actor_ids()
    logger.info(
        "Found {} actor(s). Now checking status.".format(len(actor_ids)))
    for actor_id in actor_ids:
        check_workers(actor_id, ttl)
示例#2
0
文件: health.py 项目: TACC/abaco
def main():
    """
    Entry point for a single round of abaco health checks.

    Reads the worker ttl from the config, starts a spawner container when
    container_running(name='spawner*') reports none, and then runs
    check_workers on each id returned by get_actor_ids.
    """
    print("Running abaco health checks. Now: {}".format(time.time()))
    raw_ttl = Config.get('workers', 'worker_ttl')

    spawner_up = container_running(name='spawner*')
    if not spawner_up:
        print("No spawners running! Launching new spawner..")
        run_container_with_docker(
            AE_IMAGE,
            'python3 -u /actors/spawner.py',
            name='abaco_spawner_0',
            environment={'AE_IMAGE': AE_IMAGE},
        )

    # Use -1 whenever the configured value cannot be converted to an int.
    try:
        worker_ttl = int(raw_ttl)
    except Exception:
        worker_ttl = -1

    actor_ids = get_actor_ids()
    print("Found {} actor(s). Now checking status.".format(len(actor_ids)))
    for actor_id in actor_ids:
        check_workers(actor_id, worker_ttl)
示例#3
0
文件: health.py 项目: mwvaughn/abaco
def main():
    """
    Perform one sweep of the abaco health checks.

    The worker ttl is read from the config first. If
    container_running(name='spawner*') is falsy a new spawner container is
    launched. Afterwards check_workers is called for every actor id.
    """
    print("Running abaco health checks. Now: {}".format(time.time()))
    configured_ttl = Config.get('workers', 'worker_ttl')

    have_spawner = container_running(name='spawner*')
    if not have_spawner:
        print("No spawners running! Launching new spawner..")
        spawner_command = 'python3 -u /actors/spawner.py'
        spawner_env = {'AE_IMAGE': AE_IMAGE}
        run_container_with_docker(
            AE_IMAGE,
            spawner_command,
            name='abaco_spawner_0',
            environment=spawner_env,
        )

    # Any value that int() rejects is replaced by -1.
    try:
        ttl_value = int(configured_ttl)
    except Exception:
        ttl_value = -1

    all_ids = get_actor_ids()
    print("Found {} actor(s). Now checking status.".format(len(all_ids)))
    for actor_id in all_ids:
        check_workers(actor_id, ttl_value)
示例#4
0
def check_spawner(queue):
    """
    Make sure a spawner is running for the given queue, starting one if not.

    :param queue: (str) - name of the queue whose spawner is checked.
    :return: None
    """
    logger.debug("top of check_spawner for queue: {}".format(queue))
    # By convention spawner containers are named
    #   <project>_<queue>_spawner_<count>, e.g. abaco_default_spawner_2,
    # so the lookup uses the "<queue>_spawner" fragment of that name.
    name_fragment = '{}_spawner'.format(queue)
    if container_running(name=name_fragment):
        logger.debug("spawner for queue {} already running.".format(queue))
        return
    logger.critical(
        "No spawners running for queue {}! Launching new spawner..".format(
            queue))
    start_spawner(queue)