def main():
    """Run one pass of the abaco health checks.

    Steps, in order:
      1. Call ``clean_up_ipc_dirs()``; a failure is logged and ignored.
      2. Read the ``worker_ttl`` setting from the ``workers`` config section.
      3. If no container matching ``spawner*`` is running, try to launch one
         named ``abaco_spawner_0``; a failure is logged and ignored.
      4. Coerce the ttl to ``int``, falling back to ``-1`` when it is missing
         or not convertible.
      5. Call ``check_workers(actor_id, ttl)`` for every id returned by
         ``get_actor_ids()``.

    :return: None
    """
    logger.info("Running abaco health checks. Now: {}".format(time.time()))
    try:
        clean_up_ipc_dirs()
    except Exception as e:
        logger.error("Got exception from clean_up_ipc_dirs: {}".format(e))

    # Bind ttl up front so that a failed config lookup leaves a well-defined
    # value behind, instead of an unbound name that the int() conversion below
    # would only survive by accidentally catching a NameError.
    ttl = None
    try:
        ttl = Config.get('workers', 'worker_ttl')
    except Exception as e:
        logger.error(
            "Could not get worker_ttl config. Exception: {}".format(e))

    if not container_running(name='spawner*'):
        logger.critical("No spawners running! Launching new spawner..")
        command = 'python3 -u /actors/spawner.py'
        # NOTE(review): log_file is always passed as None here; no logging
        # strategy lookup happens in this function.
        try:
            run_container_with_docker(
                AE_IMAGE,
                command,
                name='abaco_spawner_0',
                # Only the part of AE_IMAGE before the first ':' is passed on
                # (presumably the image name without its tag -- confirm).
                environment={'AE_IMAGE': AE_IMAGE.split(':')[0]},
                mounts=[],
                log_file=None)
        except Exception as e:
            # Best effort: a spawner that cannot be restarted must not stop
            # the worker checks below from running.
            logger.critical(
                "Could not restart spawner. Exception: {}".format(e))

    # Deliberately broad: any value that cannot be turned into an int
    # (including the None left by a failed lookup) maps to -1.
    try:
        ttl = int(ttl)
    except Exception as e:
        logger.error("Invalid ttl config: {}. Setting to -1.".format(e))
        ttl = -1

    actor_ids = get_actor_ids()
    logger.info(
        "Found {} actor(s). Now checking status.".format(len(actor_ids)))
    for actor_id in actor_ids:
        check_workers(actor_id, ttl)
def main():
    """Run one pass of the abaco health checks, reporting progress via print.

    Reads the ``worker_ttl`` setting from the ``workers`` config section,
    launches a spawner container named ``abaco_spawner_0`` when no container
    matching ``spawner*`` is running, and then calls ``check_workers`` for
    every id returned by ``get_actor_ids()``. A ttl that cannot be converted
    to ``int`` is replaced by ``-1``.

    :return: None
    """
    print("Running abaco health checks. Now: {}".format(time.time()))
    raw_ttl = Config.get('workers', 'worker_ttl')

    spawner_is_up = container_running(name='spawner*')
    if not spawner_is_up:
        print("No spawners running! Launching new spawner..")
        run_container_with_docker(
            AE_IMAGE,
            'python3 -u /actors/spawner.py',
            name='abaco_spawner_0',
            environment={'AE_IMAGE': AE_IMAGE},
        )

    # Any conversion failure falls back to -1.
    try:
        worker_ttl = int(raw_ttl)
    except Exception:
        worker_ttl = -1

    actor_ids = get_actor_ids()
    print("Found {} actor(s). Now checking status.".format(len(actor_ids)))
    for actor_id in actor_ids:
        check_workers(actor_id, worker_ttl)
def check_spawner(queue):
    """
    Make sure a spawner container is running on this host for the given queue,
    starting one via ``start_spawner`` when none is found.

    :param queue: (str) - name of the queue whose spawner should be checked.
    :return: None
    """
    logger.debug("top of check_spawner for queue: {}".format(queue))

    # By convention spawner containers are named
    # <project>_<queue>_spawner_<count> (e.g. abaco_default_spawner_2), so a
    # match on "<queue>_spawner" is used to detect a spawner for this queue.
    name_fragment = '{}_spawner'.format(queue)

    if container_running(name=name_fragment):
        logger.debug("spawner for queue {} already running.".format(queue))
        return

    logger.critical(
        "No spawners running for queue {}! Launching new spawner..".format(
            queue))
    start_spawner(queue)