def main():
    """Run one pass of the abaco health checks.

    Steps, in order:
      1. Clean up IPC directories (best effort; a failure is logged).
      2. Read the ``worker_ttl`` option from the ``workers`` config section.
      3. If no container matching ``spawner*`` is running, launch one
         (best effort; a failure is logged at critical level).
      4. Call ``check_workers`` for every actor id, passing the ttl.

    A ttl that cannot be read, or that cannot be converted to an int, is
    replaced by -1 before the workers are checked.
    """
    logger.info("Running abaco health checks. Now: {}".format(time.time()))
    try:
        clean_up_ipc_dirs()
    except Exception as e:
        logger.error("Got exception from clean_up_ipc_dirs: {}".format(e))

    try:
        ttl = Config.get('workers', 'worker_ttl')
    except Exception as e:
        logger.error(
            "Could not get worker_ttl config. Exception: {}".format(e))
        # Bind the fallback here. Previously `ttl` stayed unbound and the
        # int() conversion below only worked because its broad `except`
        # swallowed the resulting UnboundLocalError, logging a misleading
        # "Invalid ttl config" message on top of this one.
        ttl = -1

    if not container_running(name='spawner*'):
        logger.critical("No spawners running! Launching new spawner..")
        command = 'python3 -u /actors/spawner.py'
        # NOTE(review): no log-file strategy is consulted here; log_file is
        # passed as None.
        try:
            run_container_with_docker(
                AE_IMAGE,
                command,
                name='abaco_spawner_0',
                # Only the part of AE_IMAGE before the first ':' is exported.
                environment={'AE_IMAGE': AE_IMAGE.split(':')[0]},
                mounts=[],
                log_file=None)
        except Exception as e:
            # A failed relaunch must not prevent the worker checks below.
            logger.critical(
                "Could not restart spawner. Exception: {}".format(e))

    try:
        ttl = int(ttl)
    except Exception as e:
        logger.error("Invalid ttl config: {}. Setting to -1.".format(e))
        ttl = -1

    ids = get_actor_ids()
    logger.info("Found {} actor(s). Now checking status.".format(len(ids)))
    for actor_id in ids:
        check_workers(actor_id, ttl)
def main():
    """Run one pass of the abaco health checks, reporting progress on stdout.

    Launches a spawner container when none matching ``spawner*`` is running,
    then calls ``check_workers`` on every actor id with the configured worker
    ttl (-1 when the configured value cannot be converted to an int).
    """
    started_at = time.time()
    print("Running abaco health checks. Now: {}".format(started_at))

    raw_ttl = Config.get('workers', 'worker_ttl')

    spawner_up = container_running(name='spawner*')
    if not spawner_up:
        print("No spawners running! Launching new spawner..")
        spawner_env = {'AE_IMAGE': AE_IMAGE}
        run_container_with_docker(
            AE_IMAGE,
            'python3 -u /actors/spawner.py',
            name='abaco_spawner_0',
            environment=spawner_env,
        )

    # The ttl is converted only after the spawner check, as before.
    try:
        ttl = int(raw_ttl)
    except Exception:
        ttl = -1

    actor_ids = get_actor_ids()
    print("Found {} actor(s). Now checking status.".format(len(actor_ids)))
    for actor_id in actor_ids:
        check_workers(actor_id, ttl)
def main():
    """Entry point for the abaco health checks.

    Prints a start banner, reads ``worker_ttl`` from the ``workers`` config
    section, starts a spawner container if no ``spawner*`` container is
    running, and finally runs ``check_workers`` for each actor id. A ttl that
    cannot be converted to an int is replaced by -1.
    """
    banner = "Running abaco health checks. Now: {}"
    print(banner.format(time.time()))

    configured_ttl = Config.get('workers', 'worker_ttl')

    if not container_running(name='spawner*'):
        print("No spawners running! Launching new spawner..")
        spawner_command = 'python3 -u /actors/spawner.py'
        launch_options = {
            'name': 'abaco_spawner_0',
            'environment': {'AE_IMAGE': AE_IMAGE},
        }
        run_container_with_docker(AE_IMAGE, spawner_command, **launch_options)

    try:
        worker_ttl = int(configured_ttl)
    except Exception:
        worker_ttl = -1

    actors = get_actor_ids()
    summary = "Found {} actor(s). Now checking status."
    print(summary.format(len(actors)))
    for actor in actors:
        check_workers(actor, worker_ttl)
def start_spawner(queue, idx='0'):
    """
    Start a spawner on this host listening to a queue, `queue`.

    The container is given a copy of this process's environment, extended
    with `AE_IMAGE` (the part of the module-level AE_IMAGE before the first
    ':') and `queue`. A failure to launch the container is logged at critical
    level and is not re-raised.

    :param queue: (str) - the queue the spawner should listen to.
    :param idx: (str) - the index to use as a suffix to the spawner container name.
    :return: None
    :raises RuntimeError: if `_abaco_secret` is not present in the environment.
    """
    command = 'python3 -u /actors/spawner.py'
    name = 'healthg_{}_spawner_{}'.format(queue, idx)

    try:
        environment = dict(os.environ)
    except Exception as e:
        environment = {}
        logger.error(
            "Unable to convert environment to dict; exception: {}".format(e))

    environment.update({
        'AE_IMAGE': AE_IMAGE.split(':')[0],
        'queue': queue,
    })

    if '_abaco_secret' not in environment:
        msg = 'Error in health process trying to start spawner. Did not find an _abaco_secret. Aborting'
        logger.critical(msg)
        # The original used a bare `raise` with no active exception, which
        # surfaces as "RuntimeError: No active exception to reraise" and hides
        # the real cause. Raise the same exception type with the message.
        raise RuntimeError(msg)

    # check logging strategy to determine log file name:
    log_file = 'abaco.log'
    if get_log_file_strategy() == 'split':
        log_file = 'spawner.log'

    try:
        run_container_with_docker(AE_IMAGE,
                                  command,
                                  name=name,
                                  environment=environment,
                                  mounts=[],
                                  log_file=log_file)
    except Exception as e:
        logger.critical(
            "Could not restart spawner for queue {}. Exception: {}".format(
                queue, e))