def main():
    """Run the Pub/Sub listener until the streaming pull ends.

    Configures DEBUG logging and installs the exception hook, reloads
    ``util.cfg`` through ``util.config_from_metadata()`` every 300 seconds
    until ``pubsub_topic_id`` is set, then subscribes ``callback`` to
    ``subscription_path`` and waits on the returned future.
    """
    config_root_logger(filename, level="DEBUG", util_level="DEBUG", logfile=logfile)
    sys.excepthook = handle_exception

    # Keep refreshing the config until a topic id is available.
    while True:
        util.cfg = util.config_from_metadata()
        if util.cfg.pubsub_topic_id:
            break
        sleep(300)

    pull_future = subscriber.subscribe(subscription_path, callback=callback)
    log.info(f"Listening for messages on '{subscription_path}'...")

    # The subscriber client is used as a context manager around the wait.
    with subscriber:
        try:
            pull_future.result()
        except Exception as err:
            log.error(err)
            # Cancel the pull, then wait on the future once more.
            pull_future.cancel()
            pull_future.result()
from time import sleep
import setup
import sys
import util
from collections import namedtuple
from pathlib import Path
from google.cloud import pubsub_v1
from util import project, lkp, cfg
from util import config_root_logger, handle_exception, run, publish_message

# The log file is named after this script (with a ".log" suffix). It lives in
# cfg.slurm_log_dir when cfg is truthy, otherwise in the current directory.
filename = Path(__file__).name
logfile = (Path(cfg.slurm_log_dir if cfg else ".") / filename).with_suffix(".log")
# util.chown_slurm is called with mode 0o600 before logging is configured
# (presumably restricting the log to its owner -- confirm against util).
util.chown_slurm(logfile, mode=0o600)
config_root_logger(filename, level="DEBUG", util_level="DEBUG", logfile=logfile)
log = logging.getLogger(filename)

# Pub/Sub subscription identity: the project comes from util.project and the
# subscription id is lkp.hostname.
project_id = project
subscription_id = lkp.hostname
subscriber = pubsub_v1.SubscriberClient()
subscription_path = subscriber.subscription_path(project_id, subscription_id)

# Two-field record: "base" and "flags".
StateTuple = namedtuple("StateTuple", "base,flags")

def natural_keys(text): """String sorting heuristic function for numbers""" def atoi(text):
if __name__ == '__main__':
    # Command line: one or more positionals ("nodes [jobid]") and a debug flag.
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    parser.add_argument('args', nargs='+', help="nodes [jobid]")
    parser.add_argument(
        '--debug', '-d',
        dest='debug',
        action='store_true',
        help='Enable debugging output',
    )

    # When SLURM_JOB_NODELIST is in the environment, the node list and job id
    # from the environment are appended after the command-line arguments.
    if "SLURM_JOB_NODELIST" not in os.environ:
        args = parser.parse_args()
    else:
        args = parser.parse_args([
            *sys.argv[1:],
            os.environ['SLURM_JOB_NODELIST'],
            os.environ['SLURM_JOB_ID'],
        ])

    # First positional is the node list; the second, if given, is the job id
    # (defaults to 0).
    nodes = args.args[0]
    job_id = args.args[1] if len(args.args) > 1 else 0

    # --debug selects DEBUG/DEBUG; otherwise INFO for the root logger and
    # ERROR for util.
    util.config_root_logger(
        level='DEBUG' if args.debug else 'INFO',
        util_level='DEBUG' if args.debug else 'ERROR',
        logfile=LOGFILE,
    )
    log = logging.getLogger(Path(__file__).name)
    sys.excepthook = util.handle_exception

    main(nodes, job_id)
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
# Arguments: a required cluster-name filter, an optional partition-name
# filter (stored as args.partition_name), and a debug switch.
parser.add_argument("slurm_cluster_name", help="Slurm cluster name filter")
parser.add_argument("--partition", "-p", dest="partition_name", help="Slurm partition name filter")
parser.add_argument(
    "--debug", "-d", dest="debug", action="store_true", help="Enable debugging output",
)
args = parser.parse_args()

# The log file sits next to this script, named after logger_name with a
# ".log" suffix.
logfile = (Path(__file__).parent / logger_name).with_suffix(".log")
# --debug selects DEBUG for both levels; otherwise INFO with util at ERROR.
if args.debug:
    config_root_logger(logger_name, level="DEBUG", util_level="DEBUG", logfile=logfile)
else:
    config_root_logger(logger_name, level="INFO", util_level="ERROR", logfile=logfile)

# The parsed namespace is handed to main() as a whole.
main(args)
help="Force attempted creation of the nodelist, whether nodes are exclusive or not.", )
# Debug switch: stored as args.debug, False unless the flag is given.
parser.add_argument(
    "--debug", "-d", dest="debug", action="store_true", help="Enable debugging output"
)

if __name__ == "__main__":
    # When SLURM_JOB_NODELIST is in the environment, SLURM_JOB_NODELIST and
    # SLURM_JOB_ID are appended to the command-line arguments before parsing.
    if "SLURM_JOB_NODELIST" in os.environ:
        argv = [
            *sys.argv[1:],
            os.environ["SLURM_JOB_NODELIST"],
            os.environ["SLURM_JOB_ID"],
        ]
        args = parser.parse_args(argv)
    else:
        args = parser.parse_args()

    # util.chown_slurm is applied to the log file (mode 0o600) before logging
    # is configured.
    util.chown_slurm(LOGFILE, mode=0o600)
    # --debug selects DEBUG for both levels; otherwise INFO with util at ERROR.
    if args.debug:
        util.config_root_logger(
            filename, level="DEBUG", util_level="DEBUG", logfile=LOGFILE
        )
    else:
        util.config_root_logger(
            filename, level="INFO", util_level="ERROR", logfile=LOGFILE
        )
    sys.excepthook = util.handle_exception

    main(args.nodelist, args.job_id, args.force)
{ "controller": setup_controller, "compute": setup_compute, "login": setup_login, }, lkp.instance_role, lambda: log.fatal(f"Unknown node role: {lkp.instance_role}"), ) setup() end_motd()
if __name__ == "__main__":
    # util.chown_slurm is applied to the log file (mode 0o600) before logging
    # is configured; util logging is set to DEBUG.
    util.chown_slurm(LOGFILE, mode=0o600)
    util.config_root_logger(filename, logfile=LOGFILE, util_level="DEBUG")
    sys.excepthook = util.handle_exception
    # Rebinds the module-level lkp to a Lookup built from cfg.
    lkp = util.Lookup(cfg)  # noqa F811
    try:
        main()
    except subprocess.TimeoutExpired as e:
        # Only a subprocess timeout is handled here: the command, timeout and
        # stripped output are logged as an error.
        # NOTE(review): TimeoutExpired.stdout / .stderr are None when output
        # was not captured, in which case .strip() would raise AttributeError
        # -- confirm every command run under main() captures its output.
        log.error(f"""TimeoutExpired: command={e.cmd} timeout={e.timeout} stdout: {e.stdout.strip()} stderr: {e.stderr.strip()} """)