def drain_list():
    """
    Print a table of drained resources, one row per drain entry.

    Fetches the "resource.status" response, builds one StatusLine for each
    key in its "drain" object, then prints a header followed by the
    timestamp, ranks, reason and nodelist columns for every row.
    """
    column_titles = {
        "timestamp": "TIMESTAMP",
        "ranks": "RANK",
        "reason": "REASON",
        "nodelist": "NODELIST",
    }
    status = RPC(flux.Flux(), "resource.status").get()
    hostnames = ResourceSet(status["R"]).nodelist

    def make_row(idset_str, info):
        # Each key of the "drain" object is parsed as an idset; its members
        # index into the nodelist to recover the matching hostnames.
        ids = IDset(idset_str)
        hosts = Hostlist([hostnames[i] for i in ids])
        return StatusLine("drain", ids, hosts, info["reason"], info["timestamp"])

    # All rows are built before anything is printed
    rows = [make_row(key, info) for key, info in status["drain"].items()]

    layout = "{timestamp:<20} {ranks:<8} {reason:<30} {nodelist}"
    formatter = flux.util.OutputFormat(column_titles, layout, prepend="0.")
    print(formatter.header())
    for row in rows:
        print(formatter.format(row))
def rpc_create(self, topic, payload=None, nodeid=raw.FLUX_NODEID_ANY, flags=0):
    """
    Construct and return an RPC object for ``topic`` on this handle.

    All arguments are handed to the RPC constructor unchanged, with this
    object passed as its first argument.
    """
    new_rpc = RPC(self, topic, payload, nodeid, flags)
    return new_rpc
def list_handler(args):
    """
    Print one formatted row per requested resource state.

    args.states is a comma-separated list of state names; an unknown name
    is logged as an error and the process exits with status 1. Input comes
    from JSON on stdin when args.from_stdin is set, otherwise from the
    "sched.resource-status" RPC.
    """
    valid_states = ["up", "down", "allocated", "free", "all"]
    headings = {
        "state": "STATE",
        "nnodes": "NNODES",
        "ncores": "NCORES",
        "ngpus": "NGPUS",
        "ranks": "RANKS",
        "rlist": "LIST",
    }

    states = args.states.split(",")
    # Report the first unrecognized state (if any) and bail out
    unknown = next((name for name in states if name not in valid_states), None)
    if unknown is not None:
        LOGGER.error("Invalid resource state %s specified", unknown)
        sys.exit(1)

    # Default layout, widened with the resource list in verbose mode;
    # an explicit args.format replaces it entirely.
    default_fmt = "{state:>10} {nnodes:>6} {ncores:>8} {ngpus:>8}"
    if args.verbose:
        default_fmt += " {rlist}"
    fmt = args.format or default_fmt

    formatter = flux.util.OutputFormat(headings, fmt, prepend="0.")

    resp = (
        json.load(sys.stdin)
        if args.from_stdin
        else RPC(flux.Flux(), "sched.resource-status").get()
    )
    resources = SchedResourceList(resp)

    if not args.no_header:
        print(formatter.header())
    for name in states:
        print(formatter.format(resources[name]))
def undrain(args):
    """
    Send a "resource.undrain" request for args.targets to nodeid 0 and
    call get() on the resulting RPC.
    """
    handle = flux.Flux()
    payload = {"targets": args.targets}
    request = RPC(handle, "resource.undrain", payload, nodeid=0)
    request.get()
def drain(args):
    """
    Drain args.targets via the resource module, or list drained targets.

    With no targets, the current drain list is printed instead. Otherwise
    a "resource.drain" request is sent to nodeid 0. --update and --force
    are mutually exclusive: specifying both logs an error and exits with
    status 1.
    """
    if args.targets is None:
        drain_list()
        return

    payload = {"targets": args.targets}

    if args.update and args.force:
        LOGGER.error("Only one of --force and --update may be specified")
        sys.exit(1)

    # At most one mode is set; --update maps to "update", --force to
    # "overwrite", and neither leaves the key out of the payload.
    mode = None
    if args.update:
        mode = "update"
    elif args.force:
        mode = "overwrite"
    if mode is not None:
        payload["mode"] = mode

    if args.reason:
        payload["reason"] = " ".join(args.reason)

    handle = flux.Flux()
    RPC(handle, "resource.drain", payload, nodeid=0).get()
def rpc_send(self, topic, payload=ffi.NULL, nodeid=raw.FLUX_NODEID_ANY, flags=0):
    """
    Create an RPC on this handle and return the result of its get().

    The RPC is used as a context manager, so its exit handler runs before
    this method returns.
    """
    with RPC(self, topic, payload, nodeid, flags) as request:
        return request.get()
def rpc_send(self, topic, payload=ffi.NULL, nodeid=flux.FLUX_NODEID_ANY, flags=0):
    """
    Create an RPC on this handle and return the result of its get().
    """
    return RPC(self, topic, payload, nodeid, flags).get()
def reload(args):
    """
    Send a "resource.reload" request to nodeid 0.

    The payload carries the resolved real path of args.path together with
    the args.xml and args.force values.
    """
    handle = flux.Flux()
    payload = {
        "path": os.path.realpath(args.path),
        "xml": args.xml,
        "force": args.force,
    }
    request = RPC(handle, "resource.reload", payload, nodeid=0)
    request.get()
def __init__(self, handle):
    """
    Start the two child requests and register them with the parent class.

    The result attributes start out as None.
    """
    # Initiate RPCs to both resource.status and sched.resource-status:
    status_rpc = RPC(handle, "resource.status", nodeid=0)
    list_rpc = resource_list(handle)

    self.rlist = None
    self.rstatus = None
    self.allocated_ranks = None

    super().__init__([status_rpc, list_rpc])
def drain(args):
    """
    Send a "resource.drain" request for args.idset.

    The words in args.reason are joined with single spaces to form the
    reason string.
    """
    handle = flux.Flux()
    payload = {"idset": args.idset, "reason": " ".join(args.reason)}
    request = RPC(handle, "resource.drain", payload)
    request.get()
def drain(args):
    """
    Drain args.targets via the resource module, or list drained targets.

    With no targets, the current drain list is printed instead. Otherwise
    a "resource.drain" request carrying the targets and the space-joined
    args.reason is sent to nodeid 0.
    """
    if args.targets is None:
        drain_list()
        return

    handle = flux.Flux()
    payload = {"targets": args.targets, "reason": " ".join(args.reason)}
    request = RPC(handle, "resource.drain", payload, nodeid=0)
    request.get()
def status(args):
    """
    Print a table of resource status lines, ordered by state.

    Input is read from stdin when args.from_stdin is set, otherwise it is
    fetched with the "resource.status" RPC on nodeid 0. Lines are emitted
    in the order their state appears in the list of valid states, and only
    for states selected by args.states (or the defaults).
    """
    valid_states = ["all", "online", "avail", "offline", "exclude", "drain"]
    default_states = "avail,offline,exclude,drain"
    headings = {
        "state": "STATUS",
        "nnodes": "NNODES",
        "ranks": "RANKS",
        "nodelist": "NODELIST",
        "reason": "REASON",
    }

    # Emit list of valid states or formats if requested
    if "help" in (args.states, args.format):
        status_help(args, valid_states, headings)

    # Get state list from args or defaults:
    states = status_get_state_list(args, valid_states, default_states)

    # The reason column is part of the default layout only with -vv;
    # an explicit args.format overrides the default either way.
    default_fmt = (
        "{state:>10} {nnodes:>6} {ranks:<15} {reason:<25} {nodelist}"
        if args.verbose >= 2
        else "{state:>10} {nnodes:>6} {ranks:<15} {nodelist}"
    )
    fmt = args.format or default_fmt

    # Get payload from stdin or from resource.status RPC:
    resp = (
        sys.stdin.read()
        if args.from_stdin
        else RPC(flux.Flux(), "resource.status", nodeid=0).get()
    )
    rstat = ResourceStatus.from_status_response(resp, fmt)

    formatter = flux.util.OutputFormat(headings, fmt, prepend="0.")
    if not args.no_header:
        print(formatter.header())

    def state_rank(entry):
        # Position of the entry's state in valid_states drives the ordering
        return valid_states.index(entry.state)

    for entry in sorted(rstat, key=state_rank):
        if entry.state not in states:
            continue
        # Skip empty lines unless --verbose or --states
        is_hidden_empty = (
            entry.nnodes == 0 and args.states is None and not args.verbose
        )
        if is_hidden_empty:
            continue
        print(formatter.format(entry))
def cleanup_push(args):
    """
    Push command(s) onto the "cleanup" list via the "runat.push" RPC.

    When args.cmdline is given, its words are joined into a single command.
    Otherwise one command is read per line from stdin, with surrounding
    whitespace stripped. The commands are sent in reverse order; per the
    original notes, they are pushed to the front of the list, so reversing
    retains the order of the block of commands.
    """
    if args.cmdline:
        commands = [" ".join(args.cmdline)]
    else:
        commands = [entry.strip() for entry in sys.stdin]

    handle = flux.Flux()
    payload = {"name": "cleanup", "commands": list(reversed(commands))}
    request = RPC(handle, "runat.push", payload)
    request.get()
### Main test program
# Checks that a "resource.monitor-waitup" request for all ranks does not
# complete right away after the resource modules were unloaded everywhere
# and reloaded on rank 0 only.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("waitup-test")

handle = flux.Flux()
size = int(handle.attr_get("size"))

log.info("unloading resource modules across %d ranks", size)
resource_unload_all()

log.info("reloading resource module on rank 0")
resource_load(0)

log.info("initiating RPC to wait for %d ranks", size)
future = RPC(handle, "resource.monitor-waitup", {"up": size})

# Ensure waitup initially blocks
delay = 0.5
log.info("waiting up to %.2fs for RPC (should block)", delay)
try:
    future.wait_for(delay)
except OSError as err:
    # A timeout is the expected outcome here; any other OSError is a
    # genuine failure and is re-raised with its original traceback.
    if err.errno != errno.ETIMEDOUT:
        raise

if future.is_ready():
    # Name the RPC actually under test (the message previously referred
    # to resource.get-xml).
    log.error("resource.monitor-waitup returned before expected")
    sys.exit(1)
def undrain(args):
    """
    Send a "resource.undrain" request for args.idset and call get() on
    the resulting RPC.
    """
    handle = flux.Flux()
    payload = {"idset": args.idset}
    request = RPC(handle, "resource.undrain", payload)
    request.get()
### Main test program
# Checks that a "resource.get-xml" request does not complete right away
# while one rank is still missing its resource module.
logging.basicConfig(level=logging.INFO)
log = logging.getLogger("get-xml-test")

handle = flux.Flux()
size = int(handle.attr_get("size"))

log.info("unloading resource modules across %d ranks", size)
resource_unload_all()

log.info("reloading all resource modules except rank %d", size - 1)
resource_load_all_except_one(size)

log.info("initiating resource.get-xml RPC")
future = RPC(handle, "resource.get-xml", {})

# Ensure get-xml initially blocks
delay = 0.5
log.info("waiting up to %.2fs for get-xml (should block)", delay)
try:
    future.wait_for(delay)
except OSError as err:
    # Timing out is the expected result; propagate anything else
    if err.errno != errno.ETIMEDOUT:
        raise err

if future.is_ready():
    log.error("resource.get-xml returned before expected")
    sys.exit(1)
def _query(self):
    """
    Return an RPC for the "job-list.job-stats" topic on self.handle,
    created with an empty payload.
    """
    stats_rpc = RPC(self.handle, "job-list.job-stats", {})
    return stats_rpc