def get_nodes_dict(xml=None):
    """Build a per-node dict from Moab's ``mdiag -n`` XML output.

    Moab counterpart of vsc.pbs.interface.get_nodes_dict.

    When ``xml`` is None, ``mdiag -n --format=xml`` is run with a 60 second
    timeout to obtain it; if that run reports an error, the problem is logged
    and None is returned.

    Returns a dict keyed by the NODEID attribute of each <node> element. Every
    value is a dict with
      - 'xml': the result of items() on the <node> element
      - 'derived': 'states', 'state' (first of the states), 'size', 'physmem'
        and 'np', after being passed through pbs_nodes.make_state_map

    When deriving the values for a node raises TypeError or AttributeError
    (e.g. because an attribute is absent), the node is dropped again: a node
    named 'localhost' is skipped with a debug message, any other node re-raises
    the same exception type with the node XML added to the message.
    """
    if xml is None:
        mdiag_cmd = "mdiag -n --format=xml"
        err, xml = RunTimeout.run(mdiag_cmd.split(), timeout=60)
        if err:
            _log.error("Problem occurred running %s: %s (%s)" % (mdiag_cmd, err, xml))
            return None

    nodes = {}
    for node in etree.fromstring(xml):
        # Example element:
        # <node AVLCLASS="[bshort][debug][short][long][special][workshop]"
        #   CFGCLASS="[bshort][debug][short][long][special][workshop]"
        #   FEATURES="hadoop,collectl" FLAGS="rmdetected" JOBLIST="3956525"
        #   LASTUPDATETIME="1363206409" LOAD="8.160000" MAXJOB="0"
        #   MAXJOBPERUSER="******" MAXLOAD="0.000000" NODEID="node001.gengar.gent.vsc"
        #   NODEINDEX="1" NODESTATE="Busy" OS="linux" OSLIST="linux" PARTITION="gengar"
        #   PRIORITY="0" PROCSPEED="0" RADISK="92194" RAMEM="16053" RAPROC="0" RASWAP="34219"
        #   RCDISK="92381" RCMEM="16053" RCPROC="8" RCSWAP="36533" RESCOUNT="1"
        #   RMACCESSLIST="gengar" RSVLIST="3956525" SPEED="1.000000" STATACTIVETIME="24357970"
        #   STATMODIFYTIME="1363076905" STATTOTALTIME="25499884" STATUPTIME="24971920">
        try:
            host = node.get("NODEID")
            # the entry is registered up front and removed again if deriving fails
            entry = {}
            nodes[host] = entry
            entry['xml'] = node.items()

            pbs_states = MOAB_PBS_NODEMAP[node.get("NODESTATE").lower()]
            derived = {
                'states': pbs_states,
                'state': pbs_states[0],
                'size': str2byte(node.get("RCDISK") + "mb"),
                'physmem': str2byte(node.get("RCMEM") + "mb"),
                'np': int(node.get("RCPROC")),
            }
        except (TypeError, AttributeError) as exc:
            del nodes[host]
            node_txt = etree.tostring(node, pretty_print=True)
            if host not in ('localhost', ):
                raise type(exc)("%s for node %s" % (exc, node_txt))
            _log.debug("Skipping %s (%s)" % (host, node_txt))
            continue

        # add state mapping to derived
        pbs_nodes.make_state_map(derived)
        entry['derived'] = derived

    return nodes
def get_nodes_dict():
    """Return the pbs_nodes equivalent info as a dict, extended with derived data.

    The dict returned by ``get_query().getnodes([])`` is handed back after every
    node entry has been updated in place:
      - extra states are added to the node's state list: free_and_job, error,
        down_on_error and bad are inserted at the front, idle is appended
      - a 'derived' dict is attached holding the jobs (when present), the
        states as strings (passed through make_state_map), the number of
        processors and the physmem/totmem/size status values as bytes
    """
    node_states = get_query().getnodes([])

    for name, full_state in node_states.items():
        states = full_state[ATTR_STATE]
        has_jobs = ATTR_JOBS in full_state
        has_error = ATTR_ERROR in full_state

        # The order of the updates below determines the order of the state list.
        if has_jobs:
            if ND_free in states:
                _log.debug('Added free_and_job node %s' % (name))
                states.insert(0, ND_free_and_job)
        else:
            if ND_free in states:
                _log.debug('Append idle node %s' % (name))
                states.append(ND_idle)  # append it, not insert
            if ND_offline in states:
                _log.debug('Append idle node %s' % (name))
                states.append(ND_idle)

        if has_error:
            _log.debug('Added error node %s' % (name))
            states.insert(0, ND_error)
            if ND_down in states:
                _log.debug('Added down_on_error node %s' % (name))
                states.insert(0, ND_down_on_error)

        # extend the node dict with derived dict (for convenience)
        derived = {}
        if has_jobs:
            jobs = full_state.get_jobs()
            # a single job id that does not match JOBID_REG marks the node as bad
            if any(not JOBID_REG.search(jobid.strip()) for jobid in jobs):
                _log.debug('Added bad node %s for jobs %s' % (name, jobs))
                states.insert(0, ND_bad)
            derived[ATTR_JOBS] = jobs

        derived[ATTR_STATES] = [str(state) for state in states]
        make_state_map(derived)

        if ATTR_NP in full_state:
            derived[ATTR_NP] = int(full_state[ATTR_NP][0])

        if ATTR_STATUS in full_state:
            status = full_state[ATTR_STATUS]
            for prop in ('physmem', 'totmem', 'size'):
                if prop in status:
                    val = status.get(prop)[0]
                    if prop == 'size':
                        # 'size': ['539214180kb:539416640kb']
                        #   - use 2nd field
                        val = val.split(':')[1]
                    derived[prop] = str2byte(val)

        full_state['derived'] = derived
        _log.debug("node %s derived data %s " % (name, derived))

    return node_states
def get_nodes_dict():
    """Get the pbs_nodes equivalent info as dict, with a 'derived' dict per node.

    Every node entry obtained from ``get_query().getnodes([])`` is modified in
    place: its state list gets additional states (free_and_job, idle, error,
    down_on_error, bad) and a 'derived' key is added with the jobs, the
    stringified states (passed through make_state_map), the processor count
    and the physmem/totmem/size status values converted by str2byte.

    Returns the (modified) dict of node entries.
    """
    query = get_query()
    node_states = query.getnodes([])

    for name, full_state in node_states.items():
        states = full_state[ATTR_STATE]
        jobless = ATTR_JOBS not in full_state
        errored = ATTR_ERROR in full_state

        # just add states; front inserts and appends are applied in this order
        if not jobless and ND_free in states:
            _log.debug('Added free_and_job node %s' % (name))
            states.insert(0, ND_free_and_job)
        if jobless and ND_free in states:
            _log.debug('Append idle node %s' % (name))
            states.append(ND_idle)  # append it, not insert
        if jobless and ND_offline in states:
            _log.debug('Append idle node %s' % (name))
            states.append(ND_idle)
        if errored:
            _log.debug('Added error node %s' % (name))
            states.insert(0, ND_error)
        if errored and ND_down in states:
            _log.debug('Added down_on_error node %s' % (name))
            states.insert(0, ND_down_on_error)

        # extend the node dict with derived dict (for convenience)
        derived = {}
        if not jobless:
            jobs = full_state.get_jobs()
            jobs_ok = all(JOBID_REG.search(jobid.strip()) for jobid in jobs)
            if not jobs_ok:
                _log.debug('Added bad node %s for jobs %s' % (name, jobs))
                states.insert(0, ND_bad)
            derived[ATTR_JOBS] = jobs

        derived[ATTR_STATES] = [str(state) for state in states]
        make_state_map(derived)

        if ATTR_NP in full_state:
            derived[ATTR_NP] = int(full_state[ATTR_NP][0])

        if ATTR_STATUS in full_state:
            status = full_state[ATTR_STATUS]
            for prop in ['physmem', 'totmem', 'size']:
                if prop not in status:
                    continue
                raw = status.get(prop)[0]
                # 'size': ['539214180kb:539416640kb'] - use 2nd field
                derived[prop] = str2byte(raw.split(':')[1] if prop == 'size' else raw)

        full_state['derived'] = derived
        _log.debug("node %s derived data %s " % (name, derived))

    return node_states
def collect_nodeinfo():
    """Collect node ids, node states and a grouping of nodes by hardware type.

    Iterates over the (node, full_state) pairs from get_nodes() and returns a
    tuple ``(node_list, state_list, types)``:
      - node_list: per node, the first run of digits in the node name, or the
        1-based position of the node (as string) if the name has no digits
      - state_list: per node, the derived state
      - types: dict mapping (cores, physical mem, swap, disk size) tuples to the
        list of nodes of that type; only nodes whose derived nodestate is
        NDST_OK and that have all four values set and non-zero are included
    """
    host_id_re = re.compile(r"(?P<id>\d+)")
    node_list, state_list, types = [], [], {}

    for position, (node, full_state) in enumerate(get_nodes(), start=1):
        # A node can have several states; only the derived one is reported.
        derived = full_state['derived']
        state_list.append(derived[ATTR_STATE])

        if derived[ATTR_NODESTATE] == NDST_OK:
            cores = derived.get(ATTR_NP)
            physmem = derived.get('physmem')
            totmem = derived.get('totmem')
            size = derived.get('size')

            # there shouldn't be any value 0 (or missing)
            if cores and physmem and totmem and size:
                # sizes are expressed in GB and rounded up
                GB = str2byte('gb')
                pmem = ceil(10 * physmem / GB) / 10
                tmem = ceil(10 * totmem / GB) / 10
                dsize = ceil(10 * size / (5 * GB)) / 2
                types.setdefault((cores, pmem, tmem - pmem, dsize), []).append(node)

        found = host_id_re.search(node)
        node_list.append(found.group('id') if found else str(position))

    return node_list, state_list, types
def collect_nodeinfo():
    """Collect node information.

    Walks over the (node, full_state) pairs returned by get_nodes() and builds
      - a list with one id per node: the first group of digits found in the
        node name, else the 1-based index of the node as a string
      - a list with the derived 'state' of every node
      - a dict grouping nodes by (cores, physical mem, swap, disk size); a node
        is only grouped when its derived 'nodestate' equals NDST_OK and none of
        np/physmem/totmem/size is missing or 0

    Returns the tuple (node_list, state_list, types).
    """
    digits = re.compile(r"(?P<id>\d+)")
    types = {}
    state_list = []
    node_list = []

    for idx, (node, full_state) in enumerate(get_nodes()):
        # A node can have several states; only the derived one is reported.
        info = full_state['derived']
        state_list.append(info['state'])

        if info['nodestate'] == NDST_OK:
            cores, physmem, totmem, size = [info.get(key, None) for key in ('np', 'physmem', 'totmem', 'size')]

            # there shouldn't be any value 0
            if all((cores, physmem, totmem, size)):
                # sizes are expressed in GB and rounded up
                GB = str2byte('gb')
                pmem = ceil(10 * physmem / GB) / 10
                tmem = ceil(10 * totmem / GB) / 10
                swap = tmem - pmem
                dsize = ceil(10 * size / (5 * GB)) / 2

                typ = (cores, pmem, swap, dsize)
                if typ not in types:
                    types[typ] = []
                types[typ].append(node)

        hit = digits.search(node)
        if hit:
            node_id = hit.group('id')
        else:
            node_id = str(idx + 1)  # offset +1
        node_list.append(node_id)

    return node_list, state_list, types
def get_jobs_dict():
    """Get jobs dict with derived info.

    Every job entry returned by get_jobs() gets an extra "derived" dict with
      - "state": first value of "job_state"
      - "user": user part of "Job_Owner" (when it looks like user@host)
      - "totalwalltimesec": requested walltime in seconds (when str2sec can
        convert it)
      - "nodes" / "cores": requested number of nodes and cores per node, taken
        from "neednodes" or else "nodes" in "Resource_List"; a request that
        names a node ("node<digits>...") counts as 1 node with 1 core
      - "used_mem" / "used_vmem" / "used_walltime" / "used_cput": values from
        "resources_used", converted with str2byte resp. str2sec
      - "exec_hosts": dict mapping hostname to the number of entries for that
        host in jobdata.get_nodes()

    Returns the dict from get_jobs(), modified in place.
    """
    jobs = get_jobs()

    reg_user = re.compile(r"(?P<user>\w+)@\S+")
    nodes_cores = re.compile(r"(?P<nodes>\d+)(:ppn=(?P<cores>\d+))?")
    nodes_nocores = re.compile(r"(?P<nodes>node\d+).*?")

    for jobdata in jobs.values():
        derived = {}

        derived["state"] = jobdata["job_state"][0]

        owner = reg_user.search(jobdata["Job_Owner"][0])
        if owner:
            derived["user"] = owner.group("user")

        if "Resource_List" in jobdata:
            resource_list = jobdata["Resource_List"]

            # walltime
            if "walltime" in resource_list:
                totalwallsec = str2sec(resource_list["walltime"][0])
                if totalwallsec is not None:
                    derived["totalwalltimesec"] = totalwallsec

            # nodes / cores
            # The match must be reset for every job: without it, a job that has
            # neither "neednodes" nor "nodes" fails on an unbound name, or
            # silently inherits the nodes/cores of the previous job.
            m = None
            if "neednodes" in resource_list:
                neednodes = resource_list["neednodes"][0]
                m = nodes_cores.match(neednodes)
                if not m and nodes_nocores.match(neednodes):
                    # a named node: treat as a request for a single node
                    m = nodes_cores.match("1")
            elif "nodes" in resource_list:
                m = nodes_cores.match(resource_list["nodes"][0])

            if m:
                derived["nodes"] = int(m.group("nodes"))
                # the ppn part is optional; default to 1 core
                derived["cores"] = int(m.group("cores") or 1)

        # resource used
        if "resources_used" in jobdata:
            resources_used = jobdata["resources_used"]

            for key in ("mem", "vmem"):
                if key in resources_used:
                    derived["used_" + key] = str2byte(resources_used[key][0])

            for key in ("walltime", "cput"):
                if key in resources_used:
                    sec = str2sec(resources_used[key][0])
                    if sec is not None:
                        derived["used_" + key] = sec

        if "exec_host" in jobdata:
            exec_hosts = {}
            for host in jobdata.get_nodes():
                # the hostname is the part before the first "/"
                hostname = host.split("/")[0]
                exec_hosts[hostname] = exec_hosts.get(hostname, 0) + 1
            derived["exec_hosts"] = exec_hosts

        jobdata["derived"] = derived

    return jobs
def get_jobs_dict():
    """Get jobs dict with derived info.

    Every job entry returned by get_jobs() gets an extra 'derived' dict with
      - 'state': first value of 'job_state'
      - 'user': user part of 'Job_Owner' (when it looks like user@host)
      - 'totalwalltimesec': requested walltime in seconds (when str2sec can
        convert it)
      - 'nodes' / 'cores': requested number of nodes and cores per node, taken
        from 'neednodes' or else 'nodes' in 'Resource_List'; a request that
        names a node ('node<digits>...') counts as 1 node with 1 core
      - 'used_mem' / 'used_vmem' / 'used_walltime' / 'used_cput': values from
        'resources_used', converted with str2byte resp. str2sec
      - 'exec_hosts': dict mapping hostname to the number of '+'-separated
        entries for that host in the first 'exec_host' value

    Returns the dict from get_jobs(), modified in place.
    """
    jobs = get_jobs()

    reg_user = re.compile(r"(?P<user>\w+)@\S+")
    nodes_cores = re.compile(r"(?P<nodes>\d+)(:ppn=(?P<cores>\d+))?")
    nodes_nocores = re.compile(r"(?P<nodes>node\d+).*?")

    for jobdata in jobs.values():
        derived = {}

        derived['state'] = jobdata['job_state'][0]

        owner = reg_user.search(jobdata['Job_Owner'][0])
        if owner:
            derived['user'] = owner.group('user')

        if 'Resource_List' in jobdata:
            resource_list = jobdata['Resource_List']

            # walltime
            if 'walltime' in resource_list:
                totalwallsec = str2sec(resource_list['walltime'][0])
                if totalwallsec is not None:
                    derived['totalwalltimesec'] = totalwallsec

            # nodes / cores
            # The match must be reset for every job: without it, a job that has
            # neither 'neednodes' nor 'nodes' fails on an unbound name, or
            # silently inherits the nodes/cores of the previous job.
            m = None
            if 'neednodes' in resource_list:
                neednodes = resource_list['neednodes'][0]
                m = nodes_cores.match(neednodes)
                if not m and nodes_nocores.match(neednodes):
                    # a named node: treat as a request for a single node
                    m = nodes_cores.match("1")
            elif 'nodes' in resource_list:
                m = nodes_cores.match(resource_list['nodes'][0])

            if m:
                derived['nodes'] = int(m.group('nodes'))
                # the ppn part is optional; default to 1 core
                derived['cores'] = int(m.group('cores') or 1)

        # resource used
        if 'resources_used' in jobdata:
            resources_used = jobdata['resources_used']

            for key in ('mem', 'vmem'):
                if key in resources_used:
                    derived['used_' + key] = str2byte(resources_used[key][0])

            for key in ('walltime', 'cput'):
                if key in resources_used:
                    sec = str2sec(resources_used[key][0])
                    if sec is not None:
                        derived['used_' + key] = sec

        if 'exec_host' in jobdata:
            exec_hosts = {}
            for host in jobdata['exec_host'][0].split('+'):
                # the hostname is the part before the first '/'
                hostname = host.split('/')[0]
                exec_hosts[hostname] = exec_hosts.get(hostname, 0) + 1
            derived['exec_hosts'] = exec_hosts

        jobdata['derived'] = derived

    return jobs
def get_jobs_dict(attrs=None):
    """
    Get jobs dict with derived info

    attrs is passed to get_jobs

    Each job entry gets a 'derived' dict with the job state, the user, the
    requested walltime in seconds, the requested nodes and cores, the used
    mem/vmem/walltime/cput and the number of entries per execution host.
    The dict returned by get_jobs is modified in place and returned.
    """
    jobs = get_jobs(attrs=attrs)

    reg_user = re.compile(r"(?P<user>\w+)@\S+")
    nodes_cores = re.compile(r"(?P<nodes>\d+)(:ppn=(?P<cores>\d+))?")
    namednodes_cores = re.compile(r"(?P<nodes>node\d+[^:+]*)(:ppn=(?P<cores>\d+))?")
    nodes_nocores = re.compile(r"(?P<nodes>node\d+).*?")

    for jobdata in jobs.values():
        derived = {}
        derived['state'] = jobdata['job_state'][0]

        user_match = reg_user.search(jobdata['Job_Owner'][0])
        if user_match:
            derived['user'] = user_match.group('user')

        if 'Resource_List' in jobdata:
            resource_list = jobdata['Resource_List']

            # walltime
            if 'walltime' in resource_list:
                totalwallsec = str2sec(resource_list['walltime'][0])
                if totalwallsec is not None:
                    derived['totalwalltimesec'] = totalwallsec

            # nodes / cores: 'neednodes' takes precedence over 'nodes'
            for key in ('neednodes', 'nodes'):
                if key in resource_list:
                    need_nodes = resource_list[key][0]
                    break
            else:
                need_nodes = None

            if need_nodes is not None:
                m = nodes_cores.match(need_nodes)
                if not m:
                    named = namednodes_cores.match(need_nodes)
                    if named:
                        # a named node counts as 1 node; keep its ppn when given
                        m = nodes_cores.match("1:ppn=%s" % (named.groups()[2] or "1"))
                    elif nodes_nocores.match(need_nodes):
                        # NOTE(review): looks unreachable, namednodes_cores already
                        # matches every string starting with node<digits> - confirm
                        m = nodes_cores.match("1")

                if m:
                    cores = 1
                    if len(m.groups()) > 1 and m.group('cores'):
                        cores = int(m.group('cores'))
                    derived['nodes'] = int(m.group('nodes'))
                    derived['cores'] = cores

        # resource used
        if 'resources_used' in jobdata:
            resources_used = jobdata['resources_used']

            if 'mem' in resources_used:
                derived['used_mem'] = str2byte(resources_used['mem'][0])
            if 'vmem' in resources_used:
                derived['used_vmem'] = str2byte(resources_used['vmem'][0])

            if 'walltime' in resources_used:
                used_wall = str2sec(resources_used['walltime'][0])
                if used_wall is not None:
                    derived['used_walltime'] = used_wall
            if 'cput' in resources_used:
                used_cpu = str2sec(resources_used['cput'][0])
                if used_cpu is not None:
                    derived['used_cput'] = used_cpu

        if 'exec_host' in jobdata:
            exec_hosts = {}
            for host in jobdata.get_nodes():
                # the hostname is the part before the first '/'
                hostname = host.split('/')[0]
                exec_hosts[hostname] = exec_hosts.get(hostname, 0) + 1
            derived['exec_hosts'] = exec_hosts

        jobdata['derived'] = derived

    return jobs