def collect(self):
    """Yield CPU and memory gauges for every (node, partition) pair.

    Node data comes from ``pyslurm.node().get()`` and the cluster name from
    ``pyslurm.config().get()``. Each sample is labelled with the cluster
    name, the partition name and the stringified node properties listed in
    ``self.props``.

    When the environment variable ``METRIC_VALUE_NULL`` equals ``include``
    (case-insensitive) every value is exported; otherwise falsy values
    (``0``, ``None``) are left out.
    """
    grouped_by = ', '.join(self.labels)

    # Metric declarations
    cpus_total = GaugeMetricFamily(
        'slurm_nodes_cpus',
        'Numbers of CPUs on nodes in the cluster grouped by {}'.format(grouped_by),
        labels=self.labels)
    cpus_alloc = GaugeMetricFamily(
        'slurm_nodes_cpus_alloc',
        'Numbers of CPUs allocated on nodes in the cluster grouped by {}'.format(grouped_by),
        labels=self.labels)
    cpu_load = GaugeMetricFamily(
        'slurm_nodes_cpu_load',
        'CPU loads on nodes in the cluster grouped by {}'.format(grouped_by),
        labels=self.labels)
    mem_total = GaugeMetricFamily(
        'slurm_nodes_mem_total',
        'Total amounts of memory available on nodes in the cluster grouped by {}'.format(grouped_by),
        labels=self.labels, unit='bytes')
    mem_free = GaugeMetricFamily(
        'slurm_nodes_mem_free',
        'Amounts of free memory allocated on nodes in the cluster grouped by {}'.format(grouped_by),
        labels=self.labels, unit='bytes')
    mem_alloc = GaugeMetricFamily(
        'slurm_nodes_mem_alloc',
        'Amounts of memory allocated on nodes in the cluster grouped by {}'.format(grouped_by),
        labels=self.labels, unit='bytes')

    # (metric family, node dict key, conversion applied to the raw value),
    # listed in the order the samples are added.
    readings = (
        (cpus_total, 'cpus', lambda raw: raw),
        (cpus_alloc, 'alloc_cpus', lambda raw: raw),
        (cpu_load, 'cpu_load', lambda raw: raw / 100.0),
        (mem_total, 'real_memory', lambda raw: raw * 1000**2),  # MB to Bytes
        (mem_alloc, 'alloc_mem', lambda raw: raw * 1000**2),  # MB to Bytes
        (mem_free, 'free_mem', lambda raw: raw * 2**20),  # MiB to Bytes
    )

    # Load node info from Slurm
    nodes = pyslurm.node().get()
    cluster = pyslurm.config().get()['cluster_name']

    export_everything = (
        os.environ.get('METRIC_VALUE_NULL', '').lower() == 'include')

    for info in nodes.values():
        for partition in info['partitions']:
            labels_ = [cluster, partition]
            labels_ += [str(info[prop]) for prop in self.props]
            for family, key, convert in readings:
                raw = info[key]
                if export_everything or raw:
                    family.add_metric(labels_, convert(raw))

    for family in (cpus_total, cpus_alloc, cpu_load,
                   mem_total, mem_free, mem_alloc):
        yield family
def get_cluster():
    """Return a ``jsonify`` response summarising the cluster.

    The payload holds the cluster name (``name``), the number of nodes
    (``nodes``) and the sum of every node's ``cpus`` value (``cores``).
    """
    nodes = pyslurm.node().get()
    cluster = {
        'name': pyslurm.config().get()['cluster_name'],
        'nodes': len(nodes),
        # .values() works on both Python 2 and 3, unlike .iteritems(), and
        # the node names are not needed for the core count.
        'cores': sum(node['cpus'] for node in nodes.values()),
    }
    return jsonify(cluster)
def get_cluster():
    """Return a dict summarising the cluster.

    When the module-level ``mocking`` flag is truthy, the result of
    ``mock('cluster.json')`` is returned instead of querying Slurm.
    Otherwise the dict holds the cluster name (``name``), the number of
    nodes (``nodes``) and the sum of every node's ``cpus`` value
    (``cores``).
    """
    if mocking:
        return mock('cluster.json')

    nodes = pyslurm.node().get()
    return {
        'name': pyslurm.config().get()['cluster_name'],
        'nodes': len(nodes),
        # .values() works on both Python 2 and 3, unlike .iteritems(), and
        # the node names are not needed for the core count.
        'cores': sum(node['cpus'] for node in nodes.values()),
    }
def check_private_data_for_entity(user, entity):
    """
    Tell whether `entity` is listed in Slurm's private data settings for
    this user.

    Returns True only when authentication is enabled, `entity` appears in
    the `private_data_list` entry of the Slurm configuration, and the
    user's role is not 'admin'. Returns False in every other case.
    """
    if not auth_enabled:
        return False

    # Fetch the private_data attributes from the Slurm configuration
    private_data = pyslurm.config().get()["private_data_list"]
    restricted = (private_data
                  and entity in private_data
                  and user.role != 'admin')
    return bool(restricted)
def get_cluster():
    """Return a ``jsonify`` response with cluster and authentication info.

    The ``data`` entry holds the cluster name (``name``), the number of
    nodes (``nodes``) and the sum of every node's ``cpus`` value
    (``cores``). The ``authentication`` entry reports the module-level
    ``auth_enabled`` and ``guests_allowed`` settings.
    """
    nodes = pyslurm.node().get()
    cluster = {
        'name': pyslurm.config().get()['cluster_name'],
        'nodes': len(nodes),
        # .values() works on both Python 2 and 3, unlike .iteritems(), and
        # the node names are not needed for the core count.
        'cores': sum(node['cpus'] for node in nodes.values()),
    }
    return jsonify({
        'authentication': {
            'enabled': auth_enabled,
            'guest': guests_allowed
        },
        'data': cluster
    })
def collect(self):
    """Yield node-count, CPU-count and state gauges for every partition.

    Partition data comes from ``pyslurm.partition().get()`` and the cluster
    name from ``pyslurm.config().get()``. Every sample is labelled with the
    cluster name and the partition name. The state gauge is 1 when the
    partition's ``state`` equals ``'UP'`` and 0 otherwise.
    """
    grouped_by = ', '.join(self.labels)

    # Metric declarations
    total_nodes = GaugeMetricFamily(
        'slurm_partitions_total_nodes',
        'Total numbers of nodes per partition grouped by {}'.format(grouped_by),
        labels=self.labels)
    total_cpus = GaugeMetricFamily(
        'slurm_partitions_total_cpus',
        'Total numbers of CPUs per partition grouped by {}'.format(grouped_by),
        labels=self.labels)
    state = GaugeMetricFamily(
        'slurm_partitions_state',
        'Partition states grouped by {}'.format(grouped_by),
        labels=self.labels)

    # Load partition info from Slurm
    cluster = pyslurm.config().get()['cluster_name']
    partitions = pyslurm.partition().get()

    # Update the metrics
    for name, info in partitions.items():
        label_values = [cluster, name]
        total_nodes.add_metric(label_values, info['total_nodes'])
        total_cpus.add_metric(label_values, info['total_cpus'])
        is_up = info['state'] == 'UP'
        state.add_metric(label_values, int(is_up))

    for family in (total_nodes, total_cpus, state):
        yield family
def test_config_scontrol():
    """Config: Compare scontrol values to PySlurm values.

    Runs ``scontrol -d show config``, parses every ``Name = Value`` line of
    its output into a dict, and asserts that each listed PySlurm config
    entry equals the matching scontrol entry.
    """
    config_info = pyslurm.config().get()

    raw_output = subprocess.Popen(
        ["scontrol", "-d", "show", "config"], stdout=subprocess.PIPE
    ).communicate()[0]

    # Split each line once on the first "=" and ignore lines without one.
    sctl_dict = {}
    for line in raw_output.strip().decode("UTF-8").split("\n"):
        name, separator, value = line.partition("=")
        if separator:
            sctl_dict[name.strip()] = value.strip()

    # (PySlurm key, scontrol key, conversion applied to the scontrol text).
    # None means the scontrol text is compared as-is.
    checks = (
        ("accounting_storage_host", "AccountingStorageHost", None),
        ("accounting_storage_port", "AccountingStoragePort", int),
        ("accounting_storage_type", "AccountingStorageType", None),
        ("accounting_storage_tres", "AccountingStorageTRES", None),
        ("accounting_storage_user", "AccountingStorageUser", None),
        ("acct_gather_energy_type", "AcctGatherEnergyType", None),
        ("acct_gather_filesystem_type", "AcctGatherFilesystemType", None),
        ("acct_gather_interconnect_type", "AcctGatherInterconnectType", None),
        ("acct_gather_profile_type", "AcctGatherProfileType", None),
        ("authtype", "AuthType", None),
        ("checkpoint_type", "CheckpointType", None),
        ("cluster_name", "ClusterName", None),
        ("core_spec_plugin", "CoreSpecPlugin", None),
        ("ext_sensors_type", "ExtSensorsType", None),
        ("first_job_id", "FirstJobId", int),
        ("job_acct_gather_type", "JobAcctGatherType", None),
        ("job_ckpt_dir", "JobCheckpointDir", None),
        ("job_comp_host", "JobCompHost", None),
        ("job_comp_loc", "JobCompLoc", None),
        ("job_comp_port", "JobCompPort", int),
        ("job_comp_type", "JobCompType", None),
        ("launch_type", "LaunchType", None),
        ("mail_prog", "MailProg", None),
        ("max_array_sz", "MaxArraySize", int),
        ("max_job_cnt", "MaxJobCount", int),
        ("max_job_id", "MaxJobId", int),
        ("max_step_cnt", "MaxStepCount", int),
        ("mpi_default", "MpiDefault", None),
        ("next_job_id", "NEXT_JOB_ID", int),
        ("plugindir", "PluginDir", None),
        ("plugstack", "PlugStackConfig", None),
        ("preempt_mode", "PreemptMode", None),
        ("preempt_type", "PreemptType", None),
        ("priority_type", "PriorityType", None),
        ("proctrack_type", "ProctrackType", None),
        ("propagate_rlimits", "PropagateResourceLimits", None),
        ("route_plugin", "RoutePlugin", None),
        ("schedtype", "SchedulerType", None),
        ("select_type", "SelectType", None),
        ("slurmctld_logfile", "SlurmctldLogFile", None),
        ("slurmctld_pidfile", "SlurmctldPidFile", None),
        ("slurmctld_port", "SlurmctldPort", int),
        ("slurmd_logfile", "SlurmdLogFile", None),
        ("slurmd_pidfile", "SlurmdPidFile", None),
        ("slurmd_port", "SlurmdPort", int),
        ("slurmd_spooldir", "SlurmdSpoolDir", None),
        ("slurm_conf", "SLURM_CONF", None),
        ("state_save_location", "StateSaveLocation", None),
        ("switch_type", "SwitchType", None),
        ("task_plugin", "TaskPlugin", None),
        ("topology_plugin", "TopologyPlugin", None),
        ("tree_width", "TreeWidth", int),
    )
    for pyslurm_key, sctl_key, convert in checks:
        actual = config_info[pyslurm_key]
        expected = sctl_dict[sctl_key]
        if convert is not None:
            expected = convert(expected)
        assert_equals(actual, expected)

    # scontrol prints the daemon users as "name(uid)"; PySlurm exposes the
    # name and the id as two separate entries.
    for prefix, sctl_key in (("slurm", "SlurmUser"), ("slurmd", "SlurmdUser")):
        assert_equals(
            config_info[prefix + "_user_name"]
            + "(" + str(config_info[prefix + "_user_id"]) + ")",
            sctl_dict[sctl_key])
def test_config_key_pairs():
    """Config: key_pairs() must hand back a dict."""
    result = pyslurm.config().key_pairs()
    is_dict = isinstance(result, dict)
    assert_true(is_dict)
#!/usr/bin/env python from __future__ import print_function import pyslurm import sys try: a = pyslurm.config() ctl_dict = a.get() except ValueError as e: print("Error - {0}".format(e.args[0])) sys.exit(-1) # Process the sorted Slurm configuration dictionary date_fields = ['boot_time', 'last_update'] for key in sorted(ctl_dict.items()): if key in date_fields: if ctl_dict[key] == 0: print("\t{0:<35} : N/A".format(key)) else: ddate = pyslurm.epoch2date(ctl_dict[key]) print("\t{0:<35} : {1}".format(key, ddate)) elif 'debug_flags' in key: print("\t{0:<35s} : {1}".format(key[0], pyslurm.get_debug_flags(key[1]))) else:
def collect(self):
    """Yield job-count and requested/allocated resource gauges per job.

    Job data comes from ``pyslurm.job().get()`` and the cluster name from
    ``pyslurm.config().get()``. Each sample is labelled with the cluster
    name followed by the job properties listed in ``self.props``
    (``user_id`` is translated through ``self.get_user_name``, everything
    else is stringified).

    CPU, memory and node figures are parsed out of the job's
    ``tres_req_str`` and ``tres_alloc_str`` strings. When the environment
    variable ``METRIC_VALUE_NULL`` equals ``include`` (case-insensitive) a
    sample with value ``None`` is added for a resource missing from the
    string; otherwise that sample is skipped.
    """
    grouped_by = ', '.join(self.labels)

    # Metric declarations
    JOBS_NUM = GaugeMetricFamily(
        'slurm_jobs_num',
        'Numbers of jobs in the cluster grouped by {}'.format(grouped_by),
        labels=self.labels)
    JOBS_CPUS_REQ = GaugeMetricFamily(
        'slurm_jobs_cpus_req',
        'Numbers of CPUs requested for jobs in the cluster grouped by {}'.format(grouped_by),
        labels=self.labels)
    JOBS_CPUS_ALLOC = GaugeMetricFamily(
        'slurm_jobs_cpus_alloc',
        'Numbers of CPUs allocated for jobs in the cluster grouped by {}'.format(grouped_by),
        labels=self.labels)
    JOBS_MEM_REQ = GaugeMetricFamily(
        'slurm_jobs_mem_req',
        'Amounts of memory requested for jobs in the cluster grouped by {}'.format(grouped_by),
        labels=self.labels, unit='bytes')
    JOBS_MEM_ALLOC = GaugeMetricFamily(
        'slurm_jobs_mem_alloc',
        'Amounts of memory allocated for jobs in the cluster grouped by {}'.format(grouped_by),
        labels=self.labels, unit='bytes')
    JOBS_NODES_REQ = GaugeMetricFamily(
        'slurm_jobs_nodes_req',
        'Numbers of nodes requested for jobs in the cluster grouped by {}'.format(grouped_by),
        labels=self.labels)
    JOBS_NODES_ALLOC = GaugeMetricFamily(
        'slurm_jobs_nodes_alloc',
        'Numbers of nodes allocated for jobs in the cluster grouped by {}'.format(grouped_by),
        labels=self.labels)

    # Load job info from Slurm
    jobs = pyslurm.job().get()
    cluster = pyslurm.config().get()['cluster_name']

    # Compile regular expressions
    rgx_cpu = re.compile(r'cpu=([0-9]+)')
    rgx_mem = re.compile(r'mem=([0-9]+)')
    rgx_node = re.compile(r'node=([0-9]+)')

    include_null = (
        os.environ.get('METRIC_VALUE_NULL', '').lower() == 'include')

    def add_resource(family, labels, pattern, tres, scale=1):
        # Add the first number matched by `pattern` in `tres`, times `scale`.
        found = pattern.search(tres)
        if found:
            family.add_metric(labels, int(found.group(1)) * scale)
        elif include_null:
            # NOTE(review): a None sample value is handed to add_metric
            # unchanged, as before - confirm the exposition layer accepts it.
            family.add_metric(labels, None)

    # Update the metrics
    for job in jobs.values():
        labels_ = [cluster] + [
            self.get_user_name(job[prop]) if prop == 'user_id'
            else str(job[prop])
            for prop in self.props
        ]
        JOBS_NUM.add_metric(labels_, 1.0)

        # The guard used to test the key 'tres_res_str' while the values
        # were read from 'tres_req_str'; check the key that is read.
        tres_req = job.get('tres_req_str')
        if tres_req:
            add_resource(JOBS_CPUS_REQ, labels_, rgx_cpu, tres_req)
            # The leading number after "mem=" is scaled by 1000**2; any
            # unit suffix in the string is not looked at.
            add_resource(JOBS_MEM_REQ, labels_, rgx_mem, tres_req, 1000**2)
            add_resource(JOBS_NODES_REQ, labels_, rgx_node, tres_req)

        tres_alloc = job.get('tres_alloc_str')
        if tres_alloc:
            add_resource(JOBS_CPUS_ALLOC, labels_, rgx_cpu, tres_alloc)
            add_resource(JOBS_MEM_ALLOC, labels_, rgx_mem, tres_alloc, 1000**2)
            add_resource(JOBS_NODES_ALLOC, labels_, rgx_node, tres_alloc)

    yield JOBS_NUM
    yield JOBS_CPUS_REQ
    yield JOBS_CPUS_ALLOC
    yield JOBS_MEM_REQ
    yield JOBS_MEM_ALLOC
    yield JOBS_NODES_REQ
    yield JOBS_NODES_ALLOC
def test_config_key_pairs():
    """Config: key_pairs() must hand back a dict."""
    result = pyslurm.config().key_pairs()
    is_dict = isinstance(result, dict)
    assert_true(is_dict)
def test_config_scontrol():
    """Config: Compare scontrol values to PySlurm values.

    Runs ``scontrol -d show config``, parses every ``Name = Value`` line of
    its output into a dict, and asserts that each listed PySlurm config
    entry equals the matching scontrol entry.
    """
    config_info = pyslurm.config().get()

    raw_output = subprocess.Popen(
        ["scontrol", "-d", "show", "config"], stdout=subprocess.PIPE
    ).communicate()[0]

    # Split each line once on the first "=" and ignore lines without one.
    sctl_dict = {}
    for line in raw_output.strip().decode("UTF-8").split("\n"):
        name, separator, value = line.partition("=")
        if separator:
            sctl_dict[name.strip()] = value.strip()

    # (PySlurm key, scontrol key, conversion applied to the scontrol text).
    # None means the scontrol text is compared as-is.
    checks = (
        ("accounting_storage_host", "AccountingStorageHost", None),
        ("accounting_storage_port", "AccountingStoragePort", int),
        ("accounting_storage_type", "AccountingStorageType", None),
        ("accounting_storage_tres", "AccountingStorageTRES", None),
        ("accounting_storage_user", "AccountingStorageUser", None),
        ("acct_gather_energy_type", "AcctGatherEnergyType", None),
        ("acct_gather_filesystem_type", "AcctGatherFilesystemType", None),
        ("acct_gather_interconnect_type", "AcctGatherInterconnectType", None),
        ("acct_gather_profile_type", "AcctGatherProfileType", None),
        ("authtype", "AuthType", None),
        ("checkpoint_type", "CheckpointType", None),
        ("cluster_name", "ClusterName", None),
        ("control_addr", "ControlAddr", None),
        ("control_machine", "ControlMachine", None),
        ("core_spec_plugin", "CoreSpecPlugin", None),
        ("crypto_type", "CryptoType", None),
        ("ext_sensors_type", "ExtSensorsType", None),
        ("first_job_id", "FirstJobId", int),
        ("job_acct_gather_type", "JobAcctGatherType", None),
        ("job_ckpt_dir", "JobCheckpointDir", None),
        ("job_comp_host", "JobCompHost", None),
        ("job_comp_loc", "JobCompLoc", None),
        ("job_comp_port", "JobCompPort", int),
        ("job_comp_type", "JobCompType", None),
        ("launch_type", "LaunchType", None),
        ("mail_prog", "MailProg", None),
        ("max_array_sz", "MaxArraySize", int),
        ("max_job_cnt", "MaxJobCount", int),
        ("max_job_id", "MaxJobId", int),
        ("max_step_cnt", "MaxStepCount", int),
        ("mpi_default", "MpiDefault", None),
        ("next_job_id", "NEXT_JOB_ID", int),
        ("plugindir", "PluginDir", None),
        ("plugstack", "PlugStackConfig", None),
        ("preempt_mode", "PreemptMode", None),
        ("preempt_type", "PreemptType", None),
        ("priority_type", "PriorityType", None),
        ("proctrack_type", "ProctrackType", None),
        ("propagate_rlimits", "PropagateResourceLimits", None),
        ("route_plugin", "RoutePlugin", None),
        ("schedtype", "SchedulerType", None),
        ("select_type", "SelectType", None),
        ("slurmctld_logfile", "SlurmctldLogFile", None),
        ("slurmctld_pidfile", "SlurmctldPidFile", None),
        ("slurmctld_port", "SlurmctldPort", int),
        ("slurmd_logfile", "SlurmdLogFile", None),
        ("slurmd_pidfile", "SlurmdPidFile", None),
        ("slurmd_port", "SlurmdPort", int),
        ("slurmd_spooldir", "SlurmdSpoolDir", None),
        ("slurm_conf", "SLURM_CONF", None),
        ("state_save_location", "StateSaveLocation", None),
        ("switch_type", "SwitchType", None),
        ("task_plugin", "TaskPlugin", None),
        ("topology_plugin", "TopologyPlugin", None),
        ("tree_width", "TreeWidth", int),
    )
    for pyslurm_key, sctl_key, convert in checks:
        actual = config_info[pyslurm_key]
        expected = sctl_dict[sctl_key]
        if convert is not None:
            expected = convert(expected)
        assert_equals(actual, expected)

    # scontrol prints the daemon users as "name(uid)"; PySlurm exposes the
    # name and the id as two separate entries.
    for prefix, sctl_key in (("slurm", "SlurmUser"), ("slurmd", "SlurmdUser")):
        assert_equals(
            config_info[prefix + "_user_name"]
            + "(" + str(config_info[prefix + "_user_id"]) + ")",
            sctl_dict[sctl_key])
def test_config_get():
    """Config: get() must hand back a dict."""
    result = pyslurm.config().get()
    is_dict = isinstance(result, dict)
    assert_true(is_dict)
def test_config_get():
    """Config: get() must hand back a dict."""
    result = pyslurm.config().get()
    is_dict = isinstance(result, dict)
    assert_true(is_dict)
def test_config_display_all():
    """Config: display_all() must run without raising."""
    # Only the call itself is exercised; its return value is not checked.
    pyslurm.config().display_all()
def config():
    """Return whatever ``pyslurm.config().get()`` reports for the cluster."""
    slurm_config = pyslurm.config()
    return slurm_config.get()
#!/usr/bin/env python import pyslurm a = pyslurm.config() ctl_dict = a.get() # Process the sorted SLURM configuration dictionary date_fields = [ 'boot_time', 'last_update' ] for key in sorted(ctl_dict.iterkeys()): if key in date_fields: if ctl_dict[key] == 0: print "\t%-35s : N/A" % (key) else: ddate = pyslurm.epoch2date(ctl_dict[key]) print "\t%-35s : %s" % (key, ddate) elif 'debug_flags' in key: print "\t%-35s : %s" % (key, pyslurm.get_debug_flags(ctl_dict[key])) else: if 'key_pairs' not in key: print "\t%-35s : %s" % (key, ctl_dict[key]) if ctl_dict.has_key('key_pairs'): print "" print "Additional Information :" print "------------------------"