def get_required_nodes(instance_properties, max_size):
    """Compute the nodes needed for the jobs currently queued in Torque.

    Runs ``qstat -at``, picks the rows whose state column is ``Q`` and passes
    their requested node and task counts to ``get_optimal_nodes``.

    :param instance_properties: forwarded unchanged to ``get_optimal_nodes``
    :param max_size: not used by this function
    :return: the value returned by ``get_optimal_nodes``
    """
    # Example output of torque
    #                                                      Req'd  Req'd    Elap
    # Job ID                 Username Queue Jobname SessID NDS TSK Memory Time     S Time
    # ---------------------- -------- ----- ------- ------ --- --- ------ -------- - --------
    # 0.ip-172-31-11-1.ec2.i centos   batch job.sh  5343   5   30  --     01:00:00 Q 00:04:58
    # 1.ip-172-31-11-1.ec2.i centos   batch job.sh  5340   3   6   --     01:00:00 R 00:08:14
    # 2.ip-172-31-11-1.ec2.i centos   batch job.sh  5387   2   4   --     01:00:00 R 00:08:27
    qstat_command = "/opt/torque/bin/qstat -at"
    pending_states = ["Q"]

    raw_output = check_command_output(qstat_command)
    # The first five lines are skipped as header material.
    job_rows = raw_output.split("\n")[5:]

    requested_slots = []
    requested_nodes = []
    for row in job_rows:
        fields = row.split()
        # Column 9 is the job state (S); shorter rows cannot be job entries.
        if len(fields) < 10 or fields[9] not in pending_states:
            continue
        requested_slots.append(int(fields[6]))  # TSK column
        requested_nodes.append(int(fields[5]))  # NDS column

    return get_optimal_nodes(requested_nodes, requested_slots, instance_properties)
def get_required_nodes(instance_properties):
    """Compute the nodes needed for the jobs currently pending in Slurm.

    Runs ``squeue`` with a ``-``-separated output format, keeps the pending
    (``PD``) jobs whose reason is listed in ``PENDING_RESOURCES_REASONS`` and
    passes their requested node and CPU counts to ``get_optimal_nodes``.
    Pending jobs with any other reason are logged and ignored.

    :param instance_properties: forwarded unchanged to ``get_optimal_nodes``
    :return: the value returned by ``get_optimal_nodes``
    """
    log.info("Computing number of required nodes for submitted jobs")
    command = "/opt/slurm/bin/squeue -r -h -o '%i-%t-%D-%C-%r'"
    # Example output of squeue
    # 1-PD-1-24-Nodes required for job are DOWN, DRAINED or reserved for jobs in higher priority partitions
    # 2-PD-1-24-Licenses
    # 3-PD-1-24-PartitionNodeLimit
    # 4-R-1-24-
    output = check_command_output(command, log)

    slots_requested = []
    nodes_requested = []
    for line in output.split("\n"):
        # Split at most 4 times: the reason is the trailing free-text field and
        # may itself contain '-' (e.g. a hostname). An unbounded split would
        # yield more than 5 parts and the job would be dropped without a trace.
        fields = line.split("-", 4)
        if len(fields) != 5 or fields[1] != "PD":
            continue

        job_id, _state, nodes, slots, reason = fields
        if reason in PENDING_RESOURCES_REASONS:
            slots_requested.append(int(slots))
            nodes_requested.append(int(nodes))
        else:
            log.info("Skipping pending job %s due to pending reason: %s", job_id, reason)

    return get_optimal_nodes(nodes_requested, slots_requested, instance_properties)
def get_required_nodes(instance_properties):
    """Compute the nodes needed for the jobs currently queued in Torque.

    Runs ``qstat -at``, picks the rows whose state column is ``Q`` and passes
    their requested node and task counts to ``get_optimal_nodes``.

    :param instance_properties: forwarded unchanged to ``get_optimal_nodes``
    :return: the value returned by ``get_optimal_nodes``
    """
    # Example output of torque
    #                                                      Req'd  Req'd    Elap
    # Job ID                 Username Queue Jobname SessID NDS TSK Memory Time     S Time
    # ---------------------- -------- ----- ------- ------ --- --- ------ -------- - --------
    # 0.ip-172-31-11-1.ec2.i centos   batch job.sh  5343   5   30  --     01:00:00 Q 00:04:58
    # 1.ip-172-31-11-1.ec2.i centos   batch job.sh  5340   3   6   --     01:00:00 R 00:08:14
    # 2.ip-172-31-11-1.ec2.i centos   batch job.sh  5387   2   4   --     01:00:00 R 00:08:27
    queued_states = ["Q"]
    qstat_text = check_command_output("/opt/torque/bin/qstat -at", log)

    slots_requested = []
    nodes_requested = []
    # The first five lines are skipped as header material.
    for entry in qstat_text.split("\n")[5:]:
        columns = entry.split()
        is_queued_job = len(columns) >= 10 and columns[9] in queued_states
        if not is_queued_job:
            continue
        # Columns 6 and 5 are TSK and NDS in the layout shown above.
        slots_requested.append(int(columns[6]))
        nodes_requested.append(int(columns[5]))

    return get_optimal_nodes(nodes_requested, slots_requested, instance_properties)
def test_empty_lists(self):
    """With no node or slot requests, get_optimal_nodes should return 0."""
    expected_count = 0
    actual_count = utils.get_optimal_nodes([], [], instance_properties)
    failure_message = "test_empty_lists failed. Got %s; Expected: %s" % (actual_count, expected_count)
    self.assertEqual(actual_count, expected_count, failure_message)
def test_each_node_partial_capacity(self):
    """Requests of [1, 5, 3, 2] nodes with [6, 35, 1, 1] slots should give 6."""
    requested_nodes = [1, 5, 3, 2]
    requested_slots = [6, 35, 1, 1]
    expected_count = 6
    actual_count = utils.get_optimal_nodes(requested_nodes, requested_slots, instance_properties)
    failure_message = "test_each_node_partial_capacity failed: Got %s; Expected: %s" % (
        actual_count,
        expected_count,
    )
    self.assertEqual(actual_count, expected_count, failure_message)
def test_each_node_one_vcpu_except_max(self):
    """Requests of [1, 5, 3] nodes with [1, 40, 1] slots should give 8."""
    requested_nodes = [1, 5, 3]
    requested_slots = [1, 40, 1]
    expected_count = 8
    actual_count = utils.get_optimal_nodes(requested_nodes, requested_slots, instance_properties)
    failure_message = "test_each_node_one_vcpu_except_max failed: Got %s; Expected: %s" % (
        actual_count,
        expected_count,
    )
    self.assertEqual(actual_count, expected_count, failure_message)
def test_each_node_half_capacity(self):
    """Requests of [1, 5, 3] nodes with [4, 20, 12] slots should give 5."""
    nodes = utils.get_optimal_nodes([1, 5, 3], [4, 20, 12], instance_properties)
    expected = 5
    # The failure message names this test, so a failing run points at the
    # right case (it previously reported "test_exact_fit").
    self.assertEqual(
        nodes,
        expected,
        "test_each_node_half_capacity failed: Got %s; Expected: %s" % (nodes, expected),
    )
def test_only_vcpus(self):
    """A single request of 1 node with 27 slots should give 4."""
    nodes = utils.get_optimal_nodes([1], [27], instance_properties)
    expected = 4
    # The failure message names this test, so a failing run points at the
    # right case (it previously reported "test_exact_fit").
    self.assertEqual(
        nodes,
        expected,
        "test_only_vcpus failed. Got %s; Expected: %s" % (nodes, expected),
    )
def test_each_node_at_capacity(self):
    """Requests of [1, 5, 3] nodes with [8, 40, 24] slots should give 9."""
    nodes = utils.get_optimal_nodes([1, 5, 3], [8, 40, 24], instance_properties)
    expected = 9
    # The failure message names this test, so a failing run points at the
    # right case (it previously reported "test_exact_fit").
    self.assertEqual(
        nodes,
        expected,
        "test_each_node_at_capacity failed. Got %s; Expected: %s" % (nodes, expected),
    )