def test_parse_output_valid():
    """Check that `SlurmScheduler.parse_output` accepts a well-formed detailed job info.

    A ``stdout`` with a header line followed by one separator per detailed-job-info
    field must parse without error and yield no exit code.
    """
    # pylint: disable=protected-access
    field_count = len(SlurmScheduler._detailed_job_info_fields)
    detailed_job_info = {'stdout': 'Header\n' + '|' * field_count}
    assert SlurmScheduler().parse_output(detailed_job_info, '', '') is None
def test_submit_script_with_num_cores_per_machine_and_mpiproc1(self):  # pylint: disable=invalid-name
    """Verify the submit script when both core counts are given consistent values.

    The values satisfy the check
    ``num_cores_per_mpiproc * num_mpiprocs_per_machine == num_cores_per_machine``.
    """
    from aiida.schedulers.datastructures import JobTemplate
    from aiida.common.datastructures import CodeInfo, CodeRunMode

    scheduler = SlurmScheduler()

    template = JobTemplate()
    template.shebang = '#!/bin/bash'
    template.job_resource = scheduler.create_job_resource(
        num_machines=1, num_mpiprocs_per_machine=1, num_cores_per_machine=24, num_cores_per_mpiproc=24
    )
    template.uuid = str(uuid.uuid4())
    template.max_wallclock_seconds = 24 * 3600

    code_info = CodeInfo()
    code_info.cmdline_params = ['mpirun', '-np', '23', 'pw.x', '-npool', '1']
    code_info.stdin_name = 'aiida.in'
    template.codes_info = [code_info]
    template.codes_run_mode = CodeRunMode.SERIAL

    script = scheduler.get_submit_script(template)

    for expected in (
        '#SBATCH --no-requeue',
        '#SBATCH --time=1-00:00:00',
        '#SBATCH --nodes=1',
        '#SBATCH --ntasks-per-node=1',
        '#SBATCH --cpus-per-task=24',
        "'mpirun' '-np' '23' 'pw.x' '-npool' '1' < 'aiida.in'",
    ):
        assert expected in script
def test_joblist_multi(self):
    """Asking for several jobs must list each requested job id exactly once."""
    # pylint: disable=protected-access
    command = SlurmScheduler()._get_joblist_command(jobs=['123', '456'])
    assert '123,456' in command
    assert '456,456' not in command
def test_submit_script_bad_shebang(self):
    """The first line of the submit script should fall back to the default for unset shebangs."""
    from aiida.schedulers.datastructures import JobTemplate
    from aiida.common.datastructures import CodeInfo, CodeRunMode

    scheduler = SlurmScheduler()

    code_info = CodeInfo()
    code_info.cmdline_params = ['mpirun', '-np', '23', 'pw.x', '-npool', '1']
    code_info.stdin_name = 'aiida.in'

    # The 'NOSET' sentinel means: leave the ``shebang`` attribute entirely unset.
    for shebang, expected_first_line in ((None, '#!/bin/bash'), ('', ''), ('NOSET', '#!/bin/bash')):
        job_tmpl = JobTemplate()
        if shebang != 'NOSET':
            job_tmpl.shebang = shebang
        job_tmpl.job_resource = scheduler.create_job_resource(num_machines=1, num_mpiprocs_per_machine=1)
        job_tmpl.codes_info = [code_info]
        job_tmpl.codes_run_mode = CodeRunMode.SERIAL

        submit_script_text = scheduler.get_submit_script(job_tmpl)

        # This tests if the implementation correctly chooses the default.
        self.assertEqual(submit_script_text.split('\n')[0], expected_first_line)
def test_submit_script(self):
    """Check the content of a simple submission script."""
    from aiida.schedulers.datastructures import JobTemplate
    from aiida.common.datastructures import CodeInfo, CodeRunMode

    scheduler = SlurmScheduler()

    code_info = CodeInfo()
    code_info.cmdline_params = ['mpirun', '-np', '23', 'pw.x', '-npool', '1']
    code_info.stdin_name = 'aiida.in'

    template = JobTemplate()
    template.shebang = '#!/bin/bash'
    template.uuid = str(uuid.uuid4())
    template.job_resource = scheduler.create_job_resource(num_machines=1, num_mpiprocs_per_machine=1)
    template.max_wallclock_seconds = 24 * 3600
    template.codes_info = [code_info]
    template.codes_run_mode = CodeRunMode.SERIAL

    script = scheduler.get_submit_script(template)

    assert script.startswith('#!/bin/bash')
    for expected in (
        '#SBATCH --no-requeue',
        '#SBATCH --time=1-00:00:00',
        '#SBATCH --nodes=1',
        "'mpirun' '-np' '23' 'pw.x' '-npool' '1' < 'aiida.in'",
    ):
        assert expected in script
def test_time_conversion(value, expected):
    """Check conversion of (relative) time strings.

    From the docs, acceptable time formats include "minutes", "minutes:seconds",
    "hours:minutes:seconds", "days-hours", "days-hours:minutes" and
    "days-hours:minutes:seconds".
    """
    converted = SlurmScheduler()._convert_time(value)  # pylint: disable=protected-access
    assert converted == expected
def test_parse_failed_squeue_output(self):
    """Ensure ``_parse_joblist_output`` reacts as expected to failures."""
    # pylint: disable=protected-access
    scheduler = SlurmScheduler()

    # A non-zero return value should raise.
    with self.assertRaises(SchedulerError):
        scheduler._parse_joblist_output(1, TEXT_SQUEUE_TO_TEST, '')

    # A non-empty stderr should be logged as a warning.
    with self.assertLogs(scheduler.logger, 'WARNING'):
        scheduler._parse_joblist_output(0, TEXT_SQUEUE_TO_TEST, 'error message')
def test_parse_out_of_memory():
    """A job that failed due to OOM must map to the `ERROR_SCHEDULER_OUT_OF_MEMORY` code."""
    from aiida.engine import CalcJob

    # Minimal detailed job info whose second line carries OUT_OF_MEMORY in the state field.
    detailed_job_info = {
        'retval': 0,
        'stderr': '',
        'stdout': """||||||||||||||||||||||||||||||||||||||||||||||||||
|||||||||||||||||||||||||||||||||||||||||OUT_OF_MEMORY|||||||||"""
    }  # yapf: disable

    exit_code = SlurmScheduler().parse_output(detailed_job_info, '', '')
    assert exit_code == CalcJob.exit_codes.ERROR_SCHEDULER_OUT_OF_MEMORY  # pylint: disable=no-member
def test_parse_common_joblist_output(self):
    """Test whether ``_parse_joblist_output`` can parse the squeue output."""
    # pylint: disable=protected-access
    scheduler = SlurmScheduler()
    retval = 0
    stdout = TEXT_SQUEUE_TO_TEST
    stderr = ''

    job_list = scheduler._parse_joblist_output(retval, stdout, stderr)
    job_dict = {j.job_id: j for j in job_list}

    # The parameters are hard coded in the text to parse.
    job_parsed = len(job_list)
    assert job_parsed == JOBS_ON_CLUSTER

    job_running_parsed = len([j for j in job_list if j.job_state and j.job_state == JobState.RUNNING])
    assert len(JOBS_RUNNING) == job_running_parsed

    job_held_parsed = len([j for j in job_list if j.job_state and j.job_state == JobState.QUEUED_HELD])
    assert JOBS_HELD == job_held_parsed

    job_queued_parsed = len([j for j in job_list if j.job_state and j.job_state == JobState.QUEUED])
    assert JOBS_QUEUED == job_queued_parsed

    parsed_running_users = [j.job_owner for j in job_list if j.job_state and j.job_state == JobState.RUNNING]
    assert set(USERS_RUNNING) == set(parsed_running_users)

    parsed_running_jobs = [j.job_id for j in job_list if j.job_state and j.job_state == JobState.RUNNING]
    assert set(JOBS_RUNNING) == set(parsed_running_jobs)

    # Bug fix: these four assertions were written ``assert <value>, <expected>``,
    # which parses the expected value as the assertion *message*, so they passed
    # for any truthy attribute value. They are now real equality checks
    # (matching the assertEqual calls in the unittest-style twin of this test).
    assert job_dict['863553'].requested_wallclock_time_seconds == 30 * 60  # pylint: disable=invalid-name
    assert job_dict['863553'].wallclock_time_seconds == 29 * 60 + 29
    assert job_dict['863553'].dispatch_time == datetime.datetime(2013, 5, 23, 11, 44, 11)
    assert job_dict['863553'].submission_time == datetime.datetime(2013, 5, 23, 10, 42, 11)

    assert job_dict['863100'].annotation == 'Resources'
    assert job_dict['863100'].num_machines == 32
    assert job_dict['863100'].num_mpiprocs == 1024
    assert job_dict['863100'].queue_name == 'normal'

    assert job_dict['861352'].title == 'Pressure_PBEsol_0'

    assert job_dict['863554'].requested_wallclock_time_seconds is None  # pylint: disable=invalid-name
def test_submit_script_with_num_cores_per_machine_and_mpiproc2(self):  # pylint: disable=invalid-name
    """Verify that inconsistent core counts are rejected.

    The values fail the check
    ``num_cores_per_mpiproc * num_mpiprocs_per_machine == num_cores_per_machine``.
    """
    from aiida.schedulers.datastructures import JobTemplate

    scheduler = SlurmScheduler()
    job_tmpl = JobTemplate()

    with pytest.raises(ValueError, match='`num_cores_per_machine` must be equal to'):
        job_tmpl.job_resource = scheduler.create_job_resource(
            num_machines=1, num_mpiprocs_per_machine=1, num_cores_per_machine=24, num_cores_per_mpiproc=23
        )
def test_time_conversion_errors(caplog):
    """Conversion of (relative) times must raise ``ValueError`` for bad inputs."""
    # pylint: disable=protected-access
    scheduler = SlurmScheduler()

    # Disable logging to avoid excessive output during test.
    with caplog.at_level(logging.CRITICAL):
        # '' is empty, '1-' has nothing after the dash, and '1:2-3' has a dash
        # after a colon, which is not allowed.
        for bad_value in ('', '1-', '1:2-3'):
            with pytest.raises(ValueError, match='Unrecognized format for time string.'):
                scheduler._convert_time(bad_value)
def test_parse_output_invalid(detailed_job_info, expected):
    """`SlurmScheduler.parse_output` must raise the expected exception for invalid arguments."""
    with pytest.raises(expected):
        SlurmScheduler().parse_output(detailed_job_info, '', '')
def test_joblist_single(self):
    """A single requested job id must appear duplicated in the generated command."""
    # pylint: disable=protected-access
    command = SlurmScheduler()._get_joblist_command(jobs=['123'])
    assert '123,123' in command
def test_parse_common_joblist_output(self):
    """Test whether ``_parse_joblist_output`` can parse the squeue output."""
    # pylint: disable=protected-access
    scheduler = SlurmScheduler()

    job_list = scheduler._parse_joblist_output(0, TEXT_SQUEUE_TO_TEST, '')
    job_dict = {job.job_id: job for job in job_list}

    def jobs_in_state(state):
        """Return the parsed jobs whose state equals ``state``."""
        return [job for job in job_list if job.job_state and job.job_state == state]

    # The parameters are hard coded in the text to parse.
    self.assertEqual(len(job_list), JOBS_ON_CLUSTER)
    self.assertEqual(len(JOBS_RUNNING), len(jobs_in_state(JobState.RUNNING)))
    self.assertEqual(JOBS_HELD, len(jobs_in_state(JobState.QUEUED_HELD)))
    self.assertEqual(JOBS_QUEUED, len(jobs_in_state(JobState.QUEUED)))

    self.assertEqual(set(USERS_RUNNING), {job.job_owner for job in jobs_in_state(JobState.RUNNING)})
    self.assertEqual(set(JOBS_RUNNING), {job.job_id for job in jobs_in_state(JobState.RUNNING)})

    # pylint: disable=invalid-name
    self.assertEqual(job_dict['863553'].requested_wallclock_time_seconds, 30 * 60)
    self.assertEqual(job_dict['863553'].wallclock_time_seconds, 29 * 60 + 29)
    self.assertEqual(job_dict['863553'].dispatch_time, datetime.datetime(2013, 5, 23, 11, 44, 11))
    self.assertEqual(job_dict['863553'].submission_time, datetime.datetime(2013, 5, 23, 10, 42, 11))

    self.assertEqual(job_dict['863100'].annotation, 'Resources')
    self.assertEqual(job_dict['863100'].num_machines, 32)
    self.assertEqual(job_dict['863100'].num_mpiprocs, 1024)
    self.assertEqual(job_dict['863100'].queue_name, 'normal')

    self.assertEqual(job_dict['861352'].title, 'Pressure_PBEsol_0')

    self.assertEqual(job_dict['863554'].requested_wallclock_time_seconds, None)
def test_time_conversion(self):
    """Test conversion of (relative) times.

    From docs, acceptable time formats include "minutes", "minutes:seconds",
    "hours:minutes:seconds", "days-hours", "days-hours:minutes" and
    "days-hours:minutes:seconds".
    """
    # pylint: disable=protected-access
    scheduler = SlurmScheduler()

    # Pairs of (time string, expected number of seconds).
    conversions = (
        ('2', 2 * 60),
        ('02', 2 * 60),
        ('02:3', 2 * 60 + 3),
        ('02:03', 2 * 60 + 3),
        ('1:02:03', 3600 + 2 * 60 + 3),
        ('01:02:03', 3600 + 2 * 60 + 3),
        ('1-3', 86400 + 3 * 3600),
        ('01-3', 86400 + 3 * 3600),
        ('01-03', 86400 + 3 * 3600),
        ('1-3:5', 86400 + 3 * 3600 + 5 * 60),
        ('01-3:05', 86400 + 3 * 3600 + 5 * 60),
        ('01-03:05', 86400 + 3 * 3600 + 5 * 60),
        ('1-3:5:7', 86400 + 3 * 3600 + 5 * 60 + 7),
        ('01-3:05:7', 86400 + 3 * 3600 + 5 * 60 + 7),
        ('01-03:05:07', 86400 + 3 * 3600 + 5 * 60 + 7),
        ('UNLIMITED', 2 ** 31 - 1),
        ('NOT_SET', None),
    )
    for value, expected in conversions:
        self.assertEqual(scheduler._convert_time(value), expected)

    # Disable logging to avoid excessive output during test.
    logging.disable(logging.ERROR)

    # '' is empty, '1-' has nothing after the dash, and '1:2-3' has a dash
    # after a colon, which is not allowed.
    for bad_value in ('', '1-', '1:2-3'):
        with self.assertRaises(ValueError):
            scheduler._convert_time(bad_value)

    # Reset logging level.
    logging.disable(logging.NOTSET)