Example #1
    def test_submit_script_bad_shebang(self):
        from aiida.common.datastructures import CodeInfo, CodeRunMode
        from aiida.schedulers.datastructures import JobTemplate
        from aiida.schedulers.plugins.slurm import SlurmScheduler

        scheduler = SlurmScheduler()
        code_info = CodeInfo()
        code_info.cmdline_params = ['mpirun', '-np', '23', 'pw.x', '-npool', '1']
        code_info.stdin_name = 'aiida.in'

        # Cases: shebang explicitly set to None (falls back to the default),
        # an explicit empty string, and not set at all ('NOSET' sentinel),
        # which must also fall back to the default
        for (shebang, expected_first_line) in ((None, '#!/bin/bash'), ('', ''), ('NOSET', '#!/bin/bash')):
            job_tmpl = JobTemplate()
            if shebang != 'NOSET':
                job_tmpl.shebang = shebang
            job_tmpl.job_resource = scheduler.create_job_resource(num_machines=1, num_mpiprocs_per_machine=1)
            job_tmpl.codes_info = [code_info]
            job_tmpl.codes_run_mode = CodeRunMode.SERIAL

            submit_script_text = scheduler.get_submit_script(job_tmpl)

            # Check that the implementation correctly chooses the default
            self.assertEqual(submit_script_text.split('\n')[0], expected_first_line)
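
For reference, the fallback the test exercises can be mimicked standalone. This is a minimal sketch of the expected first-line behaviour, not the actual Scheduler implementation:

def expected_first_line(shebang):
    """Hypothetical helper: an unset or None shebang falls back to the
    default '#!/bin/bash'; an explicit empty string yields an empty line."""
    if shebang is None:
        return '#!/bin/bash'
    return shebang

assert expected_first_line(None) == '#!/bin/bash'
assert expected_first_line('') == ''
assert expected_first_line('#!/usr/bin/env bash') == '#!/usr/bin/env bash'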
Example #2
    def test_submit_script(self):
        from aiida.schedulers.datastructures import JobTemplate
        from aiida.schedulers.plugins.sge import SgeScheduler

        sge = SgeScheduler()

        job_tmpl = JobTemplate()
        job_tmpl.job_resource = sge.create_job_resource(parallel_env='mpi8',
                                                        tot_num_mpiprocs=16)
        job_tmpl.working_directory = '/home/users/dorigm7s/test'
        job_tmpl.submit_as_hold = None
        job_tmpl.rerunnable = None
        job_tmpl.email = None
        job_tmpl.email_on_started = None
        job_tmpl.email_on_terminated = None
        job_tmpl.job_name = 'BestJobEver'
        job_tmpl.sched_output_path = None
        job_tmpl.sched_join_files = None
        job_tmpl.queue_name = 'FavQ.q'
        job_tmpl.priority = None
        job_tmpl.max_wallclock_seconds = '3600'  # one hour, rendered below as h_rt=01:00:00
        job_tmpl.job_environment = {
            'HOME': '/home/users/dorigm7s/',
            'WIENROOT': '$HOME:/WIEN2k'
        }

        submit_script_text = sge._get_submit_script_header(job_tmpl)

        self.assertIn('#$ -wd /home/users/dorigm7s/test', submit_script_text)
        self.assertIn('#$ -N BestJobEver', submit_script_text)
        self.assertIn('#$ -q FavQ.q', submit_script_text)
        self.assertIn('#$ -l h_rt=01:00:00', submit_script_text)
        self.assertIn('# ENVIRONMENT VARIABLES BEGIN ###', submit_script_text)
        self.assertIn("export HOME='/home/users/dorigm7s/'", submit_script_text)
        self.assertIn("export WIENROOT='$HOME:/WIEN2k'", submit_script_text)
Example #3
    def presubmit(self, folder):
        """Prepares the calculation folder with all inputs, ready to be copied to the cluster.

        :param folder: a SandboxFolder that can be used to write calculation input files and the scheduling script.
        :type folder: :class:`aiida.common.folders.Folder`

        :return: the CalcInfo object containing the information needed by the daemon to handle operations.
        :rtype: :class:`aiida.common.CalcInfo`
        """
        # pylint: disable=too-many-locals,too-many-statements,too-many-branches
        import io
        import os

        from aiida import orm
        from aiida.common import exceptions, json
        from aiida.common.datastructures import CodeInfo, CodeRunMode
        from aiida.common.exceptions import InputValidationError, InvalidOperation, PluginInternalError, ValidationError
        from aiida.common.links import LinkType
        from aiida.common.utils import validate_list_of_string_tuples
        from aiida.orm import Code, Computer, load_node
        from aiida.plugins import DataFactory
        from aiida.schedulers.datastructures import JobTemplate

        computer = self.node.computer
        inputs = self.node.get_incoming(link_type=LinkType.INPUT_CALC)

        if not self.inputs.metadata.dry_run and self.node.has_cached_links():
            raise InvalidOperation('calculation node has unstored links in cache')

        codes = [_ for _ in inputs.all_nodes() if isinstance(_, Code)]

        for code in codes:
            if not code.can_run_on(computer):
                raise InputValidationError('The selected code {} for calculation {} cannot run on computer {}'.format(
                    code.pk, self.node.pk, computer.name))

            if code.is_local() and code.get_local_executable() in folder.get_content_list():
                raise PluginInternalError('The plugin created a file {} that is also the executable name!'.format(
                    code.get_local_executable()))

        calc_info = self.prepare_for_submission(folder)
        calc_info.uuid = str(self.node.uuid)
        scheduler = computer.get_scheduler()

        # I create the job template to pass to the scheduler
        job_tmpl = JobTemplate()
        job_tmpl.shebang = computer.get_shebang()
        job_tmpl.submit_as_hold = False
        job_tmpl.rerunnable = False
        job_tmpl.job_environment = {}
        # 'email', 'email_on_started', 'email_on_terminated',
        job_tmpl.job_name = 'aiida-{}'.format(self.node.pk)
        job_tmpl.sched_output_path = self.options.scheduler_stdout
        if self.options.scheduler_stderr == self.options.scheduler_stdout:
            job_tmpl.sched_join_files = True
        else:
            job_tmpl.sched_error_path = self.options.scheduler_stderr
            job_tmpl.sched_join_files = False

        # Set retrieve path, add also scheduler STDOUT and STDERR
        retrieve_list = (calc_info.retrieve_list if calc_info.retrieve_list is not None else [])
        if (job_tmpl.sched_output_path is not None and job_tmpl.sched_output_path not in retrieve_list):
            retrieve_list.append(job_tmpl.sched_output_path)
        if not job_tmpl.sched_join_files:
            if (job_tmpl.sched_error_path is not None and job_tmpl.sched_error_path not in retrieve_list):
                retrieve_list.append(job_tmpl.sched_error_path)
        self.node.set_retrieve_list(retrieve_list)

        retrieve_singlefile_list = (calc_info.retrieve_singlefile_list
                                    if calc_info.retrieve_singlefile_list is not None else [])
        # a validation on the subclasses of retrieve_singlefile_list
        for _, subclassname, _ in retrieve_singlefile_list:
            file_sub_class = DataFactory(subclassname)
            if not issubclass(file_sub_class, orm.SinglefileData):
                raise PluginInternalError(
                    '[presubmission of calc {}] retrieve_singlefile_list subclass problem: {} is '
                    'not subclass of SinglefileData'.format(self.node.pk, file_sub_class.__name__))
        if retrieve_singlefile_list:
            self.node.set_retrieve_singlefile_list(retrieve_singlefile_list)

        # Handle the retrieve_temporary_list
        retrieve_temporary_list = (calc_info.retrieve_temporary_list
                                   if calc_info.retrieve_temporary_list is not None else [])
        self.node.set_retrieve_temporary_list(retrieve_temporary_list)

        # Join only the non-empty values: this avoids piling up '\n\n'
        # separators when most prepend_text values are empty and, more
        # importantly, prevents str.join from raising a TypeError if any
        # of the methods returned None
        prepend_texts = [computer.get_prepend_text()] + \
            [code.get_prepend_text() for code in codes] + \
            [calc_info.prepend_text, self.node.get_option('prepend_text')]
        job_tmpl.prepend_text = '\n\n'.join(prepend_text for prepend_text in prepend_texts if prepend_text)

        append_texts = [self.node.get_option('append_text'), calc_info.append_text] + \
            [code.get_append_text() for code in codes] + \
            [computer.get_append_text()]
        job_tmpl.append_text = '\n\n'.join(append_text for append_text in append_texts if append_text)

        # Set resources, also with get_default_mpiprocs_per_machine
        resources = self.node.get_option('resources')
        scheduler.preprocess_resources(resources, computer.get_default_mpiprocs_per_machine())
        job_tmpl.job_resource = scheduler.create_job_resource(**resources)

        subst_dict = {'tot_num_mpiprocs': job_tmpl.job_resource.get_tot_num_mpiprocs()}

        for key, value in job_tmpl.job_resource.items():
            subst_dict[key] = value
        mpi_args = [arg.format(**subst_dict) for arg in computer.get_mpirun_command()]
        extra_mpirun_params = self.node.get_option('mpirun_extra_params')  # same for all codes in the same calc

        # set the codes_info
        if not isinstance(calc_info.codes_info, (list, tuple)):
            raise PluginInternalError('codes_info passed to CalcInfo must be a list of CodeInfo objects')

        codes_info = []
        for code_info in calc_info.codes_info:

            if not isinstance(code_info, CodeInfo):
                raise PluginInternalError('Invalid codes_info, must be a list of CodeInfo objects')

            if code_info.code_uuid is None:
                raise PluginInternalError('CalcInfo should have the information of the code to be launched')
            this_code = load_node(code_info.code_uuid, sub_classes=(Code,))

            # Fall back to the calculation-level 'withmpi' option when the
            # plugin did not set it explicitly on the CodeInfo
            this_withmpi = code_info.withmpi
            if this_withmpi is None:
                if len(calc_info.codes_info) > 1:
                    raise PluginInternalError('For more than one code, it is necessary to set withmpi in codes_info')
                this_withmpi = self.node.get_option('withmpi')

            if this_withmpi:
                this_argv = (mpi_args + extra_mpirun_params + [this_code.get_execname()] +
                             (code_info.cmdline_params if code_info.cmdline_params is not None else []))
            else:
                this_argv = [this_code.get_execname()] + (code_info.cmdline_params
                                                          if code_info.cmdline_params is not None else [])

            # overwrite the old cmdline_params and add codename and mpirun stuff
            code_info.cmdline_params = this_argv

            codes_info.append(code_info)
        job_tmpl.codes_info = codes_info

        # set the codes execution mode

        if len(codes) > 1:
            try:
                job_tmpl.codes_run_mode = calc_info.codes_run_mode
            except KeyError:
                raise PluginInternalError('Need to set the order of the code execution (parallel or serial?)')
        else:
            job_tmpl.codes_run_mode = CodeRunMode.SERIAL
        ########################################################################

        custom_sched_commands = self.node.get_option('custom_scheduler_commands')
        if custom_sched_commands:
            job_tmpl.custom_scheduler_commands = custom_sched_commands

        job_tmpl.import_sys_environment = self.node.get_option('import_sys_environment')

        job_tmpl.job_environment = self.node.get_option('environment_variables')

        queue_name = self.node.get_option('queue_name')
        account = self.node.get_option('account')
        qos = self.node.get_option('qos')
        if queue_name is not None:
            job_tmpl.queue_name = queue_name
        if account is not None:
            job_tmpl.account = account
        if qos is not None:
            job_tmpl.qos = qos
        priority = self.node.get_option('priority')
        if priority is not None:
            job_tmpl.priority = priority
        max_memory_kb = self.node.get_option('max_memory_kb')
        if max_memory_kb is not None:
            job_tmpl.max_memory_kb = max_memory_kb
        max_wallclock_seconds = self.node.get_option('max_wallclock_seconds')
        if max_wallclock_seconds is not None:
            job_tmpl.max_wallclock_seconds = max_wallclock_seconds

        submit_script_filename = self.node.get_option('submit_script_filename')
        script_content = scheduler.get_submit_script(job_tmpl)
        folder.create_file_from_filelike(io.StringIO(script_content), submit_script_filename, 'w', encoding='utf8')

        subfolder = folder.get_subfolder('.aiida', create=True)
        subfolder.create_file_from_filelike(io.StringIO(json.dumps(job_tmpl)), 'job_tmpl.json', 'w', encoding='utf8')
        subfolder.create_file_from_filelike(io.StringIO(json.dumps(calc_info)), 'calcinfo.json', 'w', encoding='utf8')

        if calc_info.local_copy_list is None:
            calc_info.local_copy_list = []

        if calc_info.remote_copy_list is None:
            calc_info.remote_copy_list = []

        # Some validation
        this_pk = self.node.pk if self.node.pk is not None else '[UNSTORED]'
        local_copy_list = calc_info.local_copy_list
        try:
            validate_list_of_string_tuples(local_copy_list, tuple_length=3)
        except ValidationError as exc:
            raise PluginInternalError('[presubmission of calc {}] '
                                      'local_copy_list format problem: {}'.format(this_pk, exc)) from exc

        remote_copy_list = calc_info.remote_copy_list
        try:
            validate_list_of_string_tuples(remote_copy_list, tuple_length=3)
        except ValidationError as exc:
            raise PluginInternalError('[presubmission of calc {}] '
                                      'remote_copy_list format problem: {}'.format(this_pk, exc)) from exc

        for (remote_computer_uuid, _, dest_rel_path) in remote_copy_list:
            try:
                Computer.objects.get(uuid=remote_computer_uuid)  # pylint: disable=unused-variable
            except exceptions.NotExistent:
                raise PluginInternalError('[presubmission of calc {}] '
                                          'The remote copy requires a computer with UUID={} '
                                          'but no such computer was found in the '
                                          'database'.format(this_pk, remote_computer_uuid))
            if os.path.isabs(dest_rel_path):
                raise PluginInternalError('[presubmission of calc {}] '
                                          'The destination path of the remote copy '
                                          'is absolute! ({})'.format(this_pk, dest_rel_path))

        return calc_info
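
For context, presubmit consumes the CalcInfo returned by prepare_for_submission, and each CodeInfo entry must at least carry code_uuid (see the check above). A hypothetical minimal plugin-side implementation might look like this sketch, where the input/output file names and command-line parameters are illustrative assumptions:

    def prepare_for_submission(self, folder):
        from aiida.common.datastructures import CalcInfo, CodeInfo

        # Write the plugin-specific input file into the sandbox folder
        with folder.open('aiida.in', 'w') as handle:
            handle.write('...')  # plugin-specific input content

        code_info = CodeInfo()
        code_info.code_uuid = self.inputs.code.uuid  # required by presubmit
        code_info.cmdline_params = ['-in', 'aiida.in']  # hypothetical flags
        code_info.stdout_name = 'aiida.out'

        calc_info = CalcInfo()
        calc_info.codes_info = [code_info]
        calc_info.retrieve_list = ['aiida.out']
        return calc_info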
Example #4
def test_job_tmpl_errors():
    """Test the raising of the appropriate errors"""
    import pytest

    from aiida.common.datastructures import CodeRunMode
    from aiida.schedulers.datastructures import JobTemplate
    from aiida.schedulers.plugins.lsf import LsfScheduler

    scheduler = LsfScheduler()
    job_tmpl = JobTemplate()

    # Raises while job_resource has not been set yet
    with pytest.raises(ValueError):
        scheduler.get_submit_script(job_tmpl)
    job_tmpl.job_resource = scheduler.create_job_resource(tot_num_mpiprocs=2)
    job_tmpl.codes_info = []

    # Raises for missing codes_run_mode
    with pytest.raises(NotImplementedError):
        scheduler.get_submit_script(job_tmpl)
    job_tmpl.codes_run_mode = CodeRunMode.SERIAL

    # Incorrect setups
    job_tmpl.max_wallclock_seconds = 'Not-a-Number'
    with pytest.raises(ValueError):
        scheduler.get_submit_script(job_tmpl)
    job_tmpl.pop('max_wallclock_seconds')

    job_tmpl.max_memory_kb = 'Not-a-Number'
    with pytest.raises(ValueError):
        scheduler.get_submit_script(job_tmpl)
    job_tmpl.pop('max_memory_kb')

    # Verify minimal working parameters don't raise
    scheduler.get_submit_script(job_tmpl)
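
The pop() calls above work because JobTemplate is a dict-backed attribute dictionary, so attribute access and item access address the same storage. A quick illustration:

from aiida.schedulers.datastructures import JobTemplate

job_tmpl = JobTemplate()
job_tmpl.max_wallclock_seconds = 3600  # set via attribute...
assert job_tmpl['max_wallclock_seconds'] == 3600  # ...read via item access
job_tmpl.pop('max_wallclock_seconds')  # popping the key unsets the attribute
assert 'max_wallclock_seconds' not in job_tmpl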