Example No. 1
    def test_unicode(self):
        """Check that there are no exceptions raised when using unicode folders."""
        tmpsource = tempfile.mkdtemp()
        tmpdest = tempfile.mkdtemp()

        with open(os.path.join(tmpsource, 'sąžininga'), 'w',
                  encoding='utf8') as fhandle:
            fhandle.write('test')
        with open(os.path.join(tmpsource, 'žąsis'), 'w',
                  encoding='utf8') as fhandle:
            fhandle.write('test')

        folder = Folder(tmpdest)
        folder.insert_path(tmpsource, 'destination')
        folder.insert_path(tmpsource, 'šaltinis')

        self.assertEqual(sorted(folder.get_content_list()),
                         sorted(['destination', 'šaltinis']))
        self.assertEqual(
            sorted(folder.get_subfolder('destination').get_content_list()),
            sorted(['sąžininga', 'žąsis']))
        self.assertEqual(
            sorted(folder.get_subfolder('šaltinis').get_content_list()),
            sorted(['sąžininga', 'žąsis']))

        folder = Folder(os.path.join(tmpsource, 'šaltinis'))
        folder.insert_path(tmpdest, 'destination')
        folder.insert_path(tmpdest, 'kitas-šaltinis')
        self.assertEqual(sorted(folder.get_content_list()),
                         sorted(['destination', 'kitas-šaltinis']))
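
For context, a minimal standalone sketch of the Folder API this test exercises (aiida.common.folders.Folder, per the type annotation in Example No. 3); the directory and file names here are made up:

import os
import tempfile

from aiida.common.folders import Folder

# Create a scratch source directory containing a unicode-named file
source = tempfile.mkdtemp()
with open(os.path.join(source, 'žąsis'), 'w', encoding='utf8') as handle:
    handle.write('test')

# Copy the whole directory into a Folder under a unicode subfolder name
sandbox = Folder(tempfile.mkdtemp())
sandbox.insert_path(source, 'šaltinis')
print(sandbox.get_content_list())                            # ['šaltinis']
print(sandbox.get_subfolder('šaltinis').get_content_list())  # ['žąsis']
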
Example No. 2
    def presubmit(self, folder: Folder) -> CalcInfo:
        """Prepares the calculation folder with all inputs, ready to be copied to the cluster.

        :param folder: a SandboxFolder that can be used to write calculation input files and the scheduling script.

        :return: the CalcInfo object containing the information needed by the daemon to handle operations.
        """
        # pylint: disable=too-many-locals,too-many-statements,too-many-branches
        from aiida.common.exceptions import PluginInternalError, ValidationError, InvalidOperation, InputValidationError
        from aiida.common import json
        from aiida.common.utils import validate_list_of_string_tuples
        from aiida.common.datastructures import CodeInfo, CodeRunMode
        from aiida.orm import load_node, Code, Computer
        from aiida.plugins import DataFactory
        from aiida.schedulers.datastructures import JobTemplate

        computer = self.node.computer
        inputs = self.node.get_incoming(link_type=LinkType.INPUT_CALC)

        if not self.inputs.metadata.dry_run and self.node.has_cached_links():  # type: ignore[union-attr]
            raise InvalidOperation('calculation node has unstored links in cache')

        # Collect every Code node among the inputs; each must be able to run on the target computer
        codes = [node for node in inputs.all_nodes() if isinstance(node, Code)]

        for code in codes:
            if not code.can_run_on(computer):
                raise InputValidationError(
                    'The selected code {} for calculation {} cannot run on computer {}'
                    .format(code.pk, self.node.pk, computer.label))

            if code.is_local() and code.get_local_executable() in folder.get_content_list():
                raise PluginInternalError(
                    f'The plugin created a file {code.get_local_executable()} that is also the executable name!')

        calc_info = self.prepare_for_submission(folder)
        calc_info.uuid = str(self.node.uuid)
        scheduler = computer.get_scheduler()

        # I create the job template to pass to the scheduler
        job_tmpl = JobTemplate()
        job_tmpl.shebang = computer.get_shebang()
        job_tmpl.submit_as_hold = False
        job_tmpl.rerunnable = False
        job_tmpl.job_environment = {}
        # 'email', 'email_on_started', 'email_on_terminated',
        job_tmpl.job_name = f'aiida-{self.node.pk}'
        job_tmpl.sched_output_path = self.options.scheduler_stdout
        if self.options.scheduler_stderr == self.options.scheduler_stdout:
            job_tmpl.sched_join_files = True
        else:
            job_tmpl.sched_error_path = self.options.scheduler_stderr
            job_tmpl.sched_join_files = False

        # Set retrieve path, add also scheduler STDOUT and STDERR
        retrieve_list = calc_info.retrieve_list or []
        if (job_tmpl.sched_output_path is not None
                and job_tmpl.sched_output_path not in retrieve_list):
            retrieve_list.append(job_tmpl.sched_output_path)
        if not job_tmpl.sched_join_files:
            if (job_tmpl.sched_error_path is not None
                    and job_tmpl.sched_error_path not in retrieve_list):
                retrieve_list.append(job_tmpl.sched_error_path)
        retrieve_list.extend(
            self.node.get_option('additional_retrieve_list') or [])
        self.node.set_retrieve_list(retrieve_list)

        retrieve_singlefile_list = calc_info.retrieve_singlefile_list or []
        # a validation on the subclasses of retrieve_singlefile_list
        for _, subclassname, _ in retrieve_singlefile_list:
            file_sub_class = DataFactory(subclassname)
            if not issubclass(file_sub_class, orm.SinglefileData):
                raise PluginInternalError(
                    '[presubmission of calc {}] retrieve_singlefile_list subclass problem: {} is '
                    'not subclass of SinglefileData'.format(
                        self.node.pk, file_sub_class.__name__))
        if retrieve_singlefile_list:
            self.node.set_retrieve_singlefile_list(retrieve_singlefile_list)

        # Handle the retrieve_temporary_list
        retrieve_temporary_list = calc_info.retrieve_temporary_list or []
        self.node.set_retrieve_temporary_list(retrieve_temporary_list)

        # Filter out empty and None entries before joining (see the standalone
        # sketch after this example). This has two advantages:
        # - it does not pile up '\n\n' separators when most prepend_text
        #   values are empty
        # - more importantly, it prevents str.join from raising a TypeError
        #   when one of the methods returns None
        prepend_texts = [computer.get_prepend_text()] + \
            [code.get_prepend_text() for code in codes] + \
            [calc_info.prepend_text, self.node.get_option('prepend_text')]
        job_tmpl.prepend_text = '\n\n'.join(prepend_text
                                            for prepend_text in prepend_texts
                                            if prepend_text)

        append_texts = [self.node.get_option('append_text'), calc_info.append_text] + \
            [code.get_append_text() for code in codes] + \
            [computer.get_append_text()]
        job_tmpl.append_text = '\n\n'.join(append_text
                                           for append_text in append_texts
                                           if append_text)

        # Set resources, also with get_default_mpiprocs_per_machine
        resources = self.node.get_option('resources')
        scheduler.preprocess_resources(
            resources, computer.get_default_mpiprocs_per_machine())
        job_tmpl.job_resource = scheduler.create_job_resource(**resources)

        # Build the substitution map used to expand placeholders such as
        # {tot_num_mpiprocs} in the computer's mpirun command template
        subst_dict = {'tot_num_mpiprocs': job_tmpl.job_resource.get_tot_num_mpiprocs()}
        for key, value in job_tmpl.job_resource.items():
            subst_dict[key] = value
        mpi_args = [
            arg.format(**subst_dict) for arg in computer.get_mpirun_command()
        ]
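        # For example, a computer configured with the default mpirun command
        # ['mpirun', '-np', '{tot_num_mpiprocs}'] and 4 total MPI procs yields
        # mpi_args == ['mpirun', '-np', '4'] (illustrative values only)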
        extra_mpirun_params = self.node.get_option(
            'mpirun_extra_params')  # same for all codes in the same calc

        # set the codes_info
        if not isinstance(calc_info.codes_info, (list, tuple)):
            raise PluginInternalError('codes_info passed to CalcInfo must be a list of CodeInfo objects')

        codes_info = []
        for code_info in calc_info.codes_info:

            if not isinstance(code_info, CodeInfo):
                raise PluginInternalError(
                    'Invalid codes_info, must be a list of CodeInfo objects')

            if code_info.code_uuid is None:
                raise PluginInternalError(
                    'CalcInfo should have the information of the code to be launched'
                )
            this_code = load_node(code_info.code_uuid, sub_classes=(Code, ))

            this_withmpi = code_info.withmpi  # may be None; the default is resolved below
            if this_withmpi is None:
                if len(calc_info.codes_info) > 1:
                    raise PluginInternalError(
                        'For more than one code, it is necessary to set withmpi in codes_info'
                    )
                else:
                    this_withmpi = self.node.get_option('withmpi')

            cmdline_params = code_info.cmdline_params if code_info.cmdline_params is not None else []
            if this_withmpi:
                this_argv = mpi_args + extra_mpirun_params + [this_code.get_execname()] + cmdline_params
            else:
                this_argv = [this_code.get_execname()] + cmdline_params

            # overwrite the old cmdline_params and add codename and mpirun stuff
            code_info.cmdline_params = this_argv

            codes_info.append(code_info)
        job_tmpl.codes_info = codes_info

        # set the codes execution mode

        if len(codes) > 1:
            try:
                job_tmpl.codes_run_mode = calc_info.codes_run_mode
            except KeyError as exc:
                raise PluginInternalError(
                    'Need to set the order of the code execution (parallel or serial?)'
                ) from exc
        else:
            job_tmpl.codes_run_mode = CodeRunMode.SERIAL
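            # CodeRunMode.SERIAL runs the codes one after another in the submit
            # script; CodeRunMode.PARALLEL launches them in the background and
            # waits for all of them to finish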
        ########################################################################

        custom_sched_commands = self.node.get_option(
            'custom_scheduler_commands')
        if custom_sched_commands:
            job_tmpl.custom_scheduler_commands = custom_sched_commands

        job_tmpl.import_sys_environment = self.node.get_option(
            'import_sys_environment')

        job_tmpl.job_environment = self.node.get_option(
            'environment_variables')

        queue_name = self.node.get_option('queue_name')
        account = self.node.get_option('account')
        qos = self.node.get_option('qos')
        if queue_name is not None:
            job_tmpl.queue_name = queue_name
        if account is not None:
            job_tmpl.account = account
        if qos is not None:
            job_tmpl.qos = qos
        priority = self.node.get_option('priority')
        if priority is not None:
            job_tmpl.priority = priority
        max_memory_kb = self.node.get_option('max_memory_kb')
        if max_memory_kb is not None:
            job_tmpl.max_memory_kb = max_memory_kb
        max_wallclock_seconds = self.node.get_option('max_wallclock_seconds')
        if max_wallclock_seconds is not None:
            job_tmpl.max_wallclock_seconds = max_wallclock_seconds

        submit_script_filename = self.node.get_option('submit_script_filename')
        script_content = scheduler.get_submit_script(job_tmpl)
        folder.create_file_from_filelike(io.StringIO(script_content),
                                         submit_script_filename,
                                         'w',
                                         encoding='utf8')

        subfolder = folder.get_subfolder('.aiida', create=True)
        subfolder.create_file_from_filelike(io.StringIO(json.dumps(job_tmpl)),
                                            'job_tmpl.json',
                                            'w',
                                            encoding='utf8')
        subfolder.create_file_from_filelike(io.StringIO(json.dumps(calc_info)),
                                            'calcinfo.json',
                                            'w',
                                            encoding='utf8')

        if calc_info.local_copy_list is None:
            calc_info.local_copy_list = []

        if calc_info.remote_copy_list is None:
            calc_info.remote_copy_list = []

        # Some validation
        this_pk = self.node.pk if self.node.pk is not None else '[UNSTORED]'
        local_copy_list = calc_info.local_copy_list
        try:
            validate_list_of_string_tuples(local_copy_list, tuple_length=3)
        except ValidationError as exception:
            raise PluginInternalError(
                f'[presubmission of calc {this_pk}] local_copy_list format problem: {exception}'
            ) from exception

        remote_copy_list = calc_info.remote_copy_list
        try:
            validate_list_of_string_tuples(remote_copy_list, tuple_length=3)
        except ValidationError as exception:
            raise PluginInternalError(
                f'[presubmission of calc {this_pk}] remote_copy_list format problem: {exception}'
            ) from exception

        for (remote_computer_uuid, _, dest_rel_path) in remote_copy_list:
            try:
                Computer.objects.get(uuid=remote_computer_uuid)  # pylint: disable=unused-variable
            except exceptions.NotExistent as exception:
                raise PluginInternalError(
                    '[presubmission of calc {}] '
                    'The remote copy requires a computer with UUID={} '
                    'but no such computer was found in the '
                    'database'.format(this_pk, remote_computer_uuid)) from exception
            if os.path.isabs(dest_rel_path):
                raise PluginInternalError(
                    '[presubmission of calc {}] '
                    'The destination path of the remote copy '
                    'is absolute! ({})'.format(this_pk, dest_rel_path))

        return calc_info
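
A dependency-free sketch of the filtered-join idiom used above for prepend_text and append_text; the values here are made up (in presubmit they come from the computer, the codes, the CalcInfo and the node options):

# '' and None entries are dropped, so the join neither piles up blank
# separators nor raises a TypeError on None
prepend_texts = ['module load mycode', '', None, 'export OMP_NUM_THREADS=1']
print('\n\n'.join(text for text in prepend_texts if text))
# module load mycode
#
# export OMP_NUM_THREADS=1
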
Example No. 3
    def prepare_for_submission(self, folder: folders.Folder):
        """Create input files from the input nodes passed to this instance of the `CalcJob`.

        :param folder: an `aiida.common.folders.Folder` to temporarily write files on disk
        :return: `aiida.common.datastructures.CalcInfo` instance
        """
        # To be filled out below
        local_copy_list = []
        remote_copy_list = []
        remote_symlink_list = []

        # Create the subfolders for pseudopotentials and orbitals
        folder.get_subfolder(self._PSEUDO_SUBFOLDER, create=True)
        folder.get_subfolder(self._ORBITAL_SUBFOLDER, create=True)

        # Get an uppercase-key-only version of the settings dictionary (also check for case-insensitive duplicates)
        if 'settings' in self.inputs:
            settings = uppercase_dict_keys(self.inputs.settings.get_dict(),
                                           dict_name='settings')
        else:
            settings = {}

        # Get an uppercase-key-only version of the parameters dictionary (also check for case-insensitive duplicates)
        parameters = uppercase_dict_keys(self.inputs.parameters.get_dict(),
                                         dict_name='parameters')

        # No reserved parameter keywords should be provided
        self._check_reserved_keywords(parameters)

        # Load parameter schema
        with open(self._INPUT_SCHEMA, 'r') as stream:
            schema = json.load(stream)

        # Automatically generate input parameters for derived fields, e.g. structure -> Atoms.Unitvectors, etc.
        parameters = self._generate_input_parameters(
            self.inputs.structure, self.inputs.kpoints, parameters,
            self.inputs.pseudos, self.inputs.orbitals,
            self.inputs.orbital_configurations)

        # Get a lowercase-value-only version of the parameters dictionary
        parameters = lowercase_dict_values(parameters)

        # Validate input parameters
        self._validate_inputs(self.inputs.structure, self.inputs.kpoints,
                              parameters, self.inputs.pseudos,
                              self.inputs.orbitals,
                              self.inputs.orbital_configurations, schema)

        # Get input file contents and lists of the pseudopotential and orbital files which need to be copied
        input_file_content = write_input_file(parameters, schema)
        local_copy_pseudo_list, local_copy_orbital_list = self._generate_local_copy_lists(
            self.inputs.pseudos, self.inputs.orbitals)

        local_copy_list += local_copy_pseudo_list
        local_copy_list += local_copy_orbital_list

        # Add output files to retrieve which have been specified to write in the input parameters
        retrieve_list = []
        if parameters.get('BAND_NKPATH', 0) > 0 and parameters.get('SCF_EIGENVALUESOLVER', 'band') == 'band':
            retrieve_list.append(self._DATAFILE_BAND_FILE)
        if parameters.get('MD_TYPE', 'nomd') != 'nomd':
            retrieve_list.append(self._DATAFILE_MD_FILE)
            retrieve_list.append(self._DATAFILE_MD2_FILE)

        # Write input file
        with folder.open(self._INPUT_FILE, 'w') as handle:
            handle.write(input_file_content)

        # Fill out the `CodeInfo`
        codeinfo = datastructures.CodeInfo()
        codeinfo.code_uuid = self.inputs.code.uuid
        codeinfo.withmpi = True
        codeinfo.cmdline_params = ([self._INPUT_FILE] +
                                   list(settings.pop('CMDLINE', [])))
        codeinfo.stdout_name = self._OUTPUT_FILE

        # Fill out the `CalcInfo`
        calcinfo = datastructures.CalcInfo()
        calcinfo.uuid = str(self.uuid)
        calcinfo.codes_info = [codeinfo]
        calcinfo.local_copy_list = local_copy_list
        calcinfo.remote_copy_list = remote_copy_list
        calcinfo.remote_symlink_list = remote_symlink_list
        calcinfo.retrieve_list = retrieve_list
        calcinfo.retrieve_list.append(self._OUTPUT_FILE)
        calcinfo.retrieve_list += settings.pop('ADDITIONAL_RETRIEVE_LIST', [])

        # TODO: pop parser settings and report remaining unknown settings

        return calcinfo
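
For reference, a minimal sketch of the CodeInfo/CalcInfo assembly this method performs; the filenames and UUID are placeholders standing in for self._INPUT_FILE, self._OUTPUT_FILE and self.inputs.code.uuid:

from aiida.common import datastructures

input_file, output_file = 'aiida.in', 'aiida.out'  # placeholder names

codeinfo = datastructures.CodeInfo()
codeinfo.code_uuid = '00000000-0000-0000-0000-000000000000'  # placeholder UUID
codeinfo.withmpi = True
codeinfo.cmdline_params = [input_file]
codeinfo.stdout_name = output_file

calcinfo = datastructures.CalcInfo()
calcinfo.codes_info = [codeinfo]
calcinfo.retrieve_list = [output_file]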