Exemplo n.º 1
0
    def test_create_file_from_filelike(self):
        """Test `aiida.common.folders.Folder.create_file_from_filelike`.

        A text stream written in text mode and a byte stream written in binary mode are expected to succeed. Mixing
        them (text stream with mode ``wb`` or byte stream with mode ``w``) is expected to raise a ``TypeError``.
        """
        unicode_string = 'unicode_string'
        byte_string = b'byte_string'

        # Create the directory before entering the `try` block: if `mkdtemp` itself fails there is nothing to clean
        # up, and the `finally` clause would otherwise reference an unbound `tempdir` and mask the original error.
        tempdir = tempfile.mkdtemp()

        try:
            folder = Folder(tempdir)

            # Stream type and file mode match, so both calls should simply succeed
            folder.create_file_from_filelike(io.StringIO(unicode_string),
                                             'random.dat',
                                             mode='w',
                                             encoding='utf-8')
            folder.create_file_from_filelike(io.BytesIO(byte_string),
                                             'random.dat',
                                             mode='wb',
                                             encoding=None)

            # Text stream combined with binary mode
            with self.assertRaises(TypeError):
                folder.create_file_from_filelike(io.StringIO(unicode_string),
                                                 'random.dat',
                                                 mode='wb')

            # Byte stream combined with text mode
            with self.assertRaises(TypeError):
                folder.create_file_from_filelike(io.BytesIO(byte_string),
                                                 'random.dat',
                                                 mode='w')

        finally:
            shutil.rmtree(tempdir)
Exemplo n.º 2
0
    def test_create_file_from_filelike_py2():
        """Test `aiida.common.folders.Folder.create_file_from_filelike` for python 2.

        All four combinations of unicode/byte content and text/binary mode are written to the same file name; the
        test passes as long as none of the calls raises.
        """
        unicode_string = u'unicode_string'
        byte_string = 'byte_string'

        # Create the directory before entering the `try` block: if `mkdtemp` itself fails there is nothing to clean
        # up, and the `finally` clause would otherwise reference an unbound `tempdir` and mask the original error.
        tempdir = tempfile.mkdtemp()

        try:
            folder = Folder(tempdir)

            # Passing a stream with matching file mode should work of course
            folder.create_file_from_filelike(six.StringIO(unicode_string),
                                             'random.dat',
                                             mode='w',
                                             encoding='utf-8')
            folder.create_file_from_filelike(six.StringIO(byte_string),
                                             'random.dat',
                                             mode='wb',
                                             encoding=None)

            # For python 2 the `create_file_from_filelike` should be able to deal with incoherent arguments, such as
            # the examples below where a unicode string is passed with a binary mode, or a byte stream in unicode mode.
            folder.create_file_from_filelike(six.StringIO(unicode_string),
                                             'random.dat',
                                             mode='wb',
                                             encoding=None)
            folder.create_file_from_filelike(six.StringIO(byte_string),
                                             'random.dat',
                                             mode='w',
                                             encoding='utf-8')

        finally:
            shutil.rmtree(tempdir)
Exemplo n.º 3
0
    def test_create_file_from_filelike_py3(self):
        """Test `aiida.common.folders.Folder.create_file_from_filelike` for python 3.

        Matching stream type and file mode are expected to succeed, whereas a unicode stream with binary mode or a
        byte stream with text mode is expected to raise a ``TypeError``.
        """
        unicode_string = 'unicode_string'
        byte_string = b'byte_string'

        # Create the directory before entering the `try` block: if `mkdtemp` itself fails there is nothing to clean
        # up, and the `finally` clause would otherwise reference an unbound `tempdir` and mask the original error.
        tempdir = tempfile.mkdtemp()

        try:
            folder = Folder(tempdir)

            folder.create_file_from_filelike(six.StringIO(unicode_string),
                                             'random.dat',
                                             mode='w',
                                             encoding='utf-8')
            folder.create_file_from_filelike(six.BytesIO(byte_string),
                                             'random.dat',
                                             mode='wb',
                                             encoding=None)

            # For python three we make no exceptions, if you pass a unicode stream with binary mode, one should expect
            # a TypeError. Same for the inverse case of wanting to write in unicode mode but passing a byte stream
            with self.assertRaises(TypeError):
                folder.create_file_from_filelike(six.StringIO(unicode_string),
                                                 'random.dat',
                                                 mode='wb')

            with self.assertRaises(TypeError):
                folder.create_file_from_filelike(six.BytesIO(byte_string),
                                                 'random.dat',
                                                 mode='w')

        finally:
            shutil.rmtree(tempdir)
Exemplo n.º 4
0
    def presubmit(self, folder: Folder) -> CalcInfo:
        """Prepare the calculation folder with all inputs, ready to be copied to the cluster.

        The method calls `prepare_for_submission`, builds the `JobTemplate` that is handed to the scheduler of the
        node's computer, writes the resulting submit script together with JSON dumps of the job template and of the
        `CalcInfo` into `folder`, and finally validates the local and remote copy lists of the `CalcInfo`.

        :param folder: a SandboxFolder that can be used to write calculation input files and the scheduling script.

        :return calcinfo: the CalcInfo object containing the information needed by the daemon to handle operations.

        :raises InvalidOperation: if this is not a dry run and the node has cached links.
        :raises InputValidationError: if one of the input codes cannot run on the computer of the node.
        :raises PluginInternalError: if the content of the `CalcInfo` returned by the plugin is invalid, e.g. a
            malformed `codes_info`, `retrieve_singlefile_list`, `local_copy_list` or `remote_copy_list`.
        """
        # pylint: disable=too-many-locals,too-many-statements,too-many-branches
        from aiida.common.exceptions import PluginInternalError, ValidationError, InvalidOperation, InputValidationError
        from aiida.common import json
        from aiida.common.utils import validate_list_of_string_tuples
        from aiida.common.datastructures import CodeInfo, CodeRunMode
        from aiida.orm import load_node, Code, Computer
        from aiida.plugins import DataFactory
        from aiida.schedulers.datastructures import JobTemplate

        computer = self.node.computer
        inputs = self.node.get_incoming(link_type=LinkType.INPUT_CALC)

        if not self.inputs.metadata.dry_run and self.node.has_cached_links():  # type: ignore[union-attr]
            raise InvalidOperation('calculation node has unstored links in cache')

        codes = [node for node in inputs.all_nodes() if isinstance(node, Code)]

        for code in codes:
            if not code.can_run_on(computer):
                raise InputValidationError(
                    f'The selected code {code.pk} for calculation {self.node.pk} '
                    f'cannot run on computer {computer.label}'
                )

            # The plugin must not have written a file that clashes with the name of a local executable
            if code.is_local() and code.get_local_executable() in folder.get_content_list():
                raise PluginInternalError(
                    f'The plugin created a file {code.get_local_executable()} that is also the executable name!'
                )

        calc_info = self.prepare_for_submission(folder)
        calc_info.uuid = str(self.node.uuid)
        scheduler = computer.get_scheduler()

        # I create the job template to pass to the scheduler
        job_tmpl = JobTemplate()
        job_tmpl.shebang = computer.get_shebang()
        job_tmpl.submit_as_hold = False
        job_tmpl.rerunnable = False
        job_tmpl.job_environment = {}
        # 'email', 'email_on_started', 'email_on_terminated',
        job_tmpl.job_name = f'aiida-{self.node.pk}'
        job_tmpl.sched_output_path = self.options.scheduler_stdout
        if self.options.scheduler_stderr == self.options.scheduler_stdout:
            job_tmpl.sched_join_files = True
        else:
            job_tmpl.sched_error_path = self.options.scheduler_stderr
            job_tmpl.sched_join_files = False

        # Set retrieve path, add also scheduler STDOUT and STDERR
        retrieve_list = calc_info.retrieve_list or []
        if job_tmpl.sched_output_path is not None and job_tmpl.sched_output_path not in retrieve_list:
            retrieve_list.append(job_tmpl.sched_output_path)
        if not job_tmpl.sched_join_files:
            if job_tmpl.sched_error_path is not None and job_tmpl.sched_error_path not in retrieve_list:
                retrieve_list.append(job_tmpl.sched_error_path)
        retrieve_list.extend(self.node.get_option('additional_retrieve_list') or [])
        self.node.set_retrieve_list(retrieve_list)

        retrieve_singlefile_list = calc_info.retrieve_singlefile_list or []
        # a validation on the subclasses of retrieve_singlefile_list
        for _, subclassname, _ in retrieve_singlefile_list:
            file_sub_class = DataFactory(subclassname)
            if not issubclass(file_sub_class, orm.SinglefileData):
                raise PluginInternalError(
                    f'[presubmission of calc {self.node.pk}] retrieve_singlefile_list subclass problem: '
                    f'{file_sub_class.__name__} is not subclass of SinglefileData'
                )
        if retrieve_singlefile_list:
            self.node.set_retrieve_singlefile_list(retrieve_singlefile_list)

        # Handle the retrieve_temporary_list
        retrieve_temporary_list = calc_info.retrieve_temporary_list or []
        self.node.set_retrieve_temporary_list(retrieve_temporary_list)

        # the if is done so that if the method returns None, this is
        # not added. This has two advantages:
        # - it does not add too many \n\n if most of the prepend_text are empty
        # - most importantly, skips the cases in which one of the methods
        #   would return None, in which case the join method would raise
        #   an exception
        prepend_texts = [computer.get_prepend_text()] + \
            [code.get_prepend_text() for code in codes] + \
            [calc_info.prepend_text, self.node.get_option('prepend_text')]
        job_tmpl.prepend_text = '\n\n'.join(prepend_text for prepend_text in prepend_texts if prepend_text)

        # The append texts are joined in the reverse nesting order of the prepend texts
        append_texts = [self.node.get_option('append_text'), calc_info.append_text] + \
            [code.get_append_text() for code in codes] + \
            [computer.get_append_text()]
        job_tmpl.append_text = '\n\n'.join(append_text for append_text in append_texts if append_text)

        # Set resources, also with get_default_mpiprocs_per_machine
        resources = self.node.get_option('resources')
        scheduler.preprocess_resources(resources, computer.get_default_mpiprocs_per_machine())
        job_tmpl.job_resource = scheduler.create_job_resource(**resources)

        # Values that can be substituted in the placeholders of the mpirun command of the computer
        subst_dict = {'tot_num_mpiprocs': job_tmpl.job_resource.get_tot_num_mpiprocs()}

        for key, value in job_tmpl.job_resource.items():
            subst_dict[key] = value
        mpi_args = [arg.format(**subst_dict) for arg in computer.get_mpirun_command()]
        extra_mpirun_params = self.node.get_option('mpirun_extra_params')  # same for all codes in the same calc

        # set the codes_info
        if not isinstance(calc_info.codes_info, (list, tuple)):
            raise PluginInternalError('codes_info passed to CalcInfo must be a list of CodeInfo objects')

        codes_info = []
        for code_info in calc_info.codes_info:

            if not isinstance(code_info, CodeInfo):
                raise PluginInternalError('Invalid codes_info, must be a list of CodeInfo objects')

            if code_info.code_uuid is None:
                raise PluginInternalError('CalcInfo should have the information of the code to be launched')
            this_code = load_node(code_info.code_uuid, sub_classes=(Code,))

            this_withmpi = code_info.withmpi  # to decide better how to set the default
            if this_withmpi is None:
                if len(calc_info.codes_info) > 1:
                    raise PluginInternalError('For more than one code, it is necessary to set withmpi in codes_info')
                # With a single code, fall back to the `withmpi` option of the node
                this_withmpi = self.node.get_option('withmpi')

            cmdline_params = code_info.cmdline_params if code_info.cmdline_params is not None else []

            if this_withmpi:
                this_argv = mpi_args + extra_mpirun_params + [this_code.get_execname()] + cmdline_params
            else:
                this_argv = [this_code.get_execname()] + cmdline_params

            # overwrite the old cmdline_params and add codename and mpirun stuff
            code_info.cmdline_params = this_argv

            codes_info.append(code_info)
        job_tmpl.codes_info = codes_info

        # set the codes execution mode

        if len(codes) > 1:
            # NOTE(review): this is an attribute access guarded by `except KeyError`; whether a missing
            # `codes_run_mode` actually raises `KeyError` depends on the `CalcInfo` implementation - confirm.
            try:
                job_tmpl.codes_run_mode = calc_info.codes_run_mode
            except KeyError as exc:
                raise PluginInternalError(
                    'Need to set the order of the code execution (parallel or serial?)'
                ) from exc
        else:
            job_tmpl.codes_run_mode = CodeRunMode.SERIAL
        ########################################################################

        custom_sched_commands = self.node.get_option('custom_scheduler_commands')
        if custom_sched_commands:
            job_tmpl.custom_scheduler_commands = custom_sched_commands

        job_tmpl.import_sys_environment = self.node.get_option('import_sys_environment')

        job_tmpl.job_environment = self.node.get_option('environment_variables')

        # Forward the scalar options that share their name with the job template field, but only when they are set
        for option in ('queue_name', 'account', 'qos', 'priority', 'max_memory_kb', 'max_wallclock_seconds'):
            value = self.node.get_option(option)
            if value is not None:
                setattr(job_tmpl, option, value)

        submit_script_filename = self.node.get_option('submit_script_filename')
        script_content = scheduler.get_submit_script(job_tmpl)
        folder.create_file_from_filelike(io.StringIO(script_content), submit_script_filename, 'w', encoding='utf8')

        # Store JSON dumps of the job template and the calc info in the `.aiida` subfolder
        subfolder = folder.get_subfolder('.aiida', create=True)
        subfolder.create_file_from_filelike(
            io.StringIO(json.dumps(job_tmpl)), 'job_tmpl.json', 'w', encoding='utf8'
        )
        subfolder.create_file_from_filelike(
            io.StringIO(json.dumps(calc_info)), 'calcinfo.json', 'w', encoding='utf8'
        )

        if calc_info.local_copy_list is None:
            calc_info.local_copy_list = []

        if calc_info.remote_copy_list is None:
            calc_info.remote_copy_list = []

        # Some validation
        this_pk = self.node.pk if self.node.pk is not None else '[UNSTORED]'
        local_copy_list = calc_info.local_copy_list
        try:
            validate_list_of_string_tuples(local_copy_list, tuple_length=3)
        except ValidationError as exception:
            raise PluginInternalError(
                f'[presubmission of calc {this_pk}] local_copy_list format problem: {exception}'
            ) from exception

        remote_copy_list = calc_info.remote_copy_list
        try:
            validate_list_of_string_tuples(remote_copy_list, tuple_length=3)
        except ValidationError as exception:
            raise PluginInternalError(
                f'[presubmission of calc {this_pk}] remote_copy_list format problem: {exception}'
            ) from exception

        for (remote_computer_uuid, _, dest_rel_path) in remote_copy_list:
            # The lookup is done only to verify that the computer exists; the result itself is not needed
            try:
                Computer.objects.get(uuid=remote_computer_uuid)
            except exceptions.NotExistent as exception:
                raise PluginInternalError(
                    f'[presubmission of calc {this_pk}] '
                    f'The remote copy requires a computer with UUID={remote_computer_uuid} '
                    'but no such computer was found in the database'
                ) from exception
            if os.path.isabs(dest_rel_path):
                raise PluginInternalError(
                    f'[presubmission of calc {this_pk}] '
                    f'The destination path of the remote copy is absolute! ({dest_rel_path})'
                )

        return calc_info