def presubmit(self, folder):
    """Prepares the calculation folder with all inputs, ready to be copied to the cluster.

    The plugin's ``prepare_for_submission`` is called to obtain the ``CalcInfo``; a scheduler
    ``JobTemplate`` is then filled from the node options, the submit script and JSON dumps of the
    job template and calc info are written into ``folder``, and the local/remote copy lists are
    validated.

    :param folder: a SandboxFolder that can be used to write calculation input files and the scheduling script.
    :type folder: :class:`aiida.common.folders.Folder`

    :return calcinfo: the CalcInfo object containing the information needed by the daemon to handle operations.
    :rtype calcinfo: :class:`aiida.common.CalcInfo`

    :raises InvalidOperation: if this is not a dry run and the node has cached (unstored) links.
    :raises InputValidationError: if an input code cannot run on the node's computer.
    :raises PluginInternalError: if the information returned by the plugin is inconsistent (executable
        name clash, invalid ``codes_info``, malformed copy lists, unknown remote computer, absolute
        remote destination path, invalid ``retrieve_singlefile_list`` subclass).
    """
    # pylint: disable=too-many-locals,too-many-statements,too-many-branches
    import os

    from aiida.common.exceptions import PluginInternalError, ValidationError, InvalidOperation, InputValidationError
    from aiida.common import json
    from aiida.common.utils import validate_list_of_string_tuples
    from aiida.common.datastructures import CodeInfo, CodeRunMode
    from aiida.orm import load_node, Code, Computer
    from aiida.plugins import DataFactory
    from aiida.schedulers.datastructures import JobTemplate

    computer = self.node.computer
    inputs = self.node.get_incoming(link_type=LinkType.INPUT_CALC)

    if not self.inputs.metadata.dry_run and self.node.has_cached_links():
        raise InvalidOperation('calculation node has unstored links in cache')

    # Only the Code nodes among the incoming INPUT_CALC links are relevant here
    codes = [node for node in inputs.all_nodes() if isinstance(node, Code)]

    for code in codes:
        if not code.can_run_on(computer):
            raise InputValidationError('The selected code {} for calculation {} cannot run on computer {}'.format(
                code.pk, self.node.pk, computer.name))

        if code.is_local() and code.get_local_executable() in folder.get_content_list():
            raise PluginInternalError('The plugin created a file {} that is also the executable name!'.format(
                code.get_local_executable()))

    calc_info = self.prepare_for_submission(folder)
    calc_info.uuid = str(self.node.uuid)
    scheduler = computer.get_scheduler()

    # I create the job template to pass to the scheduler
    job_tmpl = JobTemplate()
    job_tmpl.shebang = computer.get_shebang()
    job_tmpl.submit_as_hold = False
    job_tmpl.rerunnable = False
    job_tmpl.job_environment = {}
    # 'email', 'email_on_started', 'email_on_terminated',
    job_tmpl.job_name = 'aiida-{}'.format(self.node.pk)
    job_tmpl.sched_output_path = self.options.scheduler_stdout
    if self.options.scheduler_stderr == self.options.scheduler_stdout:
        job_tmpl.sched_join_files = True
    else:
        job_tmpl.sched_error_path = self.options.scheduler_stderr
        job_tmpl.sched_join_files = False

    # Set retrieve path, add also scheduler STDOUT and STDERR
    retrieve_list = (calc_info.retrieve_list if calc_info.retrieve_list is not None else [])
    if (job_tmpl.sched_output_path is not None and job_tmpl.sched_output_path not in retrieve_list):
        retrieve_list.append(job_tmpl.sched_output_path)
    if not job_tmpl.sched_join_files:
        if (job_tmpl.sched_error_path is not None and job_tmpl.sched_error_path not in retrieve_list):
            retrieve_list.append(job_tmpl.sched_error_path)
    self.node.set_retrieve_list(retrieve_list)

    retrieve_singlefile_list = (calc_info.retrieve_singlefile_list
                                if calc_info.retrieve_singlefile_list is not None else [])
    # a validation on the subclasses of retrieve_singlefile_list
    for _, subclassname, _ in retrieve_singlefile_list:
        file_sub_class = DataFactory(subclassname)
        if not issubclass(file_sub_class, orm.SinglefileData):
            raise PluginInternalError(
                '[presubmission of calc {}] retrieve_singlefile_list subclass problem: {} is '
                'not subclass of SinglefileData'.format(self.node.pk, file_sub_class.__name__))
    if retrieve_singlefile_list:
        self.node.set_retrieve_singlefile_list(retrieve_singlefile_list)

    # Handle the retrieve_temporary_list
    retrieve_temporary_list = (calc_info.retrieve_temporary_list
                               if calc_info.retrieve_temporary_list is not None else [])
    self.node.set_retrieve_temporary_list(retrieve_temporary_list)

    # the if is done so that if the method returns None, this is
    # not added. This has two advantages:
    # - it does not add too many \n\n if most of the prepend_text are empty
    # - most importantly, skips the cases in which one of the methods
    #   would return None, in which case the join method would raise
    #   an exception
    prepend_texts = [computer.get_prepend_text()] + \
        [code.get_prepend_text() for code in codes] + \
        [calc_info.prepend_text, self.node.get_option('prepend_text')]
    job_tmpl.prepend_text = '\n\n'.join(prepend_text for prepend_text in prepend_texts if prepend_text)

    # Append texts are joined in the mirrored order of the prepend texts
    append_texts = [self.node.get_option('append_text'), calc_info.append_text] + \
        [code.get_append_text() for code in codes] + \
        [computer.get_append_text()]
    job_tmpl.append_text = '\n\n'.join(append_text for append_text in append_texts if append_text)

    # Set resources, also with get_default_mpiprocs_per_machine
    resources = self.node.get_option('resources')
    scheduler.preprocess_resources(resources, computer.get_default_mpiprocs_per_machine())
    job_tmpl.job_resource = scheduler.create_job_resource(**resources)

    # Placeholders available to the mpirun command template of the computer
    subst_dict = {'tot_num_mpiprocs': job_tmpl.job_resource.get_tot_num_mpiprocs()}

    for key, value in job_tmpl.job_resource.items():
        subst_dict[key] = value
    mpi_args = [arg.format(**subst_dict) for arg in computer.get_mpirun_command()]
    # same for all codes in the same calc; guard against an unset option so that the list
    # concatenation below cannot fail on None (mirrors the guard on cmdline_params)
    extra_mpirun_params = self.node.get_option('mpirun_extra_params') or []

    # set the codes_info
    if not isinstance(calc_info.codes_info, (list, tuple)):
        raise PluginInternalError('codes_info passed to CalcInfo must be a list of CodeInfo objects')

    codes_info = []
    for code_info in calc_info.codes_info:

        if not isinstance(code_info, CodeInfo):
            raise PluginInternalError('Invalid codes_info, must be a list of CodeInfo objects')

        if code_info.code_uuid is None:
            raise PluginInternalError('CalcInfo should have '
                                      'the information of the code '
                                      'to be launched')
        this_code = load_node(code_info.code_uuid, sub_classes=(Code,))

        this_withmpi = code_info.withmpi  # to decide better how to set the default
        if this_withmpi is None:
            if len(calc_info.codes_info) > 1:
                raise PluginInternalError('For more than one code, it is '
                                          'necessary to set withmpi in '
                                          'codes_info')
            else:
                this_withmpi = self.node.get_option('withmpi')

        plugin_cmdline_params = code_info.cmdline_params if code_info.cmdline_params is not None else []
        if this_withmpi:
            this_argv = mpi_args + extra_mpirun_params + [this_code.get_execname()] + plugin_cmdline_params
        else:
            this_argv = [this_code.get_execname()] + plugin_cmdline_params

        # overwrite the old cmdline_params and add codename and mpirun stuff
        code_info.cmdline_params = this_argv

        codes_info.append(code_info)
    job_tmpl.codes_info = codes_info

    # set the codes execution mode

    if len(codes) > 1:
        try:
            job_tmpl.codes_run_mode = calc_info.codes_run_mode
        except KeyError:
            raise PluginInternalError('Need to set the order of the code execution (parallel or serial?)')
    else:
        job_tmpl.codes_run_mode = CodeRunMode.SERIAL
    ########################################################################

    custom_sched_commands = self.node.get_option('custom_scheduler_commands')
    if custom_sched_commands:
        job_tmpl.custom_scheduler_commands = custom_sched_commands

    job_tmpl.import_sys_environment = self.node.get_option('import_sys_environment')

    job_tmpl.job_environment = self.node.get_option('environment_variables')

    # The remaining scheduler options are only set on the template when explicitly defined
    queue_name = self.node.get_option('queue_name')
    account = self.node.get_option('account')
    qos = self.node.get_option('qos')
    if queue_name is not None:
        job_tmpl.queue_name = queue_name
    if account is not None:
        job_tmpl.account = account
    if qos is not None:
        job_tmpl.qos = qos
    priority = self.node.get_option('priority')
    if priority is not None:
        job_tmpl.priority = priority
    max_memory_kb = self.node.get_option('max_memory_kb')
    if max_memory_kb is not None:
        job_tmpl.max_memory_kb = max_memory_kb
    max_wallclock_seconds = self.node.get_option('max_wallclock_seconds')
    if max_wallclock_seconds is not None:
        job_tmpl.max_wallclock_seconds = max_wallclock_seconds

    submit_script_filename = self.node.get_option('submit_script_filename')
    script_content = scheduler.get_submit_script(job_tmpl)
    folder.create_file_from_filelike(io.StringIO(script_content), submit_script_filename, 'w', encoding='utf8')

    # Dump the job template and the calc info as JSON in a hidden subfolder
    subfolder = folder.get_subfolder('.aiida', create=True)
    subfolder.create_file_from_filelike(io.StringIO(json.dumps(job_tmpl)), 'job_tmpl.json', 'w', encoding='utf8')
    subfolder.create_file_from_filelike(io.StringIO(json.dumps(calc_info)), 'calcinfo.json', 'w', encoding='utf8')

    if calc_info.local_copy_list is None:
        calc_info.local_copy_list = []

    if calc_info.remote_copy_list is None:
        calc_info.remote_copy_list = []

    # Some validation
    this_pk = self.node.pk if self.node.pk is not None else '[UNSTORED]'
    local_copy_list = calc_info.local_copy_list
    try:
        validate_list_of_string_tuples(local_copy_list, tuple_length=3)
    except ValidationError as exc:
        raise PluginInternalError('[presubmission of calc {}] '
                                  'local_copy_list format problem: {}'.format(this_pk, exc))

    remote_copy_list = calc_info.remote_copy_list
    try:
        validate_list_of_string_tuples(remote_copy_list, tuple_length=3)
    except ValidationError as exc:
        raise PluginInternalError('[presubmission of calc {}] '
                                  'remote_copy_list format problem: {}'.format(this_pk, exc))

    for (remote_computer_uuid, _, dest_rel_path) in remote_copy_list:
        try:
            Computer.objects.get(uuid=remote_computer_uuid)  # pylint: disable=unused-variable
        except exceptions.NotExistent:
            raise PluginInternalError('[presubmission of calc {}] '
                                      'The remote copy requires a computer with UUID={} '
                                      'but no such computer was found in the '
                                      'database'.format(this_pk, remote_computer_uuid))
        if os.path.isabs(dest_rel_path):
            raise PluginInternalError('[presubmission of calc {}] '
                                      'The destination path of the remote copy '
                                      'is absolute! ({})'.format(this_pk, dest_rel_path))

    return calc_info
def _prepare_for_submission(self, tempfolder, inputdict):
    """
    This is the routine to be called when you want to create
    the input files and related stuff with a plugin.

    :param tempfolder: a aiida.common.folders.Folder subclass where
        the plugin should put all its files.
    :param inputdict: a dictionary with the input nodes, as they would
        be returned by get_inputs_dict (with the Code!). Entries are popped
        from it as they are consumed; anything left over is an error.
    :return: a CalcInfo holding the copy lists, the retrieve lists and a single
        CodeInfo built from the template.
    :raises InputValidationError: if the template or the code is missing, if the
        template contains unknown keys, if ``files_to_copy`` is malformed or refers
        to a missing/unsupported input, if unused input nodes remain, or if the
        input file name/template/stdin settings are inconsistent.
    """
    import StringIO

    from aiida.orm.data.singlefile import SinglefileData
    from aiida.orm.data.remote import RemoteData
    from aiida.common.utils import validate_list_of_string_tuples
    from aiida.common.exceptions import ValidationError

    parameters_node = inputdict.pop('parameters', None)
    if parameters_node is None:
        parameters = {}
    else:
        parameters = parameters_node.get_dict()

    template_node = inputdict.pop('template', None)
    if template_node is None:
        # Fail with a validation error instead of an AttributeError on None.get_dict()
        raise InputValidationError("No template in input")
    template = template_node.get_dict()

    # Every recognised key is popped so that leftovers can be reported below
    input_file_template = template.pop('input_file_template', '')
    input_file_name = template.pop('input_file_name', None)
    output_file_name = template.pop('output_file_name', None)
    cmdline_params_tmpl = template.pop('cmdline_params', [])
    input_through_stdin = template.pop('input_through_stdin', False)
    files_to_copy = template.pop('files_to_copy', [])
    retrieve_temporary_files = template.pop('retrieve_temporary_files', [])

    if template:
        raise InputValidationError(
            'The following keys could not be used in the template node: {}'
            .format(template.keys()))

    try:
        validate_list_of_string_tuples(files_to_copy, tuple_length=2)
    except ValidationError as e:
        raise InputValidationError(
            "invalid file_to_copy format: {}".format(e))

    local_copy_list = []
    remote_copy_list = []

    for link_name, dest_rel_path in files_to_copy:
        try:
            fileobj = inputdict.pop(link_name)
        except KeyError:
            raise InputValidationError(
                "You are asking to copy a file link {}, "
                "but there is no input link with such a name".format(
                    link_name))
        if isinstance(fileobj, SinglefileData):
            local_copy_list.append(
                (fileobj.get_file_abs_path(), dest_rel_path))
        elif isinstance(fileobj, RemoteData):  # can be a folder
            remote_copy_list.append(
                (fileobj.get_computer().uuid, fileobj.get_remote_path(),
                 dest_rel_path))
        else:
            raise InputValidationError(
                "If you ask to copy a file link {}, "
                "it must be either a SinglefileData or a RemoteData; it is instead of type {}"
                .format(link_name, fileobj.__class__.__name__))

    code = inputdict.pop('code', None)
    if code is None:
        raise InputValidationError("No code in input")

    if inputdict:
        raise InputValidationError(
            "The input nodes with the following labels could not be "
            "used by the templatereplacer plugin: {}".format(
                inputdict.keys()))

    if input_file_name is not None and not input_file_template:
        raise InputValidationError(
            "If you give an input_file_name, you "
            "must also specify a input_file_template")

    if input_through_stdin and input_file_name is None:
        raise InputValidationError(
            "If you ask for input_through_stdin you have to "
            "specify a input_file_name")

    input_file = StringIO.StringIO(
        input_file_template.format(**parameters))
    if input_file_name:
        tempfolder.create_file_from_filelike(input_file, input_file_name)
    elif input_file_template:
        self.logger.warning(
            "No input file name passed, but a input file template is present"
        )

    cmdline_params = [i.format(**parameters) for i in cmdline_params_tmpl]

    calcinfo = CalcInfo()
    calcinfo.retrieve_list = []
    calcinfo.retrieve_temporary_list = []

    calcinfo.uuid = self.uuid
    calcinfo.local_copy_list = local_copy_list
    calcinfo.remote_copy_list = remote_copy_list

    codeinfo = CodeInfo()
    codeinfo.cmdline_params = cmdline_params

    # Only redirect stdin when explicitly requested: the option defaults to False,
    # so an ``is not None`` check would always be true.
    if input_through_stdin:
        codeinfo.stdin_name = input_file_name

    if output_file_name:
        codeinfo.stdout_name = output_file_name
        calcinfo.retrieve_list.append(output_file_name)

    if retrieve_temporary_files:
        calcinfo.retrieve_temporary_list = retrieve_temporary_files

    codeinfo.code_uuid = code.uuid
    calcinfo.codes_info = [codeinfo]

    return calcinfo
def prepare_for_submission(self, folder):
    """
    This is the routine to be called when you want to create the input files and related stuff with a plugin.

    :param folder: a aiida.common.folders.Folder subclass where the plugin should put all its files.
    :return: a CalcInfo holding the copy lists, the retrieve lists and a single CodeInfo built
        from the ``template`` input.
    :raises aiida.common.exceptions.InputValidationError: if the template contains unknown keys,
        if ``files_to_copy`` is malformed or refers to a missing/unsupported input, or if the
        input file name/template/stdin settings are inconsistent.
    """
    # pylint: disable=too-many-locals,too-many-statements,too-many-branches
    from aiida.common.utils import validate_list_of_string_tuples
    from aiida.common.exceptions import ValidationError

    code = self.inputs.code
    template = self.inputs.template.get_dict()

    # The parameters input is optional: fall back to no substitutions when it is absent
    try:
        parameters = self.inputs.parameters.get_dict()
    except AttributeError:
        parameters = {}

    # Every recognised key is popped so that leftovers can be reported below
    input_file_template = template.pop('input_file_template', '')
    input_file_name = template.pop('input_file_name', None)
    output_file_name = template.pop('output_file_name', None)
    cmdline_params_tmpl = template.pop('cmdline_params', [])
    input_through_stdin = template.pop('input_through_stdin', False)
    files_to_copy = template.pop('files_to_copy', [])
    retrieve_temporary_files = template.pop('retrieve_temporary_files', [])

    if template:
        raise exceptions.InputValidationError(
            'The following keys could not be used in the template node: {}'.format(template.keys()))

    try:
        validate_list_of_string_tuples(files_to_copy, tuple_length=2)
    except ValidationError as exc:
        raise exceptions.InputValidationError('invalid file_to_copy format: {}'.format(exc))

    local_copy_list = []
    remote_copy_list = []

    for link_name, dest_rel_path in files_to_copy:
        try:
            fileobj = self.inputs.files[link_name]
        except (AttributeError, KeyError):
            # AttributeError: the attribute access ``self.inputs.files`` failed;
            # KeyError: the subscript lookup of ``link_name`` failed.
            raise exceptions.InputValidationError('You are asking to copy a file link {}, '
                                                  'but there is no input link with such a name'.format(link_name))
        if isinstance(fileobj, orm.SinglefileData):
            local_copy_list.append((fileobj.uuid, fileobj.filename, dest_rel_path))
        elif isinstance(fileobj, orm.RemoteData):  # can be a folder
            remote_copy_list.append((fileobj.computer.uuid, fileobj.get_remote_path(), dest_rel_path))
        else:
            raise exceptions.InputValidationError(
                'If you ask to copy a file link {}, '
                'it must be either a SinglefileData or a RemoteData; it is instead of type {}'.format(
                    link_name, fileobj.__class__.__name__))

    if input_file_name is not None and not input_file_template:
        raise exceptions.InputValidationError(
            'If you give an input_file_name, you must also specify a input_file_template')

    if input_through_stdin and input_file_name is None:
        raise exceptions.InputValidationError(
            'If you ask for input_through_stdin you have to specify a input_file_name')

    input_content = input_file_template.format(**parameters)
    if input_file_name:
        folder.create_file_from_filelike(io.StringIO(input_content), input_file_name, 'w', encoding='utf8')
    elif input_file_template:
        self.logger.warning('No input file name passed, but a input file template is present')

    cmdline_params = [i.format(**parameters) for i in cmdline_params_tmpl]

    calcinfo = CalcInfo()
    calcinfo.retrieve_list = []
    calcinfo.retrieve_temporary_list = []

    calcinfo.local_copy_list = local_copy_list
    calcinfo.remote_copy_list = remote_copy_list

    codeinfo = CodeInfo()
    codeinfo.cmdline_params = cmdline_params

    if input_through_stdin:
        codeinfo.stdin_name = input_file_name

    if output_file_name:
        codeinfo.stdout_name = output_file_name
        calcinfo.retrieve_list.append(output_file_name)

    if retrieve_temporary_files:
        calcinfo.retrieve_temporary_list = retrieve_temporary_files

    codeinfo.code_uuid = code.uuid
    calcinfo.codes_info = [codeinfo]

    return calcinfo