def _prepare_for_retrieval(self, open_transport):
    """
    Prepare the calculation for retrieval by the daemon.

    :param open_transport: An open instance of the transport class of the
        calculation's computer.
    :type open_transport: aiida.transport.plugins.local.LocalTransport
        | aiida.transport.plugins.ssh.SshTransport

    Here, we

    * manually set the files to retrieve
    * store the calculation and all its input nodes
    * copy the original input file to the calculation's raw_input_folder in
      the repository
    * store the remote_workdir as a RemoteData output node
    """
    # Manually set the files that will be copied to the repository and that
    # the parser will extract the results from. This would normally be
    # performed in self._prepare_for_submission prior to submission.
    self._set_attr('retrieve_list',
                   [self._OUTPUT_FILE_NAME, self._DATAFILE_XML])
    self._set_attr('retrieve_singlefile_list', [])

    # Make sure the calculation and input links are stored.
    self.store_all()

    # Store the original input file in the calculation's repository folder.
    remote_path = os.path.join(self._get_remote_workdir(),
                               self._INPUT_FILE_NAME)
    raw_input_folder = self.folder.get_subfolder(_input_subfolder,
                                                 create=True)
    open_transport.get(remote_path, raw_input_folder.abspath)

    # Manually add the remote working directory as a RemoteData output node.
    self._set_state(calc_states.SUBMITTING)
    remotedata = RemoteData(computer=self.get_computer(),
                            remote_path=self._get_remote_workdir())
    remotedata.add_link_from(self, label='remote_folder',
                             link_type=LinkType.CREATE)
    remotedata.store()


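# A minimal usage sketch for _prepare_for_retrieval(), assuming `calc` is an
# instance of the (immigrant-style) calculation class that defines the method
# above and `transport` is an AiiDA transport for the calculation's computer;
# how these two objects are obtained is not shown here and is an assumption
# made for illustration. The transport must be open for the whole call, since
# the method fetches the original input file with open_transport.get().
transport.open()
try:
    calc._prepare_for_retrieval(transport)
finally:
    transport.close()

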
def submit_calculation(calculation, transport):
    """
    Submit a calculation.

    :param calculation: the instance of JobCalculation to submit.
    :param transport: an already opened transport to use to submit the
        calculation.
    """
    from aiida.orm import Code
    from aiida.common.exceptions import InputValidationError
    from aiida.orm.data.remote import RemoteData

    computer = calculation.get_computer()

    if not computer.is_enabled():
        return

    logger_extra = get_dblogger_extra(calculation)
    transport._set_logger_extra(logger_extra)

    if calculation._has_cached_links():
        raise ValueError("Cannot submit calculation {} because it has "
                         "cached input links! If you just want to test the "
                         "submission, use the test_submit() method, "
                         "otherwise store all links first".format(
                             calculation.pk))

    s = computer.get_scheduler()
    s.set_transport(transport)

    with SandboxFolder() as folder:
        calcinfo, script_filename = calculation._presubmit(
            folder, use_unstored_links=False)

        codes_info = calcinfo.codes_info
        input_codes = [load_node(_.code_uuid, sub_class=Code)
                       for _ in codes_info]

        for code in input_codes:
            if not code.can_run_on(computer):
                raise InputValidationError(
                    "The selected code {} for calculation {} cannot run on "
                    "computer {}".format(code.pk, calculation.pk,
                                         computer.name))

        # After this call, no modifications to the folder should be done.
        calculation._store_raw_input_folder(folder.abspath)

        # NOTE: some logic is partially replicated in the 'test_submit'
        # method of JobCalculation. If major logic changes are done here,
        # make sure to update also the test_submit routine.
        remote_user = transport.whoami()
        # TODO Doc: {username} field
        # TODO: if something is changed here, fix also 'verdi computer test'
        remote_working_directory = computer.get_workdir().format(
            username=remote_user)
        if not remote_working_directory.strip():
            raise exceptions.ConfigurationError(
                "[submission of calculation {}] "
                "No remote_working_directory configured for computer "
                "'{}'".format(calculation.pk, computer.name))

        # If it already exists, no exception is raised.
        try:
            transport.chdir(remote_working_directory)
        except IOError:
            execlogger.debug(
                "[submission of calculation {}] "
                "Unable to chdir in {}, trying to create it".format(
                    calculation.pk, remote_working_directory),
                extra=logger_extra)
            try:
                transport.makedirs(remote_working_directory)
                transport.chdir(remote_working_directory)
            except (IOError, OSError) as e:
                raise exceptions.ConfigurationError(
                    "[submission of calculation {}] "
                    "Unable to create the remote directory {} on "
                    "computer '{}': {}".format(calculation.pk,
                                               remote_working_directory,
                                               computer.name, e.message))

        # Store remotely with sharding (here is where we choose the folder
        # structure of remote jobs; then I store this in the calculation
        # properties using _set_remote_workdir and I do not have to know the
        # logic, but I just need to read the absolute path from the
        # calculation properties).
        transport.mkdir(calcinfo.uuid[:2], ignore_existing=True)
        transport.chdir(calcinfo.uuid[:2])
        transport.mkdir(calcinfo.uuid[2:4], ignore_existing=True)
        transport.chdir(calcinfo.uuid[2:4])
        transport.mkdir(calcinfo.uuid[4:])
        transport.chdir(calcinfo.uuid[4:])

        workdir = transport.getcwd()
        # I store the workdir of the calculation for later file retrieval.
        calculation._set_remote_workdir(workdir)

        # I first create the code files, so that the code can put default
        # files to be overwritten by the plugin itself. Still, beware! The
        # code file itself could be overwritten... But I checked for this
        # earlier.
        for code in input_codes:
            if code.is_local():
                # Note: this will possibly overwrite files
                for f in code.get_folder_list():
                    transport.put(code.get_abs_path(f), f)
                transport.chmod(code.get_local_executable(), 0o755)  # rwxr-xr-x

        # Copy all files, recursively with folders.
        for f in folder.get_content_list():
            execlogger.debug("[submission of calculation {}] "
                             "copying file/folder {}...".format(
                                 calculation.pk, f),
                             extra=logger_extra)
            transport.put(folder.get_abs_path(f), f)

        # local_copy_list is a list of tuples, each with
        # (src_abs_path, dest_rel_path).
        # NOTE: validation of these lists is done inside
        # calculation._presubmit()
        local_copy_list = calcinfo.local_copy_list
        remote_copy_list = calcinfo.remote_copy_list
        remote_symlink_list = calcinfo.remote_symlink_list

        if local_copy_list is not None:
            for src_abs_path, dest_rel_path in local_copy_list:
                execlogger.debug("[submission of calculation {}] "
                                 "copying local file/folder to {}".format(
                                     calculation.pk, dest_rel_path),
                                 extra=logger_extra)
                transport.put(src_abs_path, dest_rel_path)

        if remote_copy_list is not None:
            for (remote_computer_uuid, remote_abs_path,
                 dest_rel_path) in remote_copy_list:
                if remote_computer_uuid == computer.uuid:
                    execlogger.debug(
                        "[submission of calculation {}] "
                        "copying {} remotely, directly on the machine "
                        "{}".format(calculation.pk, dest_rel_path,
                                    computer.name))
                    try:
                        transport.copy(remote_abs_path, dest_rel_path)
                    except (IOError, OSError):
                        execlogger.warning(
                            "[submission of calculation {}] "
                            "Unable to copy remote resource from {} to {}! "
                            "Stopping.".format(calculation.pk,
                                               remote_abs_path,
                                               dest_rel_path),
                            extra=logger_extra)
                        raise
                else:
                    # TODO: implement copy between two different machines!
                    raise NotImplementedError(
                        "[presubmission of calculation {}] "
                        "Remote copy between two different machines is "
                        "not implemented yet".format(calculation.pk))

        if remote_symlink_list is not None:
            for (remote_computer_uuid, remote_abs_path,
                 dest_rel_path) in remote_symlink_list:
                if remote_computer_uuid == computer.uuid:
                    execlogger.debug(
                        "[submission of calculation {}] "
                        "copying {} remotely, directly on the machine "
                        "{}".format(calculation.pk, dest_rel_path,
                                    computer.name))
                    try:
                        transport.symlink(remote_abs_path, dest_rel_path)
                    except (IOError, OSError):
                        execlogger.warning(
                            "[submission of calculation {}] "
                            "Unable to create remote symlink from {} to {}! "
                            "Stopping.".format(calculation.pk,
                                               remote_abs_path,
                                               dest_rel_path),
                            extra=logger_extra)
                        raise
                else:
                    raise IOError("It is not possible to create a symlink "
                                  "between two different machines for "
                                  "calculation {}".format(calculation.pk))

        remotedata = RemoteData(computer=computer, remote_path=workdir)
        remotedata.add_link_from(calculation, label='remote_folder',
                                 link_type=LinkType.CREATE)
        remotedata.store()

        job_id = s.submit_from_script(transport.getcwd(), script_filename)
        calculation._set_job_id(job_id)


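# The sharding above turns the calculation UUID into a two-level directory
# tree under the computer's work directory. A minimal, self-contained sketch
# of the resulting path (plain Python; the UUID and work directory below are
# made up for illustration):
import os

calc_uuid = '4af2b1c8-9a10-4f3e-8c5d-0123456789ab'      # illustrative only
remote_working_directory = '/scratch/aiida/work'        # assumed workdir

# Same sharding as in submit_calculation(): the first two characters of the
# UUID, the next two, then the remainder become nested directories.
workdir = os.path.join(remote_working_directory,
                       calc_uuid[:2], calc_uuid[2:4], calc_uuid[4:])
print(workdir)
# /scratch/aiida/work/4a/f2/b1c8-9a10-4f3e-8c5d-0123456789ab

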
def test_complex_graph_import_export(self):
    """
    This test checks that a small but relatively complex graph can be
    correctly exported and imported. It will create the graph, store it in
    the database, export it to a file and import it again. In the end it
    will check whether the initial nodes are present in the imported graph.
    """
    import tempfile
    import shutil
    import os

    from aiida.orm.calculation.job import JobCalculation
    from aiida.orm.data.folder import FolderData
    from aiida.orm.data.parameter import ParameterData
    from aiida.orm.data.remote import RemoteData
    from aiida.common.links import LinkType
    from aiida.orm.importexport import export, import_data
    from aiida.orm.utils import load_node
    from aiida.common.exceptions import NotExistent

    temp_folder = tempfile.mkdtemp()
    try:
        calc1 = JobCalculation()
        calc1.set_computer(self.computer)
        calc1.set_resources({"num_machines": 1,
                             "num_mpiprocs_per_machine": 1})
        calc1.label = "calc1"
        calc1.store()
        calc1._set_state(u'RETRIEVING')

        pd1 = ParameterData()
        pd1.label = "pd1"
        pd1.store()

        pd2 = ParameterData()
        pd2.label = "pd2"
        pd2.store()

        rd1 = RemoteData()
        rd1.label = "rd1"
        rd1.set_remote_path("/x/y.py")
        rd1.set_computer(self.computer)
        rd1.store()
        rd1.add_link_from(calc1, link_type=LinkType.CREATE)

        calc2 = JobCalculation()
        calc2.set_computer(self.computer)
        calc2.set_resources({"num_machines": 1,
                             "num_mpiprocs_per_machine": 1})
        calc2.label = "calc2"
        calc2.store()
        calc2.add_link_from(pd1, link_type=LinkType.INPUT)
        calc2.add_link_from(pd2, link_type=LinkType.INPUT)
        calc2.add_link_from(rd1, link_type=LinkType.INPUT)
        calc2._set_state(u'SUBMITTING')

        fd1 = FolderData()
        fd1.label = "fd1"
        fd1.store()
        fd1.add_link_from(calc2, link_type=LinkType.CREATE)

        node_uuids_labels = {calc1.uuid: calc1.label, pd1.uuid: pd1.label,
                             pd2.uuid: pd2.label, rd1.uuid: rd1.label,
                             calc2.uuid: calc2.label, fd1.uuid: fd1.label}

        filename = os.path.join(temp_folder, "export.tar.gz")
        export([fd1.dbnode], outfile=filename, silent=True)

        self.clean_db()

        import_data(filename, silent=True, ignore_unknown_nodes=True)

        for uuid, label in node_uuids_labels.iteritems():
            try:
                load_node(uuid)
            except NotExistent:
                self.fail("Node with UUID {} and label {} was not "
                          "found.".format(uuid, label))
    finally:
        # Delete the created temporary folder
        shutil.rmtree(temp_folder, ignore_errors=True)


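# For reference, the provenance graph assembled by the test above is
# (arrows point from the source to the target of each link):
#
#   calc1 --CREATE--> rd1 ---INPUT--> calc2 --CREATE--> fd1
#                     pd1 ---INPUT--> calc2
#                     pd2 ---INPUT--> calc2
#
# Only fd1 is passed to export(), so the test effectively asserts that the
# exporter follows the links backwards and that all six nodes reappear after
# clean_db() and import_data().

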
def submit_calc(calc, authinfo, transport=None):
    """
    Submit a calculation.

    :note: if no transport is passed, a new transport is opened and then
        closed within this function. If you want to use an already opened
        transport, pass it as further parameter. In this case, the transport
        has to be already open, and must coincide with the transport of the
        computer defined by the authinfo.

    :param calc: the calculation to submit (an instance of the
        aiida.orm.JobCalculation class)
    :param authinfo: the authinfo for this calculation.
    :param transport: if passed, must be an already opened transport. No
        checks are done on the consistency of the given transport with the
        transport of the computer defined in the authinfo.
    """
    from aiida.orm import Code, Computer
    from aiida.common.folders import SandboxFolder
    from aiida.common.exceptions import InputValidationError
    from aiida.orm.data.remote import RemoteData
    from aiida.utils.logger import get_dblogger_extra

    if not authinfo.enabled:
        return

    logger_extra = get_dblogger_extra(calc)

    if transport is None:
        t = authinfo.get_transport()
        must_open_t = True
    else:
        t = transport
        must_open_t = False

    t._set_logger_extra(logger_extra)

    if calc._has_cached_links():
        raise ValueError("Cannot submit calculation {} because it has "
                         "cached input links! If you just want to test the "
                         "submission, use the test_submit() method, "
                         "otherwise store all links first".format(calc.pk))

    # Double check, in case the calculation was 'killed' (and therefore put
    # in the 'FAILED' state) in the meantime. Do it as near as possible to
    # the state change below (it would be even better to do it with some
    # sort of transaction).
    if calc.get_state() != calc_states.TOSUBMIT:
        raise ValueError("Can only submit calculations with state=TOSUBMIT! "
                         "(state of calc {} is {} instead)".format(
                             calc.pk, calc.get_state()))

    # I start to submit the calculation: I set the state.
    try:
        calc._set_state(calc_states.SUBMITTING)
    except ModificationNotAllowed:
        raise ValueError("The calculation has already been submitted by "
                         "someone else!")

    try:
        if must_open_t:
            t.open()

        s = Computer(dbcomputer=authinfo.dbcomputer).get_scheduler()
        s.set_transport(t)

        computer = calc.get_computer()

        with SandboxFolder() as folder:
            calcinfo, script_filename = calc._presubmit(
                folder, use_unstored_links=False)

            codes_info = calcinfo.codes_info
            input_codes = [load_node(_.code_uuid, parent_class=Code)
                           for _ in codes_info]

            for code in input_codes:
                if not code.can_run_on(computer):
                    raise InputValidationError(
                        "The selected code {} for calculation {} cannot "
                        "run on computer {}".format(code.pk, calc.pk,
                                                    computer.name))

            # After this call, no modifications to the folder should be done.
            calc._store_raw_input_folder(folder.abspath)

            # NOTE: some logic is partially replicated in the 'test_submit'
            # method of JobCalculation. If major logic changes are done
            # here, make sure to update also the test_submit routine.
            remote_user = t.whoami()
            # TODO Doc: {username} field
            # TODO: if something is changed here, fix also 'verdi computer test'
            remote_working_directory = authinfo.get_workdir().format(
                username=remote_user)
            if not remote_working_directory.strip():
                raise ConfigurationError(
                    "[submission of calc {}] "
                    "No remote_working_directory configured for computer "
                    "'{}'".format(calc.pk, computer.name))

            # If it already exists, no exception is raised.
            try:
                t.chdir(remote_working_directory)
            except IOError:
                execlogger.debug(
                    "[submission of calc {}] "
                    "Unable to chdir in {}, trying to create it".format(
                        calc.pk, remote_working_directory),
                    extra=logger_extra)
                try:
                    t.makedirs(remote_working_directory)
                    t.chdir(remote_working_directory)
                except (IOError, OSError) as e:
                    raise ConfigurationError(
                        "[submission of calc {}] "
                        "Unable to create the remote directory {} on "
                        "computer '{}': {}".format(calc.pk,
                                                   remote_working_directory,
                                                   computer.name, e.message))

            # Store remotely with sharding (here is where we choose the
            # folder structure of remote jobs; then I store this in the
            # calculation properties using _set_remote_workdir and I do not
            # have to know the logic, but I just need to read the absolute
            # path from the calculation properties).
            t.mkdir(calcinfo.uuid[:2], ignore_existing=True)
            t.chdir(calcinfo.uuid[:2])
            t.mkdir(calcinfo.uuid[2:4], ignore_existing=True)
            t.chdir(calcinfo.uuid[2:4])
            t.mkdir(calcinfo.uuid[4:])
            t.chdir(calcinfo.uuid[4:])

            workdir = t.getcwd()
            # I store the workdir of the calculation for later file
            # retrieval.
            calc._set_remote_workdir(workdir)

            # I first create the code files, so that the code can put
            # default files to be overwritten by the plugin itself. Still,
            # beware! The code file itself could be overwritten... But I
            # checked for this earlier.
            for code in input_codes:
                if code.is_local():
                    # Note: this will possibly overwrite files
                    for f in code.get_folder_list():
                        t.put(code.get_abs_path(f), f)
                    t.chmod(code.get_local_executable(), 0o755)  # rwxr-xr-x

            # Copy all files, recursively with folders.
            for f in folder.get_content_list():
                execlogger.debug("[submission of calc {}] "
                                 "copying file/folder {}...".format(
                                     calc.pk, f),
                                 extra=logger_extra)
                t.put(folder.get_abs_path(f), f)

            # local_copy_list is a list of tuples, each with
            # (src_abs_path, dest_rel_path).
            # NOTE: validation of these lists is done inside
            # calc._presubmit()
            local_copy_list = calcinfo.local_copy_list
            remote_copy_list = calcinfo.remote_copy_list
            remote_symlink_list = calcinfo.remote_symlink_list

            if local_copy_list is not None:
                for src_abs_path, dest_rel_path in local_copy_list:
                    execlogger.debug("[submission of calc {}] "
                                     "copying local file/folder to {}".format(
                                         calc.pk, dest_rel_path),
                                     extra=logger_extra)
                    t.put(src_abs_path, dest_rel_path)

            if remote_copy_list is not None:
                for (remote_computer_uuid, remote_abs_path,
                     dest_rel_path) in remote_copy_list:
                    if remote_computer_uuid == computer.uuid:
                        execlogger.debug(
                            "[submission of calc {}] "
                            "copying {} remotely, directly on the machine "
                            "{}".format(calc.pk, dest_rel_path,
                                        computer.name))
                        try:
                            t.copy(remote_abs_path, dest_rel_path)
                        except (IOError, OSError):
                            execlogger.warning(
                                "[submission of calc {}] "
                                "Unable to copy remote resource from {} to "
                                "{}! Stopping.".format(calc.pk,
                                                       remote_abs_path,
                                                       dest_rel_path),
                                extra=logger_extra)
                            raise
                    else:
                        # TODO: implement copy between two different
                        # machines!
                        raise NotImplementedError(
                            "[presubmission of calc {}] "
                            "Remote copy between two different machines is "
                            "not implemented yet".format(calc.pk))

            if remote_symlink_list is not None:
                for (remote_computer_uuid, remote_abs_path,
                     dest_rel_path) in remote_symlink_list:
                    if remote_computer_uuid == computer.uuid:
                        execlogger.debug(
                            "[submission of calc {}] "
                            "copying {} remotely, directly on the machine "
                            "{}".format(calc.pk, dest_rel_path,
                                        computer.name))
                        try:
                            t.symlink(remote_abs_path, dest_rel_path)
                        except (IOError, OSError):
                            execlogger.warning(
                                "[submission of calc {}] "
                                "Unable to create remote symlink from {} to "
                                "{}! Stopping.".format(calc.pk,
                                                       remote_abs_path,
                                                       dest_rel_path),
                                extra=logger_extra)
                            raise
                    else:
                        raise IOError("It is not possible to create a "
                                      "symlink between two different "
                                      "machines for calculation {}".format(
                                          calc.pk))

            remotedata = RemoteData(computer=computer, remote_path=workdir)
            remotedata.add_link_from(calc, label='remote_folder',
                                     link_type=LinkType.CREATE)
            remotedata.store()

            job_id = s.submit_from_script(t.getcwd(), script_filename)
            calc._set_job_id(job_id)

            # This should always be possible, because we should be the only
            # ones submitting this calculation, so I do not check the
            # ModificationNotAllowed.
            calc._set_state(calc_states.WITHSCHEDULER)
            ## I do not set the state to queued; in this way, if the
            ## daemon is down, the user sees '(unknown)' as last state
            ## and understands that the daemon is not running.
            # if job_tmpl.submit_as_hold:
            #     calc._set_scheduler_state(job_states.QUEUED_HELD)
            # else:
            #     calc._set_scheduler_state(job_states.QUEUED)

            execlogger.debug("submitted calculation {} on {} with "
                             "jobid {}".format(calc.pk, computer.name,
                                               job_id),
                             extra=logger_extra)

    except Exception as e:
        import traceback
        try:
            calc._set_state(calc_states.SUBMISSIONFAILED)
        except ModificationNotAllowed:
            # Someone already set it, just skip
            pass

        execlogger.error("Submission of calc {} failed, check also the "
                         "log file! Traceback: {}".format(
                             calc.pk, traceback.format_exc()),
                         extra=logger_extra)
        raise

    finally:
        # Close the transport, but only if it was opened within this
        # function.
        if must_open_t:
            t.close()


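# A rough caller-side sketch for submit_calc(). Everything other than
# submit_calc() itself and authinfo.get_transport() is an assumption made
# for illustration; in particular, get_authinfo_for() is a hypothetical
# placeholder for however the caller looks up the DbAuthInfo of the
# calculation's computer and user.
from aiida.orm.utils import load_node

calc = load_node(1234)              # assumed pk of a JobCalculation in TOSUBMIT
authinfo = get_authinfo_for(calc)   # hypothetical helper, see note above

# Either let submit_calc() open and close its own transport ...
submit_calc(calc, authinfo)

# ... or reuse an already opened transport (it must stay open for the call).
t = authinfo.get_transport()
t.open()
try:
    submit_calc(calc, authinfo, transport=t)
finally:
    t.close()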