def submit_jobs():
    """
    Submit all jobs in the TOSUBMIT state.

    The TOSUBMIT calculations are grouped by (computer, user) pair. For each
    pair the authentication info is fetched and the submission is delegated
    to ``submit_jobs_with_authinfo``. If no authentication info is available
    for the pair (``AuthenticationError``), every TOSUBMIT calculation of
    that pair is moved to SUBMISSIONFAILED and an error is logged. Any other
    exception is printed and logged, and the loop continues with the next
    pair.
    """
    import traceback

    from aiida.orm import JobCalculation
    from aiida.utils.logger import get_dblogger_extra
    from aiida.backends.utils import get_authinfo

    computers_users_to_check = list(
        JobCalculation._get_all_with_state(state=calc_states.TOSUBMIT,
                                           only_computer_user_pairs=True,
                                           only_enabled=True))

    for computer, aiidauser in computers_users_to_check:
        execlogger.debug("({},{}) pair to submit".format(
            aiidauser.email, computer.name))

        try:
            try:
                authinfo = get_authinfo(computer.dbcomputer, aiidauser._dbuser)
            except AuthenticationError:
                # Without AuthInfo the calculations cannot be submitted:
                # put each of them in the SUBMISSIONFAILED state.
                calcs_to_inquire = JobCalculation._get_all_with_state(
                    state=calc_states.TOSUBMIT,
                    computer=computer,
                    user=aiidauser)
                for calc in calcs_to_inquire:
                    try:
                        calc._set_state(calc_states.SUBMISSIONFAILED)
                    except ModificationNotAllowed:
                        # Someone already set it, just skip
                        pass
                    logger_extra = get_dblogger_extra(calc)
                    execlogger.error("Submission of calc {} failed, "
                                     "computer pk= {} ({}) is not configured "
                                     "for aiidauser {}".format(
                                         calc.pk, computer.pk,
                                         computer.get_name(), aiidauser.email),
                                     extra=logger_extra)
                # Go to the next (dbcomputer,aiidauser) pair
                continue

            submit_jobs_with_authinfo(authinfo)
        except Exception as e:
            msg = ("Error while submitting jobs "
                   "for aiidauser={} on computer={}, "
                   "error type is {}, traceback: {}".format(
                       aiidauser.email, computer.name,
                       e.__class__.__name__, traceback.format_exc()))
            # Call form of print: same output on Python 2, valid on Python 3
            print(msg)
            execlogger.error(msg)
            # Continue with next computer
            continue
def get_calculations(self, state=None):
    """
    Query the JobCalculations whose ``workflow_step`` is this object.

    :param state: if not None, the query is further filtered to the
        calculations having a ``state`` attribute with this text value.
    :return: the (optionally filtered) result of ``JobCalculation.query``.
    """
    from aiida.orm import JobCalculation

    calculations = JobCalculation.query(workflow_step=self)
    if state is None:
        return calculations
    return calculations.filter(dbattributes__key="state",
                               dbattributes__tval=state)
def test_with_subclasses(self, computer):
    """
    Check that ``query`` on a node class only returns instances of that
    class (or of its subclasses).

    Seven nodes of different classes are stored; five of them are tagged
    with an extra specific to this test so that nodes created by other
    tests do not interfere with the results.

    :param computer: the computer passed to the calculations created here.
    """
    extra_name = self.__class__.__name__ + "/test_with_subclasses"
    calc_params = {
        'computer': computer,
        'resources': {
            'num_machines': 1,
            'num_mpiprocs_per_machine': 1
        }
    }

    TemplateReplacerCalc = CalculationFactory(
        'simpleplugins.templatereplacer')
    ParameterData = DataFactory('parameter')

    a1 = JobCalculation(**calc_params).store()
    # To query only these nodes later
    a1.set_extra(extra_name, True)
    a2 = TemplateReplacerCalc(**calc_params).store()
    # To query only these nodes later
    a2.set_extra(extra_name, True)
    a3 = Data().store()
    a3.set_extra(extra_name, True)
    a4 = ParameterData(dict={'a': 'b'}).store()
    a4.set_extra(extra_name, True)
    a5 = Node().store()
    a5.set_extra(extra_name, True)
    # The extra is deliberately not set on the next two nodes, to make sure
    # that the filtering works. The filtering is needed because other tests
    # will put stuff in the DB.
    a6 = JobCalculation(**calc_params)
    a6.store()
    a7 = Node()
    a7.store()

    # Query by calculation
    results = list(JobCalculation.query(dbextras__key=extra_name))
    # a3, a4, a5 should not be found because they are not JobCalculations.
    # a6, a7 should not be found because they do not have the extra set.
    self.assertEqual({i.pk for i in results}, {a1.pk, a2.pk})

    # Same query, but by the generic Node class
    results = list(Node.query(dbextras__key=extra_name))
    self.assertEqual({i.pk for i in results},
                     {a1.pk, a2.pk, a3.pk, a4.pk, a5.pk})

    # Same query, but by the Data class
    results = list(Data.query(dbextras__key=extra_name))
    self.assertEqual({i.pk for i in results}, {a3.pk, a4.pk})

    # Same query, but by the ParameterData subclass
    results = list(ParameterData.query(dbextras__key=extra_name))
    self.assertEqual({i.pk for i in results}, {a4.pk})

    # Same query, but by the TemplateReplacerCalc subclass
    results = list(TemplateReplacerCalc.query(dbextras__key=extra_name))
    self.assertEqual({i.pk for i in results}, {a2.pk})
def fill_repo(self):
    """
    Store a fixed set of nodes of different classes in the database.

    Five nodes (a JobCalculation, a TemplateReplacerCalc, a Data, a
    ParameterData and a Node) are stored and tagged with an extra whose key
    is built from the class name; one more JobCalculation and one more Node
    are stored without the extra.
    """
    from aiida.orm import JobCalculation, CalculationFactory, Data, DataFactory

    tag = self.__class__.__name__ + "/test_with_subclasses"
    calc_params = {
        'computer': self.computer,
        'resources': {
            'num_machines': 1,
            'num_mpiprocs_per_machine': 1
        }
    }

    TemplateReplacerCalc = CalculationFactory(
        'simpleplugins.templatereplacer')
    ParameterData = DataFactory('parameter')

    def store_tagged(node):
        # Store the node and mark it, so it can be queried by extra later
        stored = node.store()
        stored.set_extra(tag, True)

    store_tagged(JobCalculation(**calc_params))
    store_tagged(TemplateReplacerCalc(**calc_params))
    store_tagged(Data())
    store_tagged(ParameterData(dict={'a': 'b'}))
    store_tagged(Node())

    # These two are stored without the extra, to be sure that filtering on
    # the extra works (other tests also put nodes in the DB)
    untagged_calc = JobCalculation(**calc_params)
    untagged_calc.store()
    untagged_node = Node()
    untagged_node.store()
def update_jobs():
    """
    Call an update for each (computer, aiidauser) pair that has
    calculations in the WITHSCHEDULER state.

    For each pair the authentication info is fetched and passed to
    ``update_running_calcs_status``. Any exception is logged and the loop
    continues with the next pair.
    """
    from aiida.orm import JobCalculation
    from aiida.backends.utils import get_authinfo

    # Unique set of pairs (computer, aiidauser)
    computers_users_to_check = list(
        JobCalculation._get_all_with_state(state=calc_states.WITHSCHEDULER,
                                           only_computer_user_pairs=True,
                                           only_enabled=True))

    for computer, aiidauser in computers_users_to_check:
        execlogger.debug("({},{}) pair to check".format(
            aiidauser.email, computer.name))

        try:
            authinfo = get_authinfo(computer.dbcomputer, aiidauser._dbuser)
            update_running_calcs_status(authinfo)
        except Exception as e:
            # Format the exception itself: ``e.message`` is deprecated and
            # missing on Python 3, where reading it would raise from here.
            msg = ("Error while updating calculation status "
                   "for aiidauser={} on computer={}, "
                   "error type is {}, error message: {}".format(
                       aiidauser.email, computer.name,
                       e.__class__.__name__, e))
            execlogger.error(msg)
            # Continue with next computer
            continue
def retrieve_jobs():
    """
    Retrieve the computed calculations for each (computer, aiidauser) pair
    that has calculations in the COMPUTED state.

    For each pair the authentication info is fetched and passed to
    ``retrieve_computed_for_authinfo``. Any exception is logged and the loop
    continues with the next pair.
    """
    from aiida.orm import JobCalculation
    from aiida.backends.utils import get_authinfo

    # Unique set of pairs (computer, aiidauser)
    computers_users_to_check = list(
        JobCalculation._get_all_with_state(state=calc_states.COMPUTED,
                                           only_computer_user_pairs=True,
                                           only_enabled=True))

    for computer, aiidauser in computers_users_to_check:
        execlogger.debug("({},{}) pair to check".format(
            aiidauser.email, computer.name))
        try:
            authinfo = get_authinfo(computer.dbcomputer, aiidauser._dbuser)
            retrieve_computed_for_authinfo(authinfo)
        except Exception as e:
            # Format the exception itself: ``e.message`` is deprecated and
            # missing on Python 3, where reading it would raise from here.
            msg = ("Error while retrieving calculation status for "
                   "aiidauser={} on computer={}, "
                   "error type is {}, error message: {}".format(
                       aiidauser.email, computer.name,
                       e.__class__.__name__, e))
            execlogger.error(msg)
            # Continue with next computer
            continue
def test_deletion(self):
    """
    Check that a computer can be deleted only while no calculation uses it.
    """
    from aiida.orm.computer import Computer
    from aiida.orm import delete_computer, JobCalculation
    from aiida.common.exceptions import InvalidOperation

    unused_computer = Computer(name="testdeletioncomputer",
                               hostname='localhost',
                               transport_type='local',
                               scheduler_type='pbspro',
                               workdir='/tmp/aiida').store()

    # Nothing is using this computer, so the deletion must succeed
    delete_computer(unused_computer)

    resources = {'num_machines': 1, 'num_mpiprocs_per_machine': 1}
    JobCalculation(computer=self.computer, resources=resources).store()

    # The calculation stored just above uses self.computer, so deleting
    # that computer must fail
    with self.assertRaises(InvalidOperation):
        delete_computer(self.computer)
def sub_create_bands_data(cls, user=None):
    """
    Create and store a BandsData node together with the nodes it is
    linked to (a structure, a calculation and a k-points node).

    :param user: if not None, its ``_dbuser`` is assigned as the user of
        every node before the node is stored.
    :return: the stored BandsData node.
    """
    from aiida.orm.data.array.kpoints import KpointsData
    from aiida.orm import JobCalculation
    from aiida.orm.data.structure import StructureData
    from aiida.common.links import LinkType
    from aiida.orm.data.array.bands import BandsData
    import numpy

    def assign_owner(node):
        # Only override the node's user when one was explicitly requested
        if user is not None:
            node.dbnode.user = user._dbuser

    structure = StructureData(cell=((2., 0., 0.), (0., 2., 0.), (0., 0., 2.)))
    structure.append_atom(position=(0., 0., 0.),
                          symbols=['Ba', 'Ti'],
                          weights=(1., 0.),
                          name='mytype')
    assign_owner(structure)
    structure.store()

    resources = {'num_machines': 1, 'num_mpiprocs_per_machine': 1}
    calculation = JobCalculation(computer=cls.computer, resources=resources)
    assign_owner(calculation)
    calculation.store()
    calculation.add_link_from(structure, "S1", LinkType.INPUT)
    calculation._set_state(calc_states.RETRIEVING)

    # define a cell
    alat = 4.
    cell = numpy.array([
        [alat, 0., 0.],
        [0., alat, 0.],
        [0., 0., alat],
    ])

    kpoints = KpointsData()
    kpoints.set_cell(cell)
    kpoints.set_kpoints_path()
    assign_owner(kpoints)
    kpoints.store()

    bands = BandsData()
    bands.set_kpointsdata(kpoints)
    # One row of four identical values per k-point; the value is the index
    # of the k-point
    num_kpoints = kpoints.get_kpoints().shape[0]
    band_rows = [numpy.ones(4) * index for index in range(num_kpoints)]
    bands.set_bands(numpy.array(band_rows), units='eV')
    assign_owner(bands)
    bands.store()
    bands.add_link_from(calculation, link_type=LinkType.CREATE)

    return bands
def setUpClass(cls):
    """
    Create the calculations used to test the CalculationParamType parameter
    type of the command line infrastructure.

    A first calculation gets a plain label; two more calculations are then
    deliberately given labels equal to the ID and the UUID, respectively, of
    the first one. This allows testing the rules that resolve the
    ambiguities arising when determining the identifier type.

    Four further, unstored nodes (one per calculation subclass) are also
    created.
    """
    super(TestCalculationParamType, cls).setUpClass()

    cls.param = CalculationParamType()

    first = Calculation().store()
    second = Calculation().store()
    third = Calculation().store()

    cls.entity_01 = first
    cls.entity_02 = second
    cls.entity_03 = third
    cls.entity_04 = FunctionCalculation()
    cls.entity_05 = InlineCalculation()
    cls.entity_06 = JobCalculation()
    cls.entity_07 = WorkCalculation()

    first.label = 'calculation_01'
    # Labels that collide with the pk and the uuid of the first entity
    second.label = str(first.pk)
    third.label = str(first.uuid)
def get_calculations(self):
    """
    Query the JobCalculations whose ``workflow_step`` matches ``self.steps``.

    :return: the result of ``JobCalculation.query``.
    """
    from aiida.orm import JobCalculation

    steps = self.steps
    return JobCalculation.query(workflow_step=steps)
def execute_steps():
    """
    Loop over the running workflow steps and advance them where possible.

    For each running step:

    * if the parent workflow is already FINISHED, the step is set to
      FINISHED as well;
    * if every calculation and every sub-workflow attached to the step has
      either finished correctly or failed, the step is set to FINISHED and
      the workflow is advanced with ``advance_workflow``;
    * otherwise, if the step has calculations in the NEW state, each of them
      is submitted; a failing submission is logged and does not stop the
      loop.

    In any other case the step is left untouched.
    """
    from aiida.orm import JobCalculation
    from aiida.orm.implementation import get_all_running_steps

    logger.info("Querying the worflow DB")

    running_steps = get_all_running_steps()

    for s in running_steps:
        if s.parent.state == wf_states.FINISHED:
            s.set_state(wf_states.FINISHED)
            continue

        w = s.parent.get_aiida_class()

        logger.info("[{0}] Found active step: {1}".format(w.pk, s.name))

        # Fetch the calculations and sub-workflows once per step instead of
        # issuing a new query for every check below
        calcs = list(s.get_calculations())
        sub_wfs = list(s.get_sub_workflows())

        s_calcs_new = [c.pk for c in calcs if c._is_new()]
        num_calcs_done = (sum(1 for c in calcs if c.has_finished_ok()) +
                          sum(1 for c in calcs if c.has_failed()))
        num_sub_wfs_done = (sum(1 for sw in sub_wfs if sw.has_finished_ok()) +
                            sum(1 for sw in sub_wfs if sw.has_failed()))

        if (len(calcs) == num_calcs_done and
                len(sub_wfs) == num_sub_wfs_done):
            logger.info("[{0}] Step: {1} ready to move".format(w.pk, s.name))

            s.set_state(wf_states.FINISHED)
            advance_workflow(w, s)

        elif s_calcs_new:
            for pk in s_calcs_new:
                obj_calc = JobCalculation.get_subclass_from_pk(pk=pk)
                try:
                    obj_calc.submit()
                    logger.info(
                        "[{0}] Step: {1} launched calculation {2}".format(
                            w.pk, s.name, pk))
                except Exception:
                    # Keep going with the other calculations, but do not
                    # lose the reason of the failure
                    logger.error(
                        "[{0}] Step: {1} cannot launch calculation {2}".format(
                            w.pk, s.name, pk),
                        exc_info=True)
def setUpClass(cls, *args, **kwargs):
    """
    Create the fixtures shared by the tests of this class.

    Besides the runners, a computer, a code, a group and a node, one
    JobCalculation is stored for each value in ``calc_states``; the one in
    the PARSING state also gets an output ParameterData and is added to the
    group. Finally an archive containing an ArithmeticAddCalculation is
    imported and the first such calculation found is kept as
    ``cls.arithmetic_job``.
    """
    super(TestVerdiCalculation, cls).setUpClass(*args, **kwargs)
    from aiida.backends.tests.utils.fixtures import import_archive_fixture
    from aiida.common.exceptions import ModificationNotAllowed
    from aiida.common.links import LinkType
    from aiida.orm import Code, Computer, Group, Node, JobCalculation, CalculationFactory
    from aiida.orm.data.parameter import ParameterData
    from aiida.orm.querybuilder import QueryBuilder
    from aiida.work.processes import ProcessState

    rmq_config = rmq.get_rmq_config()

    # These two need to share a common event loop otherwise the first will
    # never send the message while the daemon is running listening to
    # intercept
    cls.runner = runners.Runner(
        rmq_config=rmq_config, rmq_submit=True, poll_interval=0.)
    cls.daemon_runner = runners.DaemonRunner(
        rmq_config=rmq_config, rmq_submit=True, poll_interval=0.)

    cls.computer = Computer(name='comp',
                            hostname='localhost',
                            transport_type='local',
                            scheduler_type='direct',
                            workdir='/tmp/aiida').store()

    cls.code = Code(remote_computer_exec=(cls.computer, '/bin/true')).store()
    cls.group = Group(name='test_group').store()
    cls.node = Node().store()
    cls.calcs = []

    from aiida.orm.backend import construct_backend
    backend = construct_backend()
    automatic_user = backend.users.get_automatic_user()
    authinfo = backend.authinfos.create(computer=cls.computer,
                                        user=automatic_user)
    authinfo.store()

    # Create one JobCalculation for each calculation state
    for calculation_state in calc_states:
        resources = {'num_machines': 1, 'num_mpiprocs_per_machine': 1}
        calc = JobCalculation(computer=cls.computer,
                              resources=resources).store()

        # Trying to set NEW will raise, but in this case we don't need to
        # change the state
        try:
            calc._set_state(calculation_state)
        except ModificationNotAllowed:
            pass

        # States with a known exit status are marked as finished with that
        # status; of the remaining ones only IMPORTED counts as finished
        try:
            exit_status = JobCalculationExitStatus[calculation_state]
        except KeyError:
            has_exit_status = False
        else:
            has_exit_status = True

        if has_exit_status:
            calc._set_exit_status(exit_status)
            calc._set_process_state(ProcessState.FINISHED)
        elif calculation_state == 'IMPORTED':
            calc._set_process_state(ProcessState.FINISHED)
        else:
            calc._set_process_state(ProcessState.RUNNING)

        cls.calcs.append(calc)

        if calculation_state != 'PARSING':
            continue

        cls.KEY_ONE = 'key_one'
        cls.KEY_TWO = 'key_two'
        cls.VAL_ONE = 'val_one'
        cls.VAL_TWO = 'val_two'

        output_parameters = ParameterData(dict={
            cls.KEY_ONE: cls.VAL_ONE,
            cls.KEY_TWO: cls.VAL_TWO,
        }).store()

        output_parameters.add_link_from(calc, 'output_parameters',
                                        link_type=LinkType.RETURN)

        # Create shortcut for easy dereferencing
        cls.result_job = calc

        # Add a single calc to a group
        cls.group.add_nodes([calc])

    # Load the fixture containing a single ArithmeticAddCalculation node
    import_archive_fixture('calculation/simpleplugins.arithmetic.add.aiida')

    # Get the imported ArithmeticAddCalculation node
    ArithmeticAddCalculation = CalculationFactory(
        'simpleplugins.arithmetic.add')
    first_row = QueryBuilder().append(ArithmeticAddCalculation).all()[0]
    cls.arithmetic_job = first_row[0]
def mounet_daemon():
    """
    Count the JobCalculations whose ``state`` attribute is 'WITHSCHEDULER'.

    :return: the value returned by ``count()`` on the query.
    """
    with_scheduler = JobCalculation.query(dbattributes__key='state',
                                          dbattributes__tval='WITHSCHEDULER')
    return with_scheduler.count()
def update_running_calcs_status(authinfo):
    """
    Update the states of calculations in WITHSCHEDULER status belonging
    to the user and machine defined by the given ``authinfo``.

    The scheduler is asked for the jobs it currently knows. A calculation
    is considered computed when the scheduler reports its job as DONE or
    does not report the job at all. For every computed calculation the
    detailed job info is stored (when the scheduler provides it) and the
    state is finally set to COMPUTED.

    :param authinfo: the authentication info; its ``enabled``,
        ``aiidauser``, ``dbcomputer`` and ``get_transport()`` are used.
    :return: the list of calculations found to be computed, or None when
        ``authinfo`` is not enabled.
    """
    from aiida.orm import JobCalculation, Computer
    from aiida.scheduler.datastructures import JobInfo
    from aiida.utils.logger import get_dblogger_extra

    if not authinfo.enabled:
        return

    execlogger.debug("Updating running calc status for user {} "
                     "and machine {}".format(authinfo.aiidauser.email,
                                             authinfo.dbcomputer.name))

    # NOTE: no further check is done that machine and
    # aiidauser are correct for each calc in calcs
    calcs_to_inquire = list(
        JobCalculation._get_all_with_state(state=calc_states.WITHSCHEDULER,
                                           computer=authinfo.dbcomputer,
                                           user=authinfo.aiidauser))

    s = Computer(dbcomputer=authinfo.dbcomputer).get_scheduler()
    t = authinfo.get_transport()

    computed = []

    # The connection is opened only if there are calculations to check
    if calcs_to_inquire:
        jobids_to_inquire = [str(c.get_job_id()) for c in calcs_to_inquire]

        # Open connection
        with t:
            s.set_transport(t)
            # TODO: Check if we are ok with filtering by job (to make this
            # work, the check on the retval for getJobs had to be removed,
            # because if the job has computed and is not in the output of
            # qstat, it gives a nonzero retval)

            # TODO: catch SchedulerError exception and do something
            # sensible (at least, skip this computer but continue with
            # following ones, and set a counter; set calculations to
            # UNKNOWN after a while?)
            if s.get_feature('can_query_by_user'):
                # NOTE(review): the user value below looks like a scrubbed
                # placeholder -- confirm the intended value against the
                # scheduler plugins
                found_jobs = s.getJobs(user="******", as_dict=True)
            else:
                found_jobs = s.getJobs(jobs=jobids_to_inquire, as_dict=True)

            # Update the status of the jobs
            for c in calcs_to_inquire:
                # Defined before the try so that the handler below never
                # sees an unbound name or the value of a previous iteration
                logger_extra = None
                try:
                    logger_extra = get_dblogger_extra(c)
                    t._set_logger_extra(logger_extra)

                    jobid = c.get_job_id()
                    if jobid is None:
                        execlogger.error("JobCalculation {} is WITHSCHEDULER "
                                         "but no job id was found!".format(
                                             c.pk),
                                         extra=logger_extra)
                        continue

                    # Check if the calculation is in the output of qstat
                    if jobid in found_jobs:
                        # jobinfo: the information returned by
                        # qstat for this job
                        jobinfo = found_jobs[jobid]
                        execlogger.debug("Inquirying calculation {} (jobid "
                                         "{}): it has job_state={}".format(
                                             c.pk, jobid, jobinfo.job_state),
                                         extra=logger_extra)
                        # For the moment, FAILED is not defined
                        if jobinfo.job_state in [job_states.DONE]:
                            computed.append(c)
                            try:
                                c._set_state(calc_states.COMPUTED)
                            except ModificationNotAllowed:
                                # Someone already set it, just skip
                                pass

                        # The WITHSCHEDULER state is not set again here:
                        # that would raise a ModificationNotAllowed

                        c._set_scheduler_state(jobinfo.job_state)

                        c._set_last_jobinfo(jobinfo)
                    else:
                        execlogger.debug("Inquirying calculation {} (jobid "
                                         "{}): not found, assuming "
                                         "job_state={}".format(
                                             c.pk, jobid, job_states.DONE),
                                         extra=logger_extra)

                        # calculation c is not found in the output of qstat
                        computed.append(c)
                        c._set_scheduler_state(job_states.DONE)
                except Exception as e:
                    # TODO: implement a counter, after N retrials
                    # set it to a status that
                    # requires the user intervention
                    execlogger.warning("There was an exception for "
                                       "calculation {} ({}): {}".format(
                                           c.pk, e.__class__.__name__, e),
                                       extra=logger_extra)
                    continue

            for c in computed:
                logger_extra = None
                try:
                    logger_extra = get_dblogger_extra(c)
                    try:
                        detailed_jobinfo = s.get_detailed_jobinfo(
                            jobid=c.get_job_id())
                    except NotImplementedError:
                        detailed_jobinfo = (
                            u"AiiDA MESSAGE: This scheduler does not implement "
                            u"the routine get_detailed_jobinfo to retrieve "
                            u"the information on "
                            u"a job after it has finished.")
                    last_jobinfo = c._get_last_jobinfo()
                    if last_jobinfo is None:
                        last_jobinfo = JobInfo()
                        last_jobinfo.job_id = c.get_job_id()
                        last_jobinfo.job_state = job_states.DONE
                    last_jobinfo.detailedJobinfo = detailed_jobinfo
                    c._set_last_jobinfo(last_jobinfo)
                except Exception as e:
                    execlogger.warning("There was an exception while "
                                       "retrieving the detailed jobinfo "
                                       "for calculation {} ({}): {}".format(
                                           c.pk, e.__class__.__name__, e),
                                       extra=logger_extra)
                    continue
                finally:
                    # Set the state to COMPUTED as the very last thing
                    # of this routine; no further change should be done after
                    # this, so that in general the retriever can just
                    # poll for this state, if we want to.
                    try:
                        c._set_state(calc_states.COMPUTED)
                    except ModificationNotAllowed:
                        # Someone already set it, just skip
                        pass

    return computed
def execute_steps():
    """
    Loop over the RUNNING workflows of the automatic user and advance them.

    For each workflow, every RUNNING step is checked:

    * if every calculation and every sub-workflow attached to the step has
      either finished correctly or failed, the step is set to FINISHED and
      the workflow is advanced with ``advance_workflow``;
    * otherwise, if the step has calculations in the NEW state, each of them
      is submitted; a failing submission is logged and does not stop the
      loop.

    A failed calculation no longer puts the step in the ERROR state.

    Then the INITIALIZED steps of the workflow are launched by calling the
    workflow method named after the step. If the call raises, the traceback
    is appended to the workflow report and both the step and the workflow
    are set to ERROR.

    Finally, every workflow that has at least one step in ERROR is set to
    ERROR.

    :return: True as soon as one INITIALIZED step has been launched without
        raising (the function returns right away in that case); None
        otherwise.
    """
    import traceback

    from aiida.backends.utils import get_automatic_user
    from aiida.orm.workflow import Workflow
    from aiida.common.datastructures import wf_states
    from aiida.orm import JobCalculation

    logger.info("Querying the worflow DB")

    w_list = Workflow.query(user=get_automatic_user(), state=wf_states.RUNNING)

    for w in w_list:
        logger.info("Found active workflow: {0}".format(w.uuid))

        running_steps = w.get_steps(state=wf_states.RUNNING)
        for s in running_steps:
            logger.info("[{0}] Found active step: {1}".format(w.uuid, s.name))

            # Fetch the calculations and sub-workflows once per step instead
            # of issuing a new query for every check below
            calcs = list(s.get_calculations())
            sub_wfs = list(s.get_sub_workflows())

            s_calcs_new = [c.uuid for c in calcs if c._is_new()]
            num_calcs_done = (sum(1 for c in calcs if c.has_finished_ok()) +
                              sum(1 for c in calcs if c.has_failed()))
            num_sub_wfs_done = (
                sum(1 for sw in sub_wfs if sw.has_finished_ok()) +
                sum(1 for sw in sub_wfs if sw.has_failed()))

            if (len(calcs) == num_calcs_done and
                    len(sub_wfs) == num_sub_wfs_done):
                logger.info("[{0}] Step: {1} ready to move".format(
                    w.uuid, s.name))

                s.set_state(wf_states.FINISHED)
                advance_workflow(w, s)

            elif s_calcs_new:
                for uuid in s_calcs_new:
                    obj_calc = JobCalculation.get_subclass_from_uuid(uuid=uuid)
                    try:
                        obj_calc.submit()
                        logger.info(
                            "[{0}] Step: {1} launched calculation {2}".format(
                                w.uuid, s.name, uuid))
                    except Exception:
                        # Keep going with the other calculations, but do not
                        # lose the reason of the failure
                        logger.error(
                            "[{0}] Step: {1} cannot launch calculation {2}".
                            format(w.uuid, s.name, uuid),
                            exc_info=True)

        initialized_steps = w.get_steps(state=wf_states.INITIALIZED)

        for s in initialized_steps:
            try:
                # NOTE(review): the reloaded subclass instance is not used,
                # the step method is looked up on ``w`` itself -- confirm
                # whether the call should go through the reloaded object
                Workflow.get_subclass_from_uuid(w.uuid)
                getattr(w, s.name)()
                return True
            except Exception:
                w.append_to_report(
                    "ERROR ! This workflow got an error in the {0} method, we report down the stack trace"
                    .format(s.name))
                # traceback.format_exc() formats the exception being
                # handled; traceback objects have no format_exc method
                w.append_to_report("full traceback: {0}".format(
                    traceback.format_exc()))

                s.set_state(wf_states.ERROR)
                w.set_state(wf_states.ERROR)

    for w in w_list:
        if w.get_steps(state=wf_states.ERROR):
            w.set_state(wf_states.ERROR)
def submit_jobs_with_authinfo(authinfo):
    """
    Submit jobs in TOSUBMIT status belonging to the user and machine
    defined by the given ``authinfo``.

    A single transport is opened for all the calculations. An exception
    raised while submitting one calculation is logged as a warning and the
    next calculation is processed. An exception raised at the outer level
    (i.e. while opening the transport) moves every calculation to
    SUBMISSIONFAILED, logs the traceback for each of them and is then
    re-raised.

    :param authinfo: the authentication info; its ``enabled``,
        ``aiidauser``, ``dbcomputer`` and ``get_transport()`` are used.
    :return: None.
    """
    import traceback

    from aiida.orm import JobCalculation
    from aiida.utils.logger import get_dblogger_extra

    if not authinfo.enabled:
        return

    execlogger.debug("Submitting jobs for user {} "
                     "and machine {}".format(authinfo.aiidauser.email,
                                             authinfo.dbcomputer.name))

    calcs_to_inquire = list(
        JobCalculation._get_all_with_state(state=calc_states.TOSUBMIT,
                                           computer=authinfo.dbcomputer,
                                           user=authinfo.aiidauser))

    # The connection is opened only if there are calcs in the TOSUBMIT state
    if not calcs_to_inquire:
        return

    try:
        # Done here so that the transport is opened only once per computer
        with authinfo.get_transport() as t:
            for c in calcs_to_inquire:
                logger_extra = get_dblogger_extra(c)
                t._set_logger_extra(logger_extra)

                try:
                    submit_calc(calc=c, authinfo=authinfo, transport=t)
                except Exception as e:
                    # TODO: implement a counter, after N retrials
                    # set it to a status that
                    # requires the user intervention
                    execlogger.warning("There was an exception for "
                                       "calculation {} ({}): {}".format(
                                           c.pk, e.__class__.__name__, e),
                                       extra=logger_extra)
                    # Just proceed to the next calculation
                    continue
    # Catch exceptions also at this level (this happens only if there is
    # a problem opening the transport in the 'with' statement, because any
    # other exception is caught and skipped above)
    except Exception:
        # Format the traceback right away, and do the per-calculation
        # try/except in a separate function: on Python 2 an exception
        # handled inside this frame would replace the one being handled
        # here, so both format_exc() and the bare raise below could refer
        # to the wrong exception.
        _mark_submission_failed(calcs_to_inquire, traceback.format_exc())
        raise


def _mark_submission_failed(calcs, traceback_text):
    """Set every calculation to SUBMISSIONFAILED and log the traceback."""
    from aiida.utils.logger import get_dblogger_extra

    for calc in calcs:
        logger_extra = get_dblogger_extra(calc)
        try:
            calc._set_state(calc_states.SUBMISSIONFAILED)
        except ModificationNotAllowed:
            # Someone already set it, just skip
            pass

        execlogger.error(
            "Submission of calc {} failed, check also the "
            "log file! Traceback: {}".format(calc.pk, traceback_text),
            extra=logger_extra)
def test_cif_structure_roundtrip(self):
    """
    Export to CIF a structure obtained from a CifData node and check the
    computation-related values of the export.

    A calculation with a code, a pseudopotential and a CIF file as inputs
    is built, together with its retrieved folder and an output
    ParameterData. The export must raise ValueError as long as the
    structure is not linked to the calculation, and succeed afterwards.
    """
    from aiida.tools.dbexporters.tcod import export_cif, export_values
    from aiida.orm import Code
    from aiida.orm import JobCalculation
    from aiida.orm.data.cif import CifData
    from aiida.orm.data.parameter import ParameterData
    from aiida.orm.data.upf import UpfData
    from aiida.orm.data.folder import FolderData
    from aiida.common.folders import SandboxFolder
    from aiida.common.datastructures import calc_states
    import tempfile

    with tempfile.NamedTemporaryFile() as f:
        f.write(''' data_test _cell_length_a 10 _cell_length_b 10 _cell_length_c 10 _cell_angle_alpha 90 _cell_angle_beta 90 _cell_angle_gamma 90 loop_ _atom_site_label _atom_site_fract_x _atom_site_fract_y _atom_site_fract_z C 0 0 0 O 0.5 0.5 0.5 ''')
        f.flush()
        a = CifData(file=f.name)

    c = a._get_aiida_structure()
    c.store()
    pd = ParameterData()

    code = Code(local_executable='test.sh')
    with tempfile.NamedTemporaryFile() as f:
        f.write("#/bin/bash\n\necho test run\n")
        f.flush()
        code.add_path(f.name, 'test.sh')

    code.store()

    calc = JobCalculation(computer=self.computer)
    calc.set_resources({'num_machines': 1, 'num_mpiprocs_per_machine': 1})
    calc.add_link_from(code, "code")
    # The USER value must match the one expected by the assertion on
    # '_tcod_computation_environment' at the end of the test
    calc.set_environment_variables({
        'PATH': '/dev/null',
        'USER': 'unknown'
    })

    with tempfile.NamedTemporaryFile(prefix="Fe") as f:
        f.write("<UPF version=\"2.0.1\">\nelement=\"Fe\"\n")
        f.flush()
        upf = UpfData(file=f.name)
        upf.store()
        calc.add_link_from(upf, "upf")

    with tempfile.NamedTemporaryFile() as f:
        f.write("data_test")
        f.flush()
        cif = CifData(file=f.name)
        cif.store()
        calc.add_link_from(cif, "cif")

    calc.store()
    calc._set_state(calc_states.SUBMITTING)
    with SandboxFolder() as f:
        calc._store_raw_input_folder(f.abspath)

    # Fake the scheduler output and error files in the retrieved folder
    fd = FolderData()
    with open(
            fd._get_folder_pathsubfolder.get_abs_path(
                calc._SCHED_OUTPUT_FILE), 'w') as f:
        f.write("standard output")
        f.flush()

    with open(
            fd._get_folder_pathsubfolder.get_abs_path(
                calc._SCHED_ERROR_FILE), 'w') as f:
        f.write("standard error")
        f.flush()

    fd.store()
    fd.add_link_from(calc, calc._get_linkname_retrieved(), LinkType.CREATE)

    pd.add_link_from(calc, "calc", LinkType.CREATE)
    pd.store()

    # The structure is not yet linked to the calculation
    with self.assertRaises(ValueError):
        export_cif(c, parameters=pd)

    c.add_link_from(calc, "calc", LinkType.CREATE)
    export_cif(c, parameters=pd)

    values = export_values(c, parameters=pd)
    values = values['0']

    self.assertEqual(values['_tcod_computation_environment'],
                     ['PATH=/dev/null\nUSER=unknown'])
    self.assertEqual(values['_tcod_computation_command'],
                     ['cd 1; ./_aiidasubmit.sh'])
def retrieve_computed_for_authinfo(authinfo):
    """
    Retrieve and parse the calculations in the COMPUTED state belonging to
    the user and machine defined by the given ``authinfo``.

    For each calculation the state is first set to RETRIEVING (calculations
    for which this is not allowed are skipped), the files listed by the
    calculation are copied from the remote working directory into a new
    FolderData node and into single-file nodes, the nodes are stored, and
    the calculation parser (if any) is run. The final state is FINISHED or
    FAILED depending on the outcome of the parser.

    If an exception is raised while the calculation is in the PARSING state
    the calculation is set to PARSINGFAILED and the next one is processed;
    otherwise it is set to RETRIEVALFAILED and the exception is re-raised.

    :param authinfo: the authentication info; its ``enabled``,
        ``aiidauser``, ``dbcomputer`` and ``get_transport()`` are used.
    :return: the list of calculations that were retrieved and parsed, or
        None when ``authinfo`` is not enabled.
    """
    from aiida.orm import JobCalculation
    from aiida.common.folders import SandboxFolder
    from aiida.orm.data.folder import FolderData
    from aiida.utils.logger import get_dblogger_extra
    from aiida.orm import DataFactory

    import os

    if not authinfo.enabled:
        return

    calcs_to_retrieve = list(
        JobCalculation._get_all_with_state(state=calc_states.COMPUTED,
                                           computer=authinfo.dbcomputer,
                                           user=authinfo.aiidauser))

    retrieved = []

    # The connection is opened only if there are calcs in the COMPUTED state
    if calcs_to_retrieve:

        # Open connection
        with authinfo.get_transport() as t:
            for calc in calcs_to_retrieve:
                logger_extra = get_dblogger_extra(calc)
                t._set_logger_extra(logger_extra)

                try:
                    calc._set_state(calc_states.RETRIEVING)
                except ModificationNotAllowed:
                    # Someone else has already started to retrieve it,
                    # just log and continue
                    execlogger.debug("Attempting to retrieve more than once "
                                     "calculation {}: skipping!".format(
                                         calc.pk),
                                     extra=logger_extra)
                    continue  # with the next calculation to retrieve
                try:
                    execlogger.debug("Retrieving calc {}".format(calc.pk),
                                     extra=logger_extra)
                    workdir = calc._get_remote_workdir()
                    retrieve_list = calc._get_retrieve_list()
                    retrieve_singlefile_list = calc._get_retrieve_singlefile_list(
                    )
                    execlogger.debug("[retrieval of calc {}] "
                                     "chdir {}".format(calc.pk, workdir),
                                     extra=logger_extra)
                    t.chdir(workdir)

                    retrieved_files = FolderData()
                    retrieved_files.add_link_from(
                        calc,
                        label=calc._get_linkname_retrieved(),
                        link_type=LinkType.CREATE)

                    # First, retrieve the files of folderdata
                    with SandboxFolder() as folder:
                        for item in retrieve_list:
                            # An item is either a string or a list
                            # [remote name, local name, depth]; in both
                            # cases the remote name may contain file
                            # patterns. First decide the name of the files.
                            if isinstance(item, list):
                                tmp_rname, tmp_lname, depth = item
                                # with patterns there can be more than one
                                # remote file
                                if t.has_magic(tmp_rname):
                                    remote_names = t.glob(tmp_rname)
                                    local_names = []
                                    for rem in remote_names:
                                        to_append = rem.split(
                                            os.path.sep
                                        )[-depth:] if depth > 0 else []
                                        local_names.append(
                                            os.path.sep.join([tmp_lname] +
                                                             to_append))
                                else:
                                    remote_names = [tmp_rname]
                                    # Split the remote name itself (the
                                    # list of names has no split method)
                                    to_append = tmp_rname.split(
                                        os.path.sep
                                    )[-depth:] if depth > 0 else []
                                    local_names = [
                                        os.path.sep.join([tmp_lname] +
                                                         to_append)
                                    ]
                                if depth > 1:
                                    # create directories in the folder,
                                    # if needed
                                    for this_local_file in local_names:
                                        new_folder = os.path.join(
                                            folder.abspath,
                                            os.path.split(this_local_file)[0])
                                        if not os.path.exists(new_folder):
                                            os.makedirs(new_folder)
                            else:  # it is a string
                                if t.has_magic(item):
                                    remote_names = t.glob(item)
                                    local_names = [
                                        os.path.split(rem)[1]
                                        for rem in remote_names
                                    ]
                                else:
                                    remote_names = [item]
                                    local_names = [os.path.split(item)[1]]

                            for rem, loc in zip(remote_names, local_names):
                                execlogger.debug(
                                    "[retrieval of calc {}] "
                                    "Trying to retrieve remote item '{}'".
                                    format(calc.pk, rem),
                                    extra=logger_extra)
                                t.get(rem,
                                      os.path.join(folder.abspath, loc),
                                      ignore_nonexisting=True)

                        # Everything has been retrieved: put the files
                        # inside the FolderData node
                        retrieved_files.replace_with_folder(folder.abspath,
                                                            overwrite=True)

                    # Second, retrieve the singlefiles
                    with SandboxFolder() as folder:
                        singlefile_list = []
                        for (linkname, subclassname,
                             filename) in retrieve_singlefile_list:
                            execlogger.debug(
                                "[retrieval of calc {}] Trying "
                                "to retrieve remote singlefile '{}'".format(
                                    calc.pk, filename),
                                extra=logger_extra)
                            localfilename = os.path.join(
                                folder.abspath,
                                os.path.split(filename)[1])
                            t.get(filename,
                                  localfilename,
                                  ignore_nonexisting=True)
                            singlefile_list.append(
                                (linkname, subclassname, localfilename))

                        # ignore files that have not been retrieved
                        singlefile_list = [
                            i for i in singlefile_list
                            if os.path.exists(i[2])
                        ]

                        # after retrieving from the cluster, create the
                        # objects
                        singlefiles = []
                        for (linkname, subclassname,
                             filename) in singlefile_list:
                            SinglefileSubclass = DataFactory(subclassname)
                            singlefile = SinglefileSubclass()
                            singlefile.set_file(filename)
                            singlefile.add_link_from(
                                calc,
                                label=linkname,
                                link_type=LinkType.CREATE)
                            singlefiles.append(singlefile)

                    # Finally, store
                    execlogger.debug("[retrieval of calc {}] "
                                     "Storing retrieved_files={}".format(
                                         calc.pk, retrieved_files.dbnode.pk),
                                     extra=logger_extra)
                    retrieved_files.store()
                    for fil in singlefiles:
                        execlogger.debug(
                            "[retrieval of calc {}] "
                            "Storing retrieved_singlefile={}".format(
                                calc.pk, fil.dbnode.pk),
                            extra=logger_extra)
                        fil.store()

                    # Whoever retrieved the calculation should also be the
                    # only one parsing it, so no check is done here
                    calc._set_state(calc_states.PARSING)

                    Parser = calc.get_parserclass()
                    # If no parser is set, the calculation is successful
                    successful = True
                    if Parser is not None:
                        parser = Parser(calc)
                        successful, new_nodes_tuple = parser.parse_from_calc()

                        for label, n in new_nodes_tuple:
                            n.add_link_from(calc,
                                            label=label,
                                            link_type=LinkType.CREATE)
                            n.store()

                    if successful:
                        try:
                            calc._set_state(calc_states.FINISHED)
                        except ModificationNotAllowed:
                            # Nobody else should have set it; ignored to
                            # avoid useless error messages
                            pass
                    else:
                        try:
                            calc._set_state(calc_states.FAILED)
                        except ModificationNotAllowed:
                            # Nobody else should have set it; ignored to
                            # avoid useless error messages
                            pass
                        execlogger.error(
                            "[parsing of calc {}] "
                            "The parser returned an error, but it should have "
                            "created an output node with some partial results "
                            "and warnings. Check there for more information on "
                            "the problem".format(calc.pk),
                            extra=logger_extra)
                    retrieved.append(calc)
                except Exception:
                    import traceback

                    tb = traceback.format_exc()
                    newextradict = logger_extra.copy()
                    newextradict['full_traceback'] = tb
                    if calc.get_state() == calc_states.PARSING:
                        execlogger.error("Error parsing calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        # TODO: add a 'comment' to the calculation
                        try:
                            calc._set_state(calc_states.PARSINGFAILED)
                        except ModificationNotAllowed:
                            pass
                    else:
                        execlogger.error("Error retrieving calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        try:
                            calc._set_state(calc_states.RETRIEVALFAILED)
                        except ModificationNotAllowed:
                            pass
                        raise

    return retrieved
def setUpClass(cls, *args, **kwargs):
    """
    Store three calculations, each one in a different state.

    The computer is passed to the calculations in three different forms:
    the object itself, its name and its id.
    """
    super(TestVerdiCalculationCommands, cls).setUpClass()

    from aiida.orm import JobCalculation

    fixtures = (
        (cls.computer, calc_states.TOSUBMIT),
        (cls.computer.name, calc_states.COMPUTED),
        (cls.computer.id, calc_states.FINISHED),
    )

    for computer_ref, target_state in fixtures:
        stored_calc = JobCalculation(computer=computer_ref,
                                     resources={
                                         'num_machines': 1,
                                         'num_mpiprocs_per_machine': 1
                                     }).store()
        stored_calc._set_state(target_state)
def mounet_daemon():
    """
    Count the JobCalculations whose ``state`` attribute is 'WITHSCHEDULER'.

    :return: the scalar result of a ``count`` over ``DbNode.id`` on the
        filtered query.
    """
    with_scheduler = JobCalculation.query(dbattributes__key='state',
                                          dbattributes__tval='WITHSCHEDULER')
    id_count = func.count(DbNode.id)
    return with_scheduler.with_entities(id_count).scalar()