def update_jobs():
    """
    Calls an update for each set of pairs (machine, aiidauser).
    """
    from aiida.orm import JobCalculation, Computer, User
    from aiida.backends.utils import get_authinfo, QueryFactory

    qmanager = QueryFactory()()

    # I create a unique set of pairs (computer, aiidauser)
    computers_users_to_check = qmanager.query_jobcalculations_by_computer_user_state(
        state=calc_states.WITHSCHEDULER,
        only_computer_user_pairs=True,
        only_enabled=True)

    for computer, aiidauser in computers_users_to_check:
        execlogger.debug("({},{}) pair to check".format(
            aiidauser.email, computer.name))

        try:
            authinfo = get_authinfo(computer.dbcomputer, aiidauser._dbuser)
            computed_calcs = update_running_calcs_status(authinfo)
        except Exception as e:
            msg = ("Error while updating calculation status "
                   "for aiidauser={} on computer={}, "
                   "error type is {}, error message: {}".format(
                       aiidauser.email, computer.name,
                       e.__class__.__name__, e.message))
            execlogger.error(msg)
            # Continue with the next computer
            continue
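# Illustrative sketch, not part of the original module: the daemon is assumed
# to invoke the three top-level entry points in sequence on each tick, so that
# new calculations get submitted, running ones polled, and finished ones
# retrieved. The function name _example_daemon_tick is hypothetical.
def _example_daemon_tick():
    submit_jobs()    # push calculations in TOSUBMIT to their schedulers
    update_jobs()    # poll WITHSCHEDULER calculations; mark finished ones COMPUTED
    retrieve_jobs()  # fetch and parse the files of COMPUTED calculations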
def retrieve_jobs():
    from aiida.orm import JobCalculation, Computer
    from aiida.backends.utils import get_authinfo, QueryFactory

    qmanager = QueryFactory()()

    # I create a unique set of pairs (computer, aiidauser)
    computers_users_to_check = qmanager.query_jobcalculations_by_computer_user_state(
        state=calc_states.COMPUTED,
        only_computer_user_pairs=True,
        only_enabled=True)

    for computer, aiidauser in computers_users_to_check:
        execlogger.debug("({},{}) pair to check".format(
            aiidauser.email, computer.name))
        try:
            authinfo = get_authinfo(computer.dbcomputer, aiidauser._dbuser)
            retrieve_computed_for_authinfo(authinfo)
        except Exception as e:
            msg = ("Error while retrieving calculation status for "
                   "aiidauser={} on computer={}, "
                   "error type is {}, error message: {}".format(
                       aiidauser.email, computer.name,
                       e.__class__.__name__, e.message))
            execlogger.error(msg)
            # Continue with the next computer
            continue
def test_statistics(self):
    """
    Test if the statistics query works properly.

    I try to implement it in a way that does not depend on the past state.
    """
    from aiida.backends.utils import QueryFactory
    from aiida.orm import Node, DataFactory, Calculation
    from collections import defaultdict

    def store_and_add(n, statistics):
        n.store()
        statistics['total'] += 1
        statistics['types'][n._plugin_type_string] += 1
        statistics['ctime_by_day'][n.ctime.strftime('%Y-%m-%d')] += 1

    qmanager = QueryFactory()()
    current_db_statistics = qmanager.get_creation_statistics()
    types = defaultdict(int)
    types.update(current_db_statistics['types'])
    ctime_by_day = defaultdict(int)
    ctime_by_day.update(current_db_statistics['ctime_by_day'])

    expected_db_statistics = {
        'total': current_db_statistics['total'],
        'types': types,
        'ctime_by_day': ctime_by_day
    }

    ParameterData = DataFactory('parameter')

    store_and_add(Node(), expected_db_statistics)
    store_and_add(ParameterData(), expected_db_statistics)
    store_and_add(ParameterData(), expected_db_statistics)
    store_and_add(Calculation(), expected_db_statistics)

    new_db_statistics = qmanager.get_creation_statistics()
    # I only check a few fields
    new_db_statistics = {
        k: v for k, v in new_db_statistics.iteritems()
        if k in expected_db_statistics
    }

    expected_db_statistics = {
        k: dict(v) if isinstance(v, defaultdict) else v
        for k, v in expected_db_statistics.iteritems()
    }

    self.assertEquals(new_db_statistics, expected_db_statistics)
def bands_list(elements, elements_only, raw, formulamode, past_days, groups,
               all_users):
    """
    List bands objects
    """
    from aiida.backends.utils import QueryFactory
    from tabulate import tabulate
    from argparse import Namespace

    args = Namespace()
    args.element = elements
    args.element_only = elements_only
    args.formulamode = formulamode
    args.past_days = past_days
    args.group_name = None
    if groups is not None:
        args.group_pk = [group.id for group in groups]
    else:
        args.group_pk = None
    args.all_users = all_users

    query = QueryFactory()()
    entry_list = query.get_bands_and_parents_structure(args)

    counter = 0
    bands_list_data = list()
    if not raw:
        bands_list_data.append(PROJECT_HEADERS)
    for entry in entry_list:
        for i, value in enumerate(entry):
            if isinstance(value, list):
                entry[i] = ",".join(value)
        for i in range(len(entry), len(PROJECT_HEADERS)):
            entry.append(None)
        counter += 1
    bands_list_data.extend(entry_list)

    if raw:
        echo.echo(tabulate(bands_list_data, tablefmt='plain'))
    else:
        echo.echo(tabulate(bands_list_data, headers="firstrow"))
        echo.echo("\nTotal results: {}\n".format(counter))
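# Illustrative call sketch, not part of the original module. It assumes the
# surrounding module defines `echo` and `PROJECT_HEADERS`; in AiiDA this
# function is normally exposed as a click command (`verdi data bands list`).
# All argument values below are made up:
#
#     bands_list(elements=['Si'], elements_only=False, raw=False,
#                formulamode='hill', past_days=7, groups=None, all_users=False)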
def get_statistics(self, user_pk=None):
    """Return creation statistics for the stored nodes, optionally limited to a given user."""
    from aiida.backends.utils import QueryFactory

    qmanager = QueryFactory()()
    return qmanager.get_creation_statistics(user_pk=user_pk)
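# Illustrative usage sketch, not part of the original module. As the
# test_statistics test above shows, the dictionary returned by
# get_creation_statistics() contains at least the keys 'total', 'types' and
# 'ctime_by_day'. `backend` stands for a hypothetical instance of the class
# that defines get_statistics.
def _example_print_statistics(backend, user_pk=None):
    stats = backend.get_statistics(user_pk=user_pk)
    print "Total nodes: {}".format(stats['total'])
    for day, count in sorted(stats['ctime_by_day'].iteritems()):
        print "{}: {} node(s) created".format(day, count)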
def test_query_path(self):
    from aiida.orm.querybuilder import QueryBuilder
    from aiida.orm import Node
    from aiida.common.links import LinkType
    from aiida.backends.utils import QueryFactory

    q = QueryFactory()()

    n1 = Node()
    n1.label = 'n1'
    n1.store()
    n2 = Node()
    n2.label = 'n2'
    n2.store()
    n3 = Node()
    n3.label = 'n3'
    n3.store()
    n4 = Node()
    n4.label = 'n4'
    n4.store()
    n5 = Node()
    n5.label = 'n5'
    n5.store()
    n6 = Node()
    n6.label = 'n6'
    n6.store()
    n7 = Node()
    n7.label = 'n7'
    n7.store()
    n8 = Node()
    n8.label = 'n8'
    n8.store()
    n9 = Node()
    n9.label = 'n9'
    n9.store()

    # I create a strange graph, inserting links in an order
    # such that I often have to create the transitive closure
    # between two subgraphs.
    # I set everything as INPUT links for now, because neither the
    # QueryBuilder path query nor our custom queries follow links
    # other than CREATE or INPUT.
    n3.add_link_from(n2, link_type=LinkType.INPUT)
    n2.add_link_from(n1, link_type=LinkType.INPUT)
    n5.add_link_from(n3, link_type=LinkType.INPUT)
    n5.add_link_from(n4, link_type=LinkType.INPUT)
    n4.add_link_from(n2, link_type=LinkType.INPUT)
    n7.add_link_from(n6, link_type=LinkType.INPUT)
    n8.add_link_from(n7, link_type=LinkType.INPUT)

    # There are no parents to n9, checking that
    self.assertEqual(set([]), set(q.get_all_parents([n9.pk])))
    # There is one parent to n7
    self.assertEqual(
        set([(_,) for _ in (n6.pk,)]),
        set([tuple(_) for _ in q.get_all_parents([n7.pk])]))
    # There are several parents to n4
    self.assertEqual(
        set([(_.pk,) for _ in (n1, n2)]),
        set([tuple(_) for _ in q.get_all_parents([n4.pk])]))
    # There are several parents to n5
    self.assertEqual(
        set([(_.pk,) for _ in (n1, n2, n3, n4)]),
        set([tuple(_) for _ in q.get_all_parents([n5.pk])]))

    # Yet, there are no paths from n1 to n8
    self.assertEquals(
        QueryBuilder().append(
            Node, filters={'id': n1.pk}, tag='anc').append(
            Node, descendant_of='anc', filters={'id': n8.pk}).count(), 0)
    self.assertEquals(
        QueryBuilder().append(
            Node, filters={'id': n8.pk}, tag='desc').append(
            Node, ancestor_of='desc', filters={'id': n1.pk}).count(), 0)

    n6.add_link_from(n5, link_type=LinkType.INPUT)

    # Now there are two paths from n1 to n8
    self.assertEquals(
        QueryBuilder().append(
            Node, filters={'id': n1.pk}, tag='anc').append(
            Node, descendant_of='anc', filters={'id': n8.pk}).count(), 2)
    self.assertEquals(
        QueryBuilder().append(
            Node, filters={'id': n8.pk}, tag='desc').append(
            Node, ancestor_of='desc', filters={'id': n1.pk}).count(), 2)
    self.assertEquals(
        QueryBuilder().append(
            Node, filters={'id': n8.pk}, tag='desc').append(
            Node, ancestor_of='desc', filters={'id': n1.pk},
            edge_filters={'depth': {'<': 6}}).count(), 2)
    self.assertEquals(
        QueryBuilder().append(
            Node, filters={'id': n8.pk}, tag='desc').append(
            Node, ancestor_of='desc', filters={'id': n1.pk},
            edge_filters={'depth': 5}).count(), 2)
    self.assertEquals(
        QueryBuilder().append(
            Node, filters={'id': n8.pk}, tag='desc').append(
            Node, ancestor_of='desc', filters={'id': n1.pk},
            edge_filters={'depth': {'<': 5}}).count(), 0)

    # TODO: write a query that can filter certain paths by traversed ID
    qb = QueryBuilder().append(
        Node, filters={'id': n8.pk}, tag='desc').append(
        Node, ancestor_of='desc', edge_project='path', filters={'id': n1.pk})
    queried_path_set = set([frozenset(p) for p, in qb.all()])
    paths_there_should_be = set([
        frozenset([n1.pk, n2.pk, n3.pk, n5.pk, n6.pk, n7.pk, n8.pk]),
        frozenset([n1.pk, n2.pk, n4.pk, n5.pk, n6.pk, n7.pk, n8.pk])
    ])
    self.assertTrue(queried_path_set == paths_there_should_be)

    qb = QueryBuilder().append(
        Node, filters={'id': n1.pk}, tag='anc').append(
        Node, descendant_of='anc', filters={'id': n8.pk}, edge_project='path')
    self.assertTrue(
        set([frozenset(p) for p, in qb.all()]) == set([
            frozenset([n1.pk, n2.pk, n3.pk, n5.pk, n6.pk, n7.pk, n8.pk]),
            frozenset([n1.pk, n2.pk, n4.pk, n5.pk, n6.pk, n7.pk, n8.pk])
        ]))

    n7.add_link_from(n9, link_type=LinkType.INPUT)

    # Still two paths...
    self.assertEquals(
        QueryBuilder().append(
            Node, filters={'id': n1.pk}, tag='anc').append(
            Node, descendant_of='anc', filters={'id': n8.pk}).count(), 2)
    self.assertEquals(
        QueryBuilder().append(
            Node, filters={'id': n8.pk}, tag='desc').append(
            Node, ancestor_of='desc', filters={'id': n1.pk}).count(), 2)

    n9.add_link_from(n6, link_type=LinkType.INPUT)

    # And now there should be 4 paths
    self.assertEquals(
        QueryBuilder().append(
            Node, filters={'id': n1.pk}, tag='anc').append(
            Node, descendant_of='anc', filters={'id': n8.pk}).count(), 4)
    self.assertEquals(
        QueryBuilder().append(
            Node, filters={'id': n8.pk}, tag='desc').append(
            Node, ancestor_of='desc', filters={'id': n1.pk}).count(), 4)

    qb = QueryBuilder().append(
        Node, filters={'id': n1.pk}, tag='anc').append(
        Node, descendant_of='anc', filters={'id': n8.pk}, edge_tag='edge')
    qb.add_projection('edge', 'depth')
    # Note: the original used assertTrue(a, b), which treats the second
    # argument as a message and always passes for a non-empty set; an
    # equality assertion is what is meant here.
    self.assertEquals(set(zip(*qb.all())[0]), set([5, 6]))
    qb.add_filter('edge', {'depth': 6})
    self.assertEquals(set(zip(*qb.all())[0]), set([6]))
def retrieve_computed_for_authinfo(authinfo):
    from aiida.orm import JobCalculation
    from aiida.common.folders import SandboxFolder
    from aiida.orm.data.folder import FolderData
    from aiida.utils.logger import get_dblogger_extra
    from aiida.orm import DataFactory
    from aiida.backends.utils import QueryFactory

    import os

    if not authinfo.enabled:
        return

    qmanager = QueryFactory()()

    # Get the COMPUTED calculations of this (computer, aiidauser) pair
    calcs_to_retrieve = qmanager.query_jobcalculations_by_computer_user_state(
        state=calc_states.COMPUTED,
        computer=authinfo.dbcomputer,
        user=authinfo.aiidauser)

    retrieved = []

    # I avoid opening an ssh connection if there are no
    # calcs in state COMPUTED
    if len(calcs_to_retrieve):
        # Open connection
        with authinfo.get_transport() as t:
            for calc in calcs_to_retrieve:
                logger_extra = get_dblogger_extra(calc)
                t._set_logger_extra(logger_extra)

                try:
                    calc._set_state(calc_states.RETRIEVING)
                except ModificationNotAllowed:
                    # Someone else has already started to retrieve it,
                    # just log and continue
                    execlogger.debug("Attempting to retrieve more than once "
                                     "calculation {}: skipping!".format(calc.pk),
                                     extra=logger_extra)
                    continue  # with the next calculation to retrieve

                try:
                    execlogger.debug("Retrieving calc {}".format(calc.pk),
                                     extra=logger_extra)
                    workdir = calc._get_remote_workdir()
                    retrieve_list = calc._get_retrieve_list()
                    retrieve_singlefile_list = calc._get_retrieve_singlefile_list()
                    execlogger.debug("[retrieval of calc {}] "
                                     "chdir {}".format(calc.pk, workdir),
                                     extra=logger_extra)
                    t.chdir(workdir)

                    retrieved_files = FolderData()
                    retrieved_files.add_link_from(
                        calc, label=calc._get_linkname_retrieved(),
                        link_type=LinkType.CREATE)

                    # First, retrieve the files of folderdata
                    with SandboxFolder() as folder:
                        for item in retrieve_list:
                            # I have two possibilities:
                            # * item is a string
                            # * or it is a list
                            # and then two further possibilities:
                            # * there are file patterns
                            # * or there are not.
                            # First decide the names of the files
                            if isinstance(item, list):
                                tmp_rname, tmp_lname, depth = item
                                # If there is more than one file, I do something different
                                if t.has_magic(tmp_rname):
                                    remote_names = t.glob(tmp_rname)
                                    local_names = []
                                    for rem in remote_names:
                                        to_append = rem.split(
                                            os.path.sep)[-depth:] if depth > 0 else []
                                        local_names.append(
                                            os.path.sep.join([tmp_lname] + to_append))
                                else:
                                    remote_names = [tmp_rname]
                                    to_append = tmp_rname.split(
                                        os.path.sep)[-depth:] if depth > 0 else []
                                    local_names = [
                                        os.path.sep.join([tmp_lname] + to_append)
                                    ]
                                if depth > 1:
                                    # create directories in the folder, if needed
                                    for this_local_file in local_names:
                                        new_folder = os.path.join(
                                            folder.abspath,
                                            os.path.split(this_local_file)[0])
                                        if not os.path.exists(new_folder):
                                            os.makedirs(new_folder)
                            else:  # it is a string
                                if t.has_magic(item):
                                    remote_names = t.glob(item)
                                    local_names = [
                                        os.path.split(rem)[1] for rem in remote_names
                                    ]
                                else:
                                    remote_names = [item]
                                    local_names = [os.path.split(item)[1]]

                            for rem, loc in zip(remote_names, local_names):
                                execlogger.debug(
                                    "[retrieval of calc {}] "
                                    "Trying to retrieve remote item '{}'".format(
                                        calc.pk, rem),
                                    extra=logger_extra)
                                t.get(rem, os.path.join(folder.abspath, loc),
                                      ignore_nonexisting=True)

                        # Here I retrieved everything;
                        # now I store them inside the calculation
                        retrieved_files.replace_with_folder(folder.abspath,
                                                            overwrite=True)

                    # Second, retrieve the singlefiles
                    with SandboxFolder() as folder:
                        singlefile_list = []
                        for (linkname, subclassname, filename) in retrieve_singlefile_list:
                            execlogger.debug(
                                "[retrieval of calc {}] Trying "
                                "to retrieve remote singlefile '{}'".format(
                                    calc.pk, filename),
                                extra=logger_extra)
                            localfilename = os.path.join(
                                folder.abspath, os.path.split(filename)[1])
                            t.get(filename, localfilename,
                                  ignore_nonexisting=True)
                            singlefile_list.append(
                                (linkname, subclassname, localfilename))

                        # ignore files that have not been retrieved
                        singlefile_list = [i for i in singlefile_list
                                           if os.path.exists(i[2])]

                        # after retrieving from the cluster, I create the objects
                        singlefiles = []
                        for (linkname, subclassname, filename) in singlefile_list:
                            SinglefileSubclass = DataFactory(subclassname)
                            singlefile = SinglefileSubclass()
                            singlefile.set_file(filename)
                            singlefile.add_link_from(calc, label=linkname,
                                                     link_type=LinkType.CREATE)
                            singlefiles.append(singlefile)

                    # Finally, store
                    execlogger.debug("[retrieval of calc {}] "
                                     "Storing retrieved_files={}".format(
                                         calc.pk, retrieved_files.dbnode.pk),
                                     extra=logger_extra)
                    retrieved_files.store()
                    for fil in singlefiles:
                        execlogger.debug("[retrieval of calc {}] "
                                         "Storing retrieved_singlefile={}".format(
                                             calc.pk, fil.dbnode.pk),
                                         extra=logger_extra)
                        fil.store()

                    # If I was the one retrieving, I should also be the only
                    # one parsing! I do not check
                    calc._set_state(calc_states.PARSING)

                    Parser = calc.get_parserclass()
                    # If no parser is set, the calculation is successful
                    successful = True
                    if Parser is not None:
                        # TODO: parse here
                        parser = Parser(calc)
                        successful, new_nodes_tuple = parser.parse_from_calc()

                        for label, n in new_nodes_tuple:
                            n.add_link_from(calc, label=label,
                                            link_type=LinkType.CREATE)
                            n.store()

                    if successful:
                        try:
                            calc._set_state(calc_states.FINISHED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid useless error messages, I
                            # just ignore
                            pass
                    else:
                        try:
                            calc._set_state(calc_states.FAILED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid useless error messages, I
                            # just ignore
                            pass
                        execlogger.error(
                            "[parsing of calc {}] "
                            "The parser returned an error, but it should have "
                            "created an output node with some partial results "
                            "and warnings. Check there for more information on "
                            "the problem".format(calc.pk),
                            extra=logger_extra)
                    retrieved.append(calc)
                except Exception:
                    import traceback

                    tb = traceback.format_exc()
                    newextradict = logger_extra.copy()
                    newextradict['full_traceback'] = tb
                    if calc.get_state() == calc_states.PARSING:
                        execlogger.error("Error parsing calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        # TODO: add a 'comment' to the calculation
                        try:
                            calc._set_state(calc_states.PARSINGFAILED)
                        except ModificationNotAllowed:
                            pass
                    else:
                        execlogger.error("Error retrieving calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        try:
                            calc._set_state(calc_states.RETRIEVALFAILED)
                        except ModificationNotAllowed:
                            pass
                        raise

    return retrieved
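# Illustrative example (made-up file names) of the two retrieve_list item
# forms handled above:
#
#     retrieve_list = [
#         'aiida.out',                      # plain string: fetched under its basename
#         '*.xml',                          # string with glob: every match, by basename
#         ['out/data_*/*.dat', 'data', 2],  # [remote_glob, local_dir, depth]: keep the
#                                           # last 2 path components under 'data/'
#     ]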
def submit_jobs_with_authinfo(authinfo):
    """
    Submit jobs in TOSUBMIT status belonging
    to user and machine as defined in the
    'dbauthinfo' table.
    """
    from aiida.orm import JobCalculation
    from aiida.utils.logger import get_dblogger_extra
    from aiida.backends.utils import QueryFactory

    if not authinfo.enabled:
        return

    execlogger.debug("Submitting jobs for user {} "
                     "and machine {}".format(authinfo.aiidauser.email,
                                             authinfo.dbcomputer.name))

    qmanager = QueryFactory()()

    # Get the TOSUBMIT calculations of this (computer, aiidauser) pair
    calcs_to_inquire = qmanager.query_jobcalculations_by_computer_user_state(
        state=calc_states.TOSUBMIT,
        computer=authinfo.dbcomputer,
        user=authinfo.aiidauser)

    # I avoid opening an ssh connection if there are
    # no calcs in state TOSUBMIT
    if len(calcs_to_inquire):
        # Open connection
        try:
            # I do it here so that the transport is opened only once per computer
            with authinfo.get_transport() as t:
                for c in calcs_to_inquire:
                    logger_extra = get_dblogger_extra(c)
                    t._set_logger_extra(logger_extra)

                    try:
                        submit_calc(calc=c, authinfo=authinfo, transport=t)
                    except Exception as e:
                        # TODO: implement a counter; after N retries,
                        # set the calculation to a status that
                        # requires user intervention
                        execlogger.warning("There was an exception for "
                                           "calculation {} ({}): {}".format(
                                               c.pk, e.__class__.__name__,
                                               e.message))
                        # I just proceed to the next calculation
                        continue
        # Catch exceptions also at this level (this happens only if there is
        # a problem opening the transport in the 'with t' statement,
        # because any other exception is caught and skipped above)
        except Exception as e:
            import traceback

            for calc in calcs_to_inquire:
                logger_extra = get_dblogger_extra(calc)
                try:
                    calc._set_state(calc_states.SUBMISSIONFAILED)
                except ModificationNotAllowed:
                    # Someone already set it, just skip
                    pass
                execlogger.error("Submission of calc {} failed, check also the "
                                 "log file! Traceback: {}".format(
                                     calc.pk, traceback.format_exc()),
                                 extra=logger_extra)
            raise
def update_running_calcs_status(authinfo):
    """
    Update the states of calculations in WITHSCHEDULER status belonging
    to user and machine as defined in the 'dbauthinfo' table.
    """
    from aiida.orm import JobCalculation, Computer
    from aiida.scheduler.datastructures import JobInfo
    from aiida.utils.logger import get_dblogger_extra
    from aiida.backends.utils import QueryFactory

    if not authinfo.enabled:
        return

    execlogger.debug("Updating running calc status for user {} "
                     "and machine {}".format(authinfo.aiidauser.email,
                                             authinfo.dbcomputer.name))

    qmanager = QueryFactory()()
    calcs_to_inquire = qmanager.query_jobcalculations_by_computer_user_state(
        state=calc_states.WITHSCHEDULER,
        computer=authinfo.dbcomputer,
        user=authinfo.aiidauser)

    # NOTE: no further check is done that machine and
    # aiidauser are correct for each calc in calcs

    s = Computer(dbcomputer=authinfo.dbcomputer).get_scheduler()
    t = authinfo.get_transport()

    computed = []

    # I avoid opening an ssh connection if there are
    # no calcs in state WITHSCHEDULER
    if len(calcs_to_inquire):
        jobids_to_inquire = [str(c.get_job_id()) for c in calcs_to_inquire]

        # Open connection
        with t:
            s.set_transport(t)
            # TODO: Check if we are ok with filtering by job (to make this work,
            # I had to remove the check on the retval for getJobs,
            # because if the job has computed and is not in the output of
            # qstat, it gives a nonzero retval)

            # TODO: catch SchedulerError exception and do something
            # sensible (at least, skip this computer but continue with
            # following ones, and set a counter; set calculations to
            # UNKNOWN after a while?)
            if s.get_feature('can_query_by_user'):
                found_jobs = s.getJobs(user="******", as_dict=True)
            else:
                found_jobs = s.getJobs(jobs=jobids_to_inquire, as_dict=True)

            # I update the status of the jobs
            for c in calcs_to_inquire:
                try:
                    logger_extra = get_dblogger_extra(c)
                    t._set_logger_extra(logger_extra)

                    jobid = c.get_job_id()
                    if jobid is None:
                        execlogger.error("JobCalculation {} is WITHSCHEDULER "
                                         "but no job id was found!".format(c.pk),
                                         extra=logger_extra)
                        continue

                    # I check if the calculation to be checked (c)
                    # is in the output of qstat
                    if jobid in found_jobs:
                        # jobinfo: the information returned by
                        # qstat for this job
                        jobinfo = found_jobs[jobid]
                        execlogger.debug("Inquiring calculation {} (jobid "
                                         "{}): it has job_state={}".format(
                                             c.pk, jobid, jobinfo.job_state),
                                         extra=logger_extra)
                        # For the moment, FAILED is not defined
                        if jobinfo.job_state in [job_states.DONE]:  # , job_states.FAILED]:
                            computed.append(c)
                            try:
                                c._set_state(calc_states.COMPUTED)
                            except ModificationNotAllowed:
                                # Someone already set it, just skip
                                pass

                        ## Do not set the WITHSCHEDULER state multiple times,
                        ## this would raise a ModificationNotAllowed
                        # else:
                        #     c._set_state(calc_states.WITHSCHEDULER)

                        c._set_scheduler_state(jobinfo.job_state)
                        c._set_last_jobinfo(jobinfo)
                    else:
                        execlogger.debug("Inquiring calculation {} (jobid "
                                         "{}): not found, assuming "
                                         "job_state={}".format(
                                             c.pk, jobid, job_states.DONE),
                                         extra=logger_extra)

                        # calculation c is not found in the output of qstat
                        computed.append(c)
                        c._set_scheduler_state(job_states.DONE)
                except Exception as e:
                    # TODO: implement a counter; after N retries,
                    # set the calculation to a status that
                    # requires user intervention
                    execlogger.warning("There was an exception for "
                                       "calculation {} ({}): {}".format(
                                           c.pk, e.__class__.__name__,
                                           e.message),
                                       extra=logger_extra)
                    continue

            for c in computed:
                try:
                    logger_extra = get_dblogger_extra(c)
                    try:
                        detailed_jobinfo = s.get_detailed_jobinfo(
                            jobid=c.get_job_id())
                    except NotImplementedError:
                        detailed_jobinfo = (
                            u"AiiDA MESSAGE: This scheduler does not implement "
                            u"the routine get_detailed_jobinfo to retrieve "
                            u"the information on "
                            u"a job after it has finished.")
                    last_jobinfo = c._get_last_jobinfo()
                    if last_jobinfo is None:
                        last_jobinfo = JobInfo()
                        last_jobinfo.job_id = c.get_job_id()
                        last_jobinfo.job_state = job_states.DONE
                    last_jobinfo.detailedJobinfo = detailed_jobinfo
                    c._set_last_jobinfo(last_jobinfo)
                except Exception as e:
                    execlogger.warning("There was an exception while "
                                       "retrieving the detailed jobinfo "
                                       "for calculation {} ({}): {}".format(
                                           c.pk, e.__class__.__name__,
                                           e.message),
                                       extra=logger_extra)
                    continue
                finally:
                    # Set the state to COMPUTED as the very last thing
                    # of this routine; no further change should be done after
                    # this, so that in general the retriever can just
                    # poll for this state, if we want to.
                    try:
                        c._set_state(calc_states.COMPUTED)
                    except ModificationNotAllowed:
                        # Someone already set it, just skip
                        pass

    return computed
def submit_jobs():
    """
    Submit all jobs in the TOSUBMIT state.
    """
    from aiida.orm import JobCalculation, Computer, User
    from aiida.utils.logger import get_dblogger_extra
    from aiida.backends.utils import get_authinfo, QueryFactory

    qmanager = QueryFactory()()

    # I create a unique set of pairs (computer, aiidauser)
    computers_users_to_check = qmanager.query_jobcalculations_by_computer_user_state(
        state=calc_states.TOSUBMIT,
        only_computer_user_pairs=True,
        only_enabled=True)

    for computer, aiidauser in computers_users_to_check:
        execlogger.debug("({},{}) pair to submit".format(
            aiidauser.email, computer.name))

        try:
            try:
                authinfo = get_authinfo(computer.dbcomputer, aiidauser._dbuser)
            except AuthenticationError:
                # TODO!!
                # Put each calculation in the SUBMISSIONFAILED state because
                # I do not have AuthInfo to submit them
                calcs_to_inquire = qmanager.query_jobcalculations_by_computer_user_state(
                    state=calc_states.TOSUBMIT,
                    computer=computer, user=aiidauser)
                for calc in calcs_to_inquire:
                    try:
                        calc._set_state(calc_states.SUBMISSIONFAILED)
                    except ModificationNotAllowed:
                        # Someone already set it, just skip
                        pass
                    logger_extra = get_dblogger_extra(calc)
                    execlogger.error("Submission of calc {} failed, "
                                     "computer pk= {} ({}) is not configured "
                                     "for aiidauser {}".format(
                                         calc.pk, computer.pk,
                                         computer.get_name(), aiidauser.email),
                                     extra=logger_extra)
                # Go to the next (dbcomputer, aiidauser) pair
                continue

            submitted_calcs = submit_jobs_with_authinfo(authinfo)
        except Exception as e:
            import traceback

            msg = ("Error while submitting jobs "
                   "for aiidauser={} on computer={}, "
                   "error type is {}, traceback: {}".format(
                       aiidauser.email, computer.name,
                       e.__class__.__name__, traceback.format_exc()))
            print msg
            execlogger.error(msg)
            # Continue with the next computer
            continue
def retrieve_computed_for_authinfo(authinfo):
    from aiida.orm import JobCalculation
    from aiida.common.folders import SandboxFolder
    from aiida.orm.data.folder import FolderData
    from aiida.common.log import get_dblogger_extra
    from aiida.orm import DataFactory
    from aiida.backends.utils import QueryFactory

    import os

    if not authinfo.enabled:
        return

    qmanager = QueryFactory()()

    # Get the COMPUTED calculations of this (computer, aiidauser) pair
    calcs_to_retrieve = qmanager.query_jobcalculations_by_computer_user_state(
        state=calc_states.COMPUTED,
        computer=authinfo.dbcomputer,
        user=authinfo.aiidauser)

    retrieved = []

    # I avoid opening an ssh connection if there are no
    # calcs in state COMPUTED
    if len(calcs_to_retrieve):
        # Open connection
        with authinfo.get_transport() as t:
            for calc in calcs_to_retrieve:
                logger_extra = get_dblogger_extra(calc)
                t._set_logger_extra(logger_extra)

                try:
                    calc._set_state(calc_states.RETRIEVING)
                except ModificationNotAllowed:
                    # Someone else has already started to retrieve it,
                    # just log and continue
                    execlogger.debug("Attempting to retrieve more than once "
                                     "calculation {}: skipping!".format(calc.pk),
                                     extra=logger_extra)
                    continue  # with the next calculation to retrieve

                try:
                    execlogger.debug("Retrieving calc {}".format(calc.pk),
                                     extra=logger_extra)
                    workdir = calc._get_remote_workdir()
                    retrieve_list = calc._get_retrieve_list()
                    retrieve_temporary_list = calc._get_retrieve_temporary_list()
                    retrieve_singlefile_list = calc._get_retrieve_singlefile_list()
                    execlogger.debug("[retrieval of calc {}] "
                                     "chdir {}".format(calc.pk, workdir),
                                     extra=logger_extra)
                    t.chdir(workdir)

                    retrieved_files = FolderData()
                    retrieved_files.add_link_from(
                        calc, label=calc._get_linkname_retrieved(),
                        link_type=LinkType.CREATE)

                    # First, retrieve the files of folderdata
                    with SandboxFolder() as folder:
                        retrieve_files_from_list(calc, t, folder, retrieve_list)
                        # Here I retrieved everything; now I store them inside the calculation
                        retrieved_files.replace_with_folder(folder.abspath,
                                                            overwrite=True)

                    # Second, retrieve the singlefiles
                    with SandboxFolder() as folder:
                        singlefile_list = []
                        for (linkname, subclassname, filename) in retrieve_singlefile_list:
                            execlogger.debug(
                                "[retrieval of calc {}] Trying "
                                "to retrieve remote singlefile '{}'".format(
                                    calc.pk, filename),
                                extra=logger_extra)
                            localfilename = os.path.join(
                                folder.abspath, os.path.split(filename)[1])
                            t.get(filename, localfilename,
                                  ignore_nonexisting=True)
                            singlefile_list.append(
                                (linkname, subclassname, localfilename))

                        # ignore files that have not been retrieved
                        singlefile_list = [i for i in singlefile_list
                                           if os.path.exists(i[2])]

                        # after retrieving from the cluster, I create the objects
                        singlefiles = []
                        for (linkname, subclassname, filename) in singlefile_list:
                            SinglefileSubclass = DataFactory(subclassname)
                            singlefile = SinglefileSubclass()
                            singlefile.set_file(filename)
                            singlefile.add_link_from(calc, label=linkname,
                                                     link_type=LinkType.CREATE)
                            singlefiles.append(singlefile)

                    # Retrieve the temporary files in a separate temporary folder, if any
                    # files were specified in the 'retrieve_temporary_list' key
                    if retrieve_temporary_list:
                        retrieved_temporary_folder = FolderData()
                        with SandboxFolder() as folder:
                            retrieve_files_from_list(calc, t, folder,
                                                     retrieve_temporary_list)
                            retrieved_temporary_folder.replace_with_folder(
                                folder.abspath, overwrite=True)

                        # Log the files that were retrieved in the temporary folder
                        for entry in retrieved_temporary_folder.get_folder_list():
                            execlogger.debug(
                                "[retrieval of calc {}] Retrieved temporary file "
                                "or folder '{}'".format(calc.pk, entry),
                                extra=logger_extra)
                    else:
                        retrieved_temporary_folder = None

                    # Finally, store the retrieved_files node. The retrieved_temporary_folder
                    # node is explicitly not stored, but will just be passed to the
                    # parser.parse_from_calc call
                    execlogger.debug(
                        "[retrieval of calc {}] Storing retrieved_files={}".format(
                            calc.pk, retrieved_files.dbnode.pk),
                        extra=logger_extra)
                    retrieved_files.store()

                    for fil in singlefiles:
                        execlogger.debug(
                            "[retrieval of calc {}] Storing retrieved_singlefile={}".format(
                                calc.pk, fil.dbnode.pk),
                            extra=logger_extra)
                        fil.store()

                    # If I was the one retrieving, I should also be the only one parsing!
                    # I do not check
                    calc._set_state(calc_states.PARSING)

                    Parser = calc.get_parserclass()
                    # If no parser is set, the calculation is successful
                    successful = True
                    if Parser is not None:
                        parser = Parser(calc)
                        successful, new_nodes_tuple = parser.parse_from_calc(
                            retrieved_temporary_folder)

                        for label, n in new_nodes_tuple:
                            n.add_link_from(calc, label=label,
                                            link_type=LinkType.CREATE)
                            n.store()

                    if successful:
                        try:
                            calc._set_state(calc_states.FINISHED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid useless error messages, I
                            # just ignore
                            pass
                    else:
                        try:
                            calc._set_state(calc_states.FAILED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid useless error messages, I
                            # just ignore
                            pass
                        execlogger.error(
                            "[parsing of calc {}] "
                            "The parser returned an error, but it should have "
                            "created an output node with some partial results "
                            "and warnings. Check there for more information on "
                            "the problem".format(calc.pk),
                            extra=logger_extra)
                    retrieved.append(calc)
                except Exception:
                    import traceback

                    tb = traceback.format_exc()
                    newextradict = logger_extra.copy()
                    newextradict['full_traceback'] = tb
                    if calc.get_state() == calc_states.PARSING:
                        execlogger.error("Error parsing calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        # TODO: add a 'comment' to the calculation
                        try:
                            calc._set_state(calc_states.PARSINGFAILED)
                        except ModificationNotAllowed:
                            pass
                    else:
                        execlogger.error("Error retrieving calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        try:
                            calc._set_state(calc_states.RETRIEVALFAILED)
                        except ModificationNotAllowed:
                            pass
                        raise

    return retrieved
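# Illustrative sketch, not part of the original module, of the
# 'retrieve_temporary_list' contract implemented above: files matched by the
# list are fetched into an *unstored* FolderData node, handed to the parser,
# and discarded once parsing is done, so large scratch files never enter the
# file repository. The list accepts the same item forms as 'retrieve_list'
# (file names below are made up):
#
#     retrieve_temporary_list = [
#         'bands.dat',           # fetched, parsed, then thrown away
#         ['out/wfc*', '.', 0],  # glob form: [remote_glob, local_dir, depth]
#     ]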