示例#1
0
def update_jobs():
    """
    calls an update for each set of pairs (machine, aiidauser)
    """
    from aiida.orm import JobCalculation, Computer, User
    from aiida.backends.utils import get_authinfo, QueryFactory

    qmanager = QueryFactory()()
    # I create a unique set of pairs (computer, aiidauser)
    computers_users_to_check = qmanager.query_jobcalculations_by_computer_user_state(
        state=calc_states.WITHSCHEDULER,
        only_computer_user_pairs=True,
        only_enabled=True)

    for computer, aiidauser in computers_users_to_check:

        execlogger.debug("({},{}) pair to check".format(
            aiidauser.email, computer.name))

        try:
            authinfo = get_authinfo(computer.dbcomputer, aiidauser._dbuser)
            computed_calcs = update_running_calcs_status(authinfo)
        except Exception as e:
            msg = ("Error while updating calculation status "
                   "for aiidauser={} on computer={}, "
                   "error type is {}, error message: {}".format(
                       aiidauser.email, computer.name, e.__class__.__name__,
                       e.message))
            execlogger.error(msg)
            # Continue with next computer
            continue
示例#2
0
def retrieve_jobs():
    from aiida.orm import JobCalculation, Computer
    from aiida.backends.utils import get_authinfo, QueryFactory

    qmanager = QueryFactory()()
    # I create a unique set of pairs (computer, aiidauser)
    computers_users_to_check = qmanager.query_jobcalculations_by_computer_user_state(
        state=calc_states.COMPUTED,
        only_computer_user_pairs=True,
        only_enabled=True)

    # I create a unique set of pairs (computer, aiidauser)
    #~ computers_users_to_check = list(
    #~ JobCalculation._get_all_with_state(
    #~ state=calc_states.COMPUTED,
    #~ only_computer_user_pairs=True,
    #~ only_enabled=True)
    #~ )

    for computer, aiidauser in computers_users_to_check:
        execlogger.debug("({},{}) pair to check".format(
            aiidauser.email, computer.name))
        try:
            authinfo = get_authinfo(computer.dbcomputer, aiidauser._dbuser)
            retrieve_computed_for_authinfo(authinfo)
        except Exception as e:
            msg = ("Error while retrieving calculation status for "
                   "aiidauser={} on computer={}, "
                   "error type is {}, error message: {}".format(
                       aiidauser.email, computer.name, e.__class__.__name__,
                       e.message))
            execlogger.error(msg)
            # Continue with next computer
            continue
示例#3
0
    def test_statistics(self):
        """
        Test if the statistics query works properly.

        I try to implement it in a way that does not depend on the past state.
        """
        from aiida.backends.utils import QueryFactory
        from aiida.orm import Node, DataFactory, Calculation
        from collections import defaultdict

        def store_and_add(n, statistics):
            n.store()
            statistics['total'] += 1
            statistics['types'][n._plugin_type_string] += 1
            statistics['ctime_by_day'][n.ctime.strftime('%Y-%m-%d')] += 1

        qmanager = QueryFactory()()
        current_db_statistics = qmanager.get_creation_statistics()
        types = defaultdict(int)
        types.update(current_db_statistics['types'])
        ctime_by_day = defaultdict(int)
        ctime_by_day.update(current_db_statistics['ctime_by_day'])

        expected_db_statistics = {
            'total': current_db_statistics['total'],
            'types': types,
            'ctime_by_day': ctime_by_day
        }

        ParameterData = DataFactory('parameter')

        store_and_add(Node(), expected_db_statistics)
        store_and_add(ParameterData(), expected_db_statistics)
        store_and_add(ParameterData(), expected_db_statistics)
        store_and_add(Calculation(), expected_db_statistics)

        new_db_statistics = qmanager.get_creation_statistics()
        # I only check a few fields
        new_db_statistics = {
            k: v
            for k, v in new_db_statistics.iteritems()
            if k in expected_db_statistics
        }

        expected_db_statistics = {
            k: dict(v) if isinstance(v, defaultdict) else v
            for k, v in expected_db_statistics.iteritems()
        }

        self.assertEquals(new_db_statistics, expected_db_statistics)
示例#4
0
def bands_list(elements, elements_only, raw, formulamode, past_days, groups,
               all_users):
    """
    List bands objects
    """
    # from aiida.orm.data.cif import CifData
    from aiida.backends.utils import QueryFactory
    from tabulate import tabulate
    from argparse import Namespace

    args = Namespace()
    args.element = elements
    args.element_only = elements_only
    args.formulamode = formulamode
    args.past_days = past_days
    args.group_name = None
    if groups is not None:
        args.group_pk = [group.id for group in groups]
    else:
        args.group_pk = None
    args.all_users = all_users

    query = QueryFactory()()
    entry_list = query.get_bands_and_parents_structure(args)

    counter = 0
    bands_list_data = list()
    if not raw:
        bands_list_data.append(PROJECT_HEADERS)
    for entry in entry_list:
        for i, value in enumerate(entry):
            if isinstance(value, list):
                entry[i] = ",".join(value)
        for i in range(len(entry), len(PROJECT_HEADERS)):
            entry.append(None)
        counter += 1
    bands_list_data.extend(entry_list)
    if raw:
        echo.echo(tabulate(bands_list_data, tablefmt='plain'))
    else:
        echo.echo(tabulate(bands_list_data, headers="firstrow"))
        echo.echo("\nTotal results: {}\n".format(counter))
示例#5
0
    def get_statistics(self, user_pk=None):
        """Return statistics for a given node"""

        from aiida.backends.utils import QueryFactory
        qmanager = QueryFactory()()
        return qmanager.get_creation_statistics(user_pk=user_pk)
示例#6
0
    def test_query_path(self):
        from aiida.orm.querybuilder import QueryBuilder
        from aiida.orm import Node
        from aiida.common.links import LinkType
        from aiida.backends.utils import QueryFactory

        q = QueryFactory()()
        n1 = Node()
        n1.label = 'n1'
        n1.store()
        n2 = Node()
        n2.label = 'n2'
        n2.store()
        n3 = Node()
        n3.label = 'n3'
        n3.store()
        n4 = Node()
        n4.label = 'n4'
        n4.store()
        n5 = Node()
        n5.label = 'n5'
        n5.store()
        n6 = Node()
        n6.label = 'n6'
        n6.store()
        n7 = Node()
        n7.label = 'n7'
        n7.store()
        n8 = Node()
        n8.label = 'n8'
        n8.store()
        n9 = Node()
        n9.label = 'n9'
        n9.store()

        # I create a strange graph, inserting links in a order
        # such that I often have to create the transitive closure
        # between two graphs
        # I set everything as an INPUT-links now, because the QueryBuilder path query or
        # our custom queries don't follow other links than CREATE or INPUT
        n3.add_link_from(n2, link_type=LinkType.INPUT)
        n2.add_link_from(n1, link_type=LinkType.INPUT)
        n5.add_link_from(n3, link_type=LinkType.INPUT)
        n5.add_link_from(n4, link_type=LinkType.INPUT)
        n4.add_link_from(n2, link_type=LinkType.INPUT)
        n7.add_link_from(n6, link_type=LinkType.INPUT)
        n8.add_link_from(n7, link_type=LinkType.INPUT)

        # There are no parents to n9, checking that
        self.assertEqual(set([]), set(q.get_all_parents([n9.pk])))
        # There is one parent to n6
        self.assertEqual(set([(_, ) for _ in (n6.pk, )]),
                         set([tuple(_) for _ in q.get_all_parents([n7.pk])]))
        # There are several parents to n4
        self.assertEqual(set([(_.pk, ) for _ in (n1, n2)]),
                         set([tuple(_) for _ in q.get_all_parents([n4.pk])]))
        # There are several parents to n5
        self.assertEqual(set([(_.pk, ) for _ in (n1, n2, n3, n4)]),
                         set([tuple(_) for _ in q.get_all_parents([n5.pk])]))

        # Yet, no links from 1 to 8
        self.assertEquals(
            QueryBuilder().append(Node, filters={
                'id': n1.pk
            }, tag='anc').append(Node,
                                 descendant_of='anc',
                                 filters={
                                     'id': n8.pk
                                 }).count(), 0)

        self.assertEquals(
            QueryBuilder().append(Node, filters={
                'id': n8.pk
            }, tag='desc').append(Node,
                                  ancestor_of='desc',
                                  filters={
                                      'id': n1.pk
                                  }).count(), 0)

        n6.add_link_from(n5, link_type=LinkType.INPUT)
        # Yet, now 2 links from 1 to 8
        self.assertEquals(
            QueryBuilder().append(Node, filters={
                'id': n1.pk
            }, tag='anc').append(Node,
                                 descendant_of='anc',
                                 filters={
                                     'id': n8.pk
                                 }).count(), 2)

        self.assertEquals(
            QueryBuilder().append(Node, filters={
                'id': n8.pk
            }, tag='desc').append(Node,
                                  ancestor_of='desc',
                                  filters={
                                      'id': n1.pk
                                  }).count(), 2)

        self.assertEquals(
            QueryBuilder().append(Node, filters={
                'id': n8.pk
            }, tag='desc').append(
                Node,
                ancestor_of='desc',
                filters={
                    'id': n1.pk
                },
                edge_filters={
                    'depth': {
                        '<': 6
                    }
                },
            ).count(), 2)
        self.assertEquals(
            QueryBuilder().append(Node, filters={
                'id': n8.pk
            }, tag='desc').append(
                Node,
                ancestor_of='desc',
                filters={
                    'id': n1.pk
                },
                edge_filters={
                    'depth': 5
                },
            ).count(), 2)
        self.assertEquals(
            QueryBuilder().append(Node, filters={
                'id': n8.pk
            }, tag='desc').append(
                Node,
                ancestor_of='desc',
                filters={
                    'id': n1.pk
                },
                edge_filters={
                    'depth': {
                        '<': 5
                    }
                },
            ).count(), 0)

        # TODO write a query that can filter certain paths by traversed ID
        qb = QueryBuilder().append(
            Node,
            filters={
                'id': n8.pk
            },
            tag='desc',
        ).append(Node,
                 ancestor_of='desc',
                 edge_project='path',
                 filters={'id': n1.pk})
        queried_path_set = set([frozenset(p) for p, in qb.all()])

        paths_there_should_be = set([
            frozenset([n1.pk, n2.pk, n3.pk, n5.pk, n6.pk, n7.pk, n8.pk]),
            frozenset([n1.pk, n2.pk, n4.pk, n5.pk, n6.pk, n7.pk, n8.pk])
        ])

        self.assertTrue(queried_path_set == paths_there_should_be)

        qb = QueryBuilder().append(Node, filters={
            'id': n1.pk
        }, tag='anc').append(Node,
                             descendant_of='anc',
                             filters={'id': n8.pk},
                             edge_project='path')

        self.assertTrue(
            set([frozenset(p) for p, in qb.all()]) == set([
                frozenset([n1.pk, n2.pk, n3.pk, n5.pk, n6.pk, n7.pk, n8.pk]),
                frozenset([n1.pk, n2.pk, n4.pk, n5.pk, n6.pk, n7.pk, n8.pk])
            ]))

        n7.add_link_from(n9, link_type=LinkType.INPUT)
        # Still two links...

        self.assertEquals(
            QueryBuilder().append(Node, filters={
                'id': n1.pk
            }, tag='anc').append(Node,
                                 descendant_of='anc',
                                 filters={
                                     'id': n8.pk
                                 }).count(), 2)

        self.assertEquals(
            QueryBuilder().append(Node, filters={
                'id': n8.pk
            }, tag='desc').append(Node,
                                  ancestor_of='desc',
                                  filters={
                                      'id': n1.pk
                                  }).count(), 2)
        n9.add_link_from(n6, link_type=LinkType.INPUT)
        # And now there should be 4 nodes

        self.assertEquals(
            QueryBuilder().append(Node, filters={
                'id': n1.pk
            }, tag='anc').append(Node,
                                 descendant_of='anc',
                                 filters={
                                     'id': n8.pk
                                 }).count(), 4)

        self.assertEquals(
            QueryBuilder().append(Node, filters={
                'id': n8.pk
            }, tag='desc').append(Node,
                                  ancestor_of='desc',
                                  filters={
                                      'id': n1.pk
                                  }).count(), 4)

        qb = QueryBuilder().append(Node, filters={
            'id': n1.pk
        }, tag='anc').append(Node,
                             descendant_of='anc',
                             filters={'id': n8.pk},
                             edge_tag='edge')
        qb.add_projection('edge', 'depth')
        self.assertTrue(set(zip(*qb.all())[0]), set([5, 6]))
        qb.add_filter('edge', {'depth': 6})
        self.assertTrue(set(zip(*qb.all())[0]), set([6]))
示例#7
0
def retrieve_computed_for_authinfo(authinfo):
    from aiida.orm import JobCalculation
    from aiida.common.folders import SandboxFolder
    from aiida.orm.data.folder import FolderData
    from aiida.utils.logger import get_dblogger_extra
    from aiida.orm import DataFactory

    from aiida.backends.utils import QueryFactory

    import os

    if not authinfo.enabled:
        return

    qmanager = QueryFactory()()
    # I create a unique set of pairs (computer, aiidauser)
    calcs_to_retrieve = qmanager.query_jobcalculations_by_computer_user_state(
        state=calc_states.COMPUTED,
        computer=authinfo.dbcomputer,
        user=authinfo.aiidauser)

    retrieved = []

    # I avoid to open an ssh connection if there are no
    # calcs with state not COMPUTED
    if len(calcs_to_retrieve):

        # Open connection
        with authinfo.get_transport() as t:
            for calc in calcs_to_retrieve:
                logger_extra = get_dblogger_extra(calc)
                t._set_logger_extra(logger_extra)

                try:
                    calc._set_state(calc_states.RETRIEVING)
                except ModificationNotAllowed:
                    # Someone else has already started to retrieve it,
                    # just log and continue
                    execlogger.debug("Attempting to retrieve more than once "
                                     "calculation {}: skipping!".format(
                                         calc.pk),
                                     extra=logger_extra)
                    continue  # with the next calculation to retrieve
                try:
                    execlogger.debug("Retrieving calc {}".format(calc.pk),
                                     extra=logger_extra)
                    workdir = calc._get_remote_workdir()
                    retrieve_list = calc._get_retrieve_list()
                    retrieve_singlefile_list = calc._get_retrieve_singlefile_list(
                    )
                    execlogger.debug("[retrieval of calc {}] "
                                     "chdir {}".format(calc.pk, workdir),
                                     extra=logger_extra)
                    t.chdir(workdir)

                    retrieved_files = FolderData()
                    retrieved_files.add_link_from(
                        calc,
                        label=calc._get_linkname_retrieved(),
                        link_type=LinkType.CREATE)

                    # First, retrieve the files of folderdata
                    with SandboxFolder() as folder:
                        for item in retrieve_list:
                            # I have two possibilities:
                            # * item is a string
                            # * or is a list
                            # then I have other two possibilities:
                            # * there are file patterns
                            # * or not
                            # First decide the name of the files
                            if isinstance(item, list):
                                tmp_rname, tmp_lname, depth = item
                                # if there are more than one file I do something differently
                                if t.has_magic(tmp_rname):
                                    remote_names = t.glob(tmp_rname)
                                    local_names = []
                                    for rem in remote_names:
                                        to_append = rem.split(
                                            os.path.sep
                                        )[-depth:] if depth > 0 else []
                                        local_names.append(
                                            os.path.sep.join([tmp_lname] +
                                                             to_append))
                                else:
                                    remote_names = [tmp_rname]
                                    to_append = remote_names.split(
                                        os.path.sep
                                    )[-depth:] if depth > 0 else []
                                    local_names = [
                                        os.path.sep.join([tmp_lname] +
                                                         to_append)
                                    ]
                                if depth > 1:  # create directories in the folder, if needed
                                    for this_local_file in local_names:
                                        new_folder = os.path.join(
                                            folder.abspath,
                                            os.path.split(this_local_file)[0])
                                        if not os.path.exists(new_folder):
                                            os.makedirs(new_folder)
                            else:  # it is a string
                                if t.has_magic(item):
                                    remote_names = t.glob(item)
                                    local_names = [
                                        os.path.split(rem)[1]
                                        for rem in remote_names
                                    ]
                                else:
                                    remote_names = [item]
                                    local_names = [os.path.split(item)[1]]

                            for rem, loc in zip(remote_names, local_names):
                                execlogger.debug(
                                    "[retrieval of calc {}] "
                                    "Trying to retrieve remote item '{}'".
                                    format(calc.pk, rem),
                                    extra=logger_extra)
                                t.get(rem,
                                      os.path.join(folder.abspath, loc),
                                      ignore_nonexisting=True)

                        # Here I retrieved everything;
                        # now I store them inside the calculation
                        retrieved_files.replace_with_folder(folder.abspath,
                                                            overwrite=True)

                    # Second, retrieve the singlefiles
                    with SandboxFolder() as folder:
                        singlefile_list = []
                        for (linkname, subclassname,
                             filename) in retrieve_singlefile_list:
                            execlogger.debug(
                                "[retrieval of calc {}] Trying "
                                "to retrieve remote singlefile '{}'".format(
                                    calc.pk, filename),
                                extra=logger_extra)
                            localfilename = os.path.join(
                                folder.abspath,
                                os.path.split(filename)[1])
                            t.get(filename,
                                  localfilename,
                                  ignore_nonexisting=True)
                            singlefile_list.append(
                                (linkname, subclassname, localfilename))

                        # ignore files that have not been retrieved
                        singlefile_list = [
                            i for i in singlefile_list if os.path.exists(i[2])
                        ]

                        # after retrieving from the cluster, I create the objects
                        singlefiles = []
                        for (linkname, subclassname,
                             filename) in singlefile_list:
                            SinglefileSubclass = DataFactory(subclassname)
                            singlefile = SinglefileSubclass()
                            singlefile.set_file(filename)
                            singlefile.add_link_from(calc,
                                                     label=linkname,
                                                     link_type=LinkType.CREATE)
                            singlefiles.append(singlefile)

                    # Finally, store
                    execlogger.debug("[retrieval of calc {}] "
                                     "Storing retrieved_files={}".format(
                                         calc.pk, retrieved_files.dbnode.pk),
                                     extra=logger_extra)
                    retrieved_files.store()
                    for fil in singlefiles:
                        execlogger.debug(
                            "[retrieval of calc {}] "
                            "Storing retrieved_singlefile={}".format(
                                calc.pk, fil.dbnode.pk),
                            extra=logger_extra)
                        fil.store()

                    # If I was the one retrieving, I should also be the only
                    # one parsing! I do not check
                    calc._set_state(calc_states.PARSING)

                    Parser = calc.get_parserclass()
                    # If no parser is set, the calculation is successful
                    successful = True
                    if Parser is not None:
                        # TODO: parse here
                        parser = Parser(calc)
                        successful, new_nodes_tuple = parser.parse_from_calc()

                        for label, n in new_nodes_tuple:
                            n.add_link_from(calc,
                                            label=label,
                                            link_type=LinkType.CREATE)
                            n.store()

                    if successful:
                        try:
                            calc._set_state(calc_states.FINISHED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid unuseful error messages, I
                            # just ignore
                            pass
                    else:
                        try:
                            calc._set_state(calc_states.FAILED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid unuseful error messages, I
                            # just ignore
                            pass
                        execlogger.error(
                            "[parsing of calc {}] "
                            "The parser returned an error, but it should have "
                            "created an output node with some partial results "
                            "and warnings. Check there for more information on "
                            "the problem".format(calc.pk),
                            extra=logger_extra)
                    retrieved.append(calc)
                except Exception:
                    import traceback

                    tb = traceback.format_exc()
                    newextradict = logger_extra.copy()
                    newextradict['full_traceback'] = tb
                    if calc.get_state() == calc_states.PARSING:
                        execlogger.error("Error parsing calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        # TODO: add a 'comment' to the calculation
                        try:
                            calc._set_state(calc_states.PARSINGFAILED)
                        except ModificationNotAllowed:
                            pass
                    else:
                        execlogger.error("Error retrieving calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        try:
                            calc._set_state(calc_states.RETRIEVALFAILED)
                        except ModificationNotAllowed:
                            pass
                        raise

    return retrieved
示例#8
0
def submit_jobs_with_authinfo(authinfo):
    """
    Submit jobs in TOSUBMIT status belonging
    to user and machine as defined in the 'dbauthinfo' table.
    """
    from aiida.orm import JobCalculation
    from aiida.utils.logger import get_dblogger_extra

    from aiida.backends.utils import QueryFactory

    if not authinfo.enabled:
        return

    execlogger.debug("Submitting jobs for user {} "
                     "and machine {}".format(authinfo.aiidauser.email,
                                             authinfo.dbcomputer.name))

    qmanager = QueryFactory()()
    # I create a unique set of pairs (computer, aiidauser)
    calcs_to_inquire = qmanager.query_jobcalculations_by_computer_user_state(
        state=calc_states.TOSUBMIT,
        computer=authinfo.dbcomputer,
        user=authinfo.aiidauser)

    # I avoid to open an ssh connection if there are
    # no calcs with state WITHSCHEDULER
    if len(calcs_to_inquire):
        # Open connection
        try:
            # I do it here so that the transport is opened only once per computer
            with authinfo.get_transport() as t:
                for c in calcs_to_inquire:
                    logger_extra = get_dblogger_extra(c)
                    t._set_logger_extra(logger_extra)

                    try:
                        submit_calc(calc=c, authinfo=authinfo, transport=t)
                    except Exception as e:
                        # TODO: implement a counter, after N retrials
                        # set it to a status that
                        # requires the user intervention
                        execlogger.warning("There was an exception for "
                                           "calculation {} ({}): {}".format(
                                               c.pk, e.__class__.__name__,
                                               e.message))
                        # I just proceed to the next calculation
                        continue
        # Catch exceptions also at this level (this happens only if there is
        # a problem opening the transport in the 'with t' statement,
        # because any other exception is caught and skipped above
        except Exception as e:
            import traceback
            from aiida.utils.logger import get_dblogger_extra

            for calc in calcs_to_inquire:
                logger_extra = get_dblogger_extra(calc)
                try:
                    calc._set_state(calc_states.SUBMISSIONFAILED)
                except ModificationNotAllowed:
                    # Someone already set it, just skip
                    pass

                execlogger.error(
                    "Submission of calc {} failed, check also the "
                    "log file! Traceback: {}".format(calc.pk,
                                                     traceback.format_exc()),
                    extra=logger_extra)
            raise
示例#9
0
def update_running_calcs_status(authinfo):
    """
    Update the states of calculations in WITHSCHEDULER status belonging
    to user and machine as defined in the 'dbauthinfo' table.
    """
    from aiida.orm import JobCalculation, Computer
    from aiida.scheduler.datastructures import JobInfo
    from aiida.utils.logger import get_dblogger_extra
    from aiida.backends.utils import QueryFactory

    if not authinfo.enabled:
        return

    execlogger.debug("Updating running calc status for user {} "
                     "and machine {}".format(authinfo.aiidauser.email,
                                             authinfo.dbcomputer.name))

    qmanager = QueryFactory()()
    calcs_to_inquire = qmanager.query_jobcalculations_by_computer_user_state(
        state=calc_states.WITHSCHEDULER,
        computer=authinfo.dbcomputer,
        user=authinfo.aiidauser)

    #~ calcs_to_inquire = list(JobCalculation._get_all_with_state(
    #~ state=calc_states.WITHSCHEDULER,
    #~ computer=authinfo.dbcomputer,
    #~ user=authinfo.aiidauser)
    #~ )

    # NOTE: no further check is done that machine and
    # aiidauser are correct for each calc in calcs
    s = Computer(dbcomputer=authinfo.dbcomputer).get_scheduler()
    t = authinfo.get_transport()

    computed = []

    # I avoid to open an ssh connection if there are
    # no calcs with state WITHSCHEDULER
    if len(calcs_to_inquire):
        jobids_to_inquire = [str(c.get_job_id()) for c in calcs_to_inquire]

        # Open connection
        with t:
            s.set_transport(t)
            # TODO: Check if we are ok with filtering by job (to make this work,
            # I had to remove the check on the retval for getJobs,
            # because if the job has computed and is not in the output of
            # qstat, it gives a nonzero retval)

            # TODO: catch SchedulerError exception and do something
            # sensible (at least, skip this computer but continue with
            # following ones, and set a counter; set calculations to
            # UNKNOWN after a while?
            if s.get_feature('can_query_by_user'):
                found_jobs = s.getJobs(user="******", as_dict=True)
            else:
                found_jobs = s.getJobs(jobs=jobids_to_inquire, as_dict=True)

            # I update the status of jobs

            for c in calcs_to_inquire:
                try:
                    logger_extra = get_dblogger_extra(c)
                    t._set_logger_extra(logger_extra)

                    jobid = c.get_job_id()
                    if jobid is None:
                        execlogger.error("JobCalculation {} is WITHSCHEDULER "
                                         "but no job id was found!".format(
                                             c.pk),
                                         extra=logger_extra)
                        continue

                    # I check if the calculation to be checked (c)
                    # is in the output of qstat
                    if jobid in found_jobs:
                        # jobinfo: the information returned by
                        # qstat for this job
                        jobinfo = found_jobs[jobid]
                        execlogger.debug("Inquirying calculation {} (jobid "
                                         "{}): it has job_state={}".format(
                                             c.pk, jobid, jobinfo.job_state),
                                         extra=logger_extra)
                        # For the moment, FAILED is not defined
                        if jobinfo.job_state in [job_states.DONE
                                                 ]:  # , job_states.FAILED]:
                            computed.append(c)
                            try:
                                c._set_state(calc_states.COMPUTED)
                            except ModificationNotAllowed:
                                # Someone already set it, just skip
                                pass

                        ## Do not set the WITHSCHEDULER state multiple times,
                        ## this would raise a ModificationNotAllowed
                        # else:
                        # c._set_state(calc_states.WITHSCHEDULER)

                        c._set_scheduler_state(jobinfo.job_state)

                        c._set_last_jobinfo(jobinfo)
                    else:
                        execlogger.debug("Inquirying calculation {} (jobid "
                                         "{}): not found, assuming "
                                         "job_state={}".format(
                                             c.pk, jobid, job_states.DONE),
                                         extra=logger_extra)

                        # calculation c is not found in the output of qstat
                        computed.append(c)
                        c._set_scheduler_state(job_states.DONE)
                except Exception as e:
                    # TODO: implement a counter, after N retrials
                    # set it to a status that
                    # requires the user intervention
                    execlogger.warning("There was an exception for "
                                       "calculation {} ({}): {}".format(
                                           c.pk, e.__class__.__name__,
                                           e.message),
                                       extra=logger_extra)
                    continue

            for c in computed:
                try:
                    logger_extra = get_dblogger_extra(c)
                    try:
                        detailed_jobinfo = s.get_detailed_jobinfo(
                            jobid=c.get_job_id())
                    except NotImplementedError:
                        detailed_jobinfo = (
                            u"AiiDA MESSAGE: This scheduler does not implement "
                            u"the routine get_detailed_jobinfo to retrieve "
                            u"the information on "
                            u"a job after it has finished.")
                    last_jobinfo = c._get_last_jobinfo()
                    if last_jobinfo is None:
                        last_jobinfo = JobInfo()
                        last_jobinfo.job_id = c.get_job_id()
                        last_jobinfo.job_state = job_states.DONE
                    last_jobinfo.detailedJobinfo = detailed_jobinfo
                    c._set_last_jobinfo(last_jobinfo)
                except Exception as e:
                    execlogger.warning("There was an exception while "
                                       "retrieving the detailed jobinfo "
                                       "for calculation {} ({}): {}".format(
                                           c.pk, e.__class__.__name__,
                                           e.message),
                                       extra=logger_extra)
                    continue
                finally:
                    # Set the state to COMPUTED as the very last thing
                    # of this routine; no further change should be done after
                    # this, so that in general the retriever can just
                    # poll for this state, if we want to.
                    try:
                        c._set_state(calc_states.COMPUTED)
                    except ModificationNotAllowed:
                        # Someone already set it, just skip
                        pass

    return computed
示例#10
0
def submit_jobs():
    """
    Submit all jobs in the TOSUBMIT state.
    """
    from aiida.orm import JobCalculation, Computer, User
    from aiida.utils.logger import get_dblogger_extra
    from aiida.backends.utils import get_authinfo, QueryFactory

    qmanager = QueryFactory()()
    # I create a unique set of pairs (computer, aiidauser)
    computers_users_to_check = qmanager.query_jobcalculations_by_computer_user_state(
        state=calc_states.TOSUBMIT,
        only_computer_user_pairs=True,
        only_enabled=True)

    for computer, aiidauser in computers_users_to_check:

        execlogger.debug("({},{}) pair to submit".format(
            aiidauser.email, computer.name))

        try:
            try:
                authinfo = get_authinfo(computer.dbcomputer, aiidauser._dbuser)
            except AuthenticationError:
                # TODO!!
                # Put each calculation in the SUBMISSIONFAILED state because
                # I do not have AuthInfo to submit them
                calcs_to_inquire = qmanager.query_jobcalculations_by_computer_user_state(
                    state=calc_states.TOSUBMIT,
                    computer=computer,
                    user=aiidauser)
                #~ calcs_to_inquire = JobCalculation._get_all_with_state(
                #~ state=calc_states.TOSUBMIT,
                #~ computer=computer, user=aiidauser)
                for calc in calcs_to_inquire:
                    try:
                        calc._set_state(calc_states.SUBMISSIONFAILED)
                    except ModificationNotAllowed:
                        # Someone already set it, just skip
                        pass
                    logger_extra = get_dblogger_extra(calc)
                    execlogger.error("Submission of calc {} failed, "
                                     "computer pk= {} ({}) is not configured "
                                     "for aiidauser {}".format(
                                         calc.pk, computer.pk,
                                         computer.get_name(), aiidauser.email),
                                     extra=logger_extra)
                # Go to the next (dbcomputer,aiidauser) pair
                continue

            submitted_calcs = submit_jobs_with_authinfo(authinfo)
        except Exception as e:
            import traceback

            msg = ("Error while submitting jobs "
                   "for aiidauser={} on computer={}, "
                   "error type is {}, traceback: {}".format(
                       aiidauser.email, computer.name, e.__class__.__name__,
                       traceback.format_exc()))
            print msg
            execlogger.error(msg)
            # Continue with next computer
            continue
示例#11
0
def retrieve_computed_for_authinfo(authinfo):
    from aiida.orm import JobCalculation
    from aiida.common.folders import SandboxFolder
    from aiida.orm.data.folder import FolderData
    from aiida.common.log import get_dblogger_extra
    from aiida.orm import DataFactory

    from aiida.backends.utils import QueryFactory

    import os

    if not authinfo.enabled:
        return

    qmanager = QueryFactory()()
    # I create a unique set of pairs (computer, aiidauser)
    calcs_to_retrieve = qmanager.query_jobcalculations_by_computer_user_state(
        state=calc_states.COMPUTED,
        computer=authinfo.dbcomputer,
        user=authinfo.aiidauser)

    retrieved = []

    # I avoid to open an ssh connection if there are no
    # calcs with state not COMPUTED
    if len(calcs_to_retrieve):

        # Open connection
        with authinfo.get_transport() as t:
            for calc in calcs_to_retrieve:
                logger_extra = get_dblogger_extra(calc)
                t._set_logger_extra(logger_extra)

                try:
                    calc._set_state(calc_states.RETRIEVING)
                except ModificationNotAllowed:
                    # Someone else has already started to retrieve it,
                    # just log and continue
                    execlogger.debug("Attempting to retrieve more than once "
                                     "calculation {}: skipping!".format(
                                         calc.pk),
                                     extra=logger_extra)
                    continue  # with the next calculation to retrieve
                try:
                    execlogger.debug("Retrieving calc {}".format(calc.pk),
                                     extra=logger_extra)
                    workdir = calc._get_remote_workdir()
                    retrieve_list = calc._get_retrieve_list()
                    retrieve_temporary_list = calc._get_retrieve_temporary_list(
                    )
                    retrieve_singlefile_list = calc._get_retrieve_singlefile_list(
                    )
                    execlogger.debug("[retrieval of calc {}] "
                                     "chdir {}".format(calc.pk, workdir),
                                     extra=logger_extra)
                    t.chdir(workdir)

                    retrieved_files = FolderData()
                    retrieved_files.add_link_from(
                        calc,
                        label=calc._get_linkname_retrieved(),
                        link_type=LinkType.CREATE)

                    # First, retrieve the files of folderdata
                    with SandboxFolder() as folder:
                        retrieve_files_from_list(calc, t, folder,
                                                 retrieve_list)
                        # Here I retrieved everything; now I store them inside the calculation
                        retrieved_files.replace_with_folder(folder.abspath,
                                                            overwrite=True)

                    # Second, retrieve the singlefiles
                    with SandboxFolder() as folder:
                        singlefile_list = []
                        for (linkname, subclassname,
                             filename) in retrieve_singlefile_list:
                            execlogger.debug(
                                "[retrieval of calc {}] Trying "
                                "to retrieve remote singlefile '{}'".format(
                                    calc.pk, filename),
                                extra=logger_extra)
                            localfilename = os.path.join(
                                folder.abspath,
                                os.path.split(filename)[1])
                            t.get(filename,
                                  localfilename,
                                  ignore_nonexisting=True)
                            singlefile_list.append(
                                (linkname, subclassname, localfilename))

                        # ignore files that have not been retrieved
                        singlefile_list = [
                            i for i in singlefile_list if os.path.exists(i[2])
                        ]

                        # after retrieving from the cluster, I create the objects
                        singlefiles = []
                        for (linkname, subclassname,
                             filename) in singlefile_list:
                            SinglefileSubclass = DataFactory(subclassname)
                            singlefile = SinglefileSubclass()
                            singlefile.set_file(filename)
                            singlefile.add_link_from(calc,
                                                     label=linkname,
                                                     link_type=LinkType.CREATE)
                            singlefiles.append(singlefile)

                    # Retrieve the temporary files in a separate temporary folder if any files were
                    # specified in the 'retrieve_temporary_list' key
                    if retrieve_temporary_list:
                        retrieved_temporary_folder = FolderData()
                        with SandboxFolder() as folder:
                            retrieve_files_from_list(calc, t, folder,
                                                     retrieve_temporary_list)
                            retrieved_temporary_folder.replace_with_folder(
                                folder.abspath, overwrite=True)

                        # Log the files that were retrieved in the temporary folder
                        for entry in retrieved_temporary_folder.get_folder_list(
                        ):
                            execlogger.debug(
                                "[retrieval of calc {}] Retrieved temporary file or folder '{}'"
                                .format(calc.pk, entry),
                                extra=logger_extra)
                    else:
                        retrieved_temporary_folder = None

                    # Finally, store the retrieved_files node. The retrieved_temporary_folder node
                    # is explicitly not stored, but will just be passed to the parser.parse_from calc call
                    execlogger.debug(
                        "[retrieval of calc {}] Storing retrieved_files={}".
                        format(calc.pk, retrieved_files.dbnode.pk),
                        extra=logger_extra)
                    retrieved_files.store()

                    for fil in singlefiles:
                        execlogger.debug(
                            "[retrieval of calc {}] Storing retrieved_singlefile={}"
                            .format(calc.pk, fil.dbnode.pk),
                            extra=logger_extra)
                        fil.store()

                    # If I was the one retrieving, I should also be the only one parsing! I do not check
                    calc._set_state(calc_states.PARSING)

                    Parser = calc.get_parserclass()
                    # If no parser is set, the calculation is successful
                    successful = True
                    if Parser is not None:
                        parser = Parser(calc)
                        successful, new_nodes_tuple = parser.parse_from_calc(
                            retrieved_temporary_folder)

                        for label, n in new_nodes_tuple:
                            n.add_link_from(calc,
                                            label=label,
                                            link_type=LinkType.CREATE)
                            n.store()

                    if successful:
                        try:
                            calc._set_state(calc_states.FINISHED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid unuseful error messages, I
                            # just ignore
                            pass
                    else:
                        try:
                            calc._set_state(calc_states.FAILED)
                        except ModificationNotAllowed:
                            # I should have been the only one to set it, but
                            # in order to avoid unuseful error messages, I
                            # just ignore
                            pass
                        execlogger.error(
                            "[parsing of calc {}] "
                            "The parser returned an error, but it should have "
                            "created an output node with some partial results "
                            "and warnings. Check there for more information on "
                            "the problem".format(calc.pk),
                            extra=logger_extra)
                    retrieved.append(calc)
                except Exception:
                    import traceback

                    tb = traceback.format_exc()
                    newextradict = logger_extra.copy()
                    newextradict['full_traceback'] = tb
                    if calc.get_state() == calc_states.PARSING:
                        execlogger.error("Error parsing calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        # TODO: add a 'comment' to the calculation
                        try:
                            calc._set_state(calc_states.PARSINGFAILED)
                        except ModificationNotAllowed:
                            pass
                    else:
                        execlogger.error("Error retrieving calc {}. "
                                         "Traceback: {}".format(calc.pk, tb),
                                         extra=newextradict)
                        try:
                            calc._set_state(calc_states.RETRIEVALFAILED)
                        except ModificationNotAllowed:
                            pass
                        raise

    return retrieved