Exemplo n.º 1
0
def getOSFEntry(dagManJobId):
    """
    Return the OSF-like entry for the DAG identified by `dagManJobId`.

    All the blackboard entries whose DAGManJobId matches are fetched, ordered
    by (ClusterId, ProcId), and presented as the tuple
        (Dataset, Owner, DAGManJobId, entries)
    where Dataset, Owner and DAGManJobId are read from the first entry and
    `entries` holds every matching Blackboard entry.

    `dagManJobId` can be either a bare ClusterId or a full GlobalJobId of the
    form
        <submit host>#<ClusterId>.0#<whatever>
    None is returned if `dagManJobId` is empty or if no entry matches.

    WARNING: while this works also if you do not specify a full Global Job ID
    but just a ClusterId (which incidentally is what is stored in the database
    as well as in each Job ClassAd), there is a risk. The risk is that we end up
    returning Jobs/Blackboard entries that are not associated to that DAG but
    simply happen to have the same DAGManJobId ClusterId (maybe because they
    were submitted on a different host or because we reinstalled Condor in the
    mean time etc.). For this reason, it is always better to use a GlobalJobId
    or at least fabricate one with the right form (see above).
    Even that is not safe as we might have reinstalled Condor and reset the
    ClusterId counter...
    """
    # Define the database connection. This is done before any argument check
    # so that the binding side effect happens on every call.
    elixir.metadata.bind = DATABASE_CONNECTION_STR
    elixir.metadata.bind.echo = False
    elixir.setup_all()

    if not dagManJobId:
        return None

    # See if dagManJobId is a global job id or a local one.
    submit_host = None
    if condorutils.is_globaljobid(dagManJobId):
        submit_host, jobId, _ = condorutils.parse_globaljobid(dagManJobId)
        # Only the ClusterId part ("<ClusterId>.0" -> ClusterId) is stored.
        dagManJobId = int(jobId.split('.')[0])

    query = Blackboard.query.filter_by(DAGManJobId=unicode(dagManJobId))
    if submit_host:
        # This is why global job ids are much safer (but not super safe).
        # The '#' separator is part of the prefix so that e.g. host "node1"
        # does not also match entries submitted from "node10".
        host_prefix = unicode(submit_host) + u'#'
        query = query.filter(Blackboard.GlobalJobId.startswith(host_prefix))
    entries = query.order_by(Blackboard.ClusterId, Blackboard.ProcId).all()
    if not entries:
        return None

    # Now build the OSF-like entry from the first row plus all the rows.
    first = entries[0]
    return (first.Dataset, first.Owner, first.DAGManJobId, entries)
Exemplo n.º 2
0
def getOSFEntry(dagManJobId):
    """
    Return the OSF-like entry for the DAG identified by `dagManJobId`.

    All the blackboard entries whose DAGManJobId matches are fetched, ordered
    by (ClusterId, ProcId), and presented as the tuple
        (Dataset, Owner, DAGManJobId, entries)
    where Dataset, Owner and DAGManJobId are read from the first entry and
    `entries` holds every matching Blackboard entry.

    `dagManJobId` can be either a bare ClusterId or a full GlobalJobId of the
    form
        <submit host>#<ClusterId>.0#<whatever>
    None is returned if `dagManJobId` is empty or if no entry matches.

    WARNING: while this works also if you do not specify a full Global Job ID
    but just a ClusterId (which incidentally is what is stored in the database
    as well as in each Job ClassAd), there is a risk. The risk is that we end up
    returning Jobs/Blackboard entries that are not associated to that DAG but
    simply happen to have the same DAGManJobId ClusterId (maybe because they
    were submitted on a different host or because we reinstalled Condor in the
    mean time etc.). For this reason, it is always better to use a GlobalJobId
    or at least fabricate one with the right form (see above).
    Even that is not safe as we might have reinstalled Condor and reset the
    ClusterId counter...
    """
    # Define the database connection. This is done before any argument check
    # so that the binding side effect happens on every call.
    elixir.metadata.bind = DATABASE_CONNECTION_STR
    elixir.metadata.bind.echo = False
    elixir.setup_all()

    if not dagManJobId:
        return None

    # See if dagManJobId is a global job id or a local one.
    submit_host = None
    if condorutils.is_globaljobid(dagManJobId):
        submit_host, jobId, _ = condorutils.parse_globaljobid(dagManJobId)
        # Only the ClusterId part ("<ClusterId>.0" -> ClusterId) is stored.
        dagManJobId = int(jobId.split('.')[0])

    query = Blackboard.query.filter_by(DAGManJobId=unicode(dagManJobId))
    if submit_host:
        # This is why global job ids are much safer (but not super safe).
        # The '#' separator is part of the prefix so that e.g. host "node1"
        # does not also match entries submitted from "node10".
        host_prefix = unicode(submit_host) + u'#'
        query = query.filter(Blackboard.GlobalJobId.startswith(host_prefix))
    entries = query.order_by(Blackboard.ClusterId, Blackboard.ProcId).all()
    if not entries:
        return None

    # Now build the OSF-like entry from the first row plus all the rows.
    first = entries[0]
    return (first.Dataset, first.Owner, first.DAGManJobId, entries)
Exemplo n.º 3
0
    def _fix_dagman_job_id(self):
        """
        Expand self.DAGManJobId into a GlobalJobId-shaped string.

        DAGManJobId is just the ClusterId of the parent DAGMan job. Here it is
        rewritten in place as
            <host>#<DAGManJobId>.0#<timestamp>
        where <host> is taken from this classad's own GlobalJobId and
        <timestamp> is the last ':'-separated field of the CONDOR_PARENT_ID
        variable found in the job environment.

        Nothing is done if the classad has no (non-empty) DAGManJobId or if its
        environment dictionary is empty. An Exception is raised if the
        environment is there but CONDOR_PARENT_ID is missing or empty.
        """
        # Condor ClusterIds start from 1, not 0, so a plain truth test is enough
        # to tell "no DAGManJobId" apart from a real one. A classad that has a
        # DAGManJobId is assumed to have MyType == Job.
        cluster_id = getattr(self, 'DAGManJobId', None)
        if not cluster_id:
            return

        # Without a job environment there is no CONDOR_PARENT_ID to look at and
        # hence nothing we can fix.
        if not self.environmentdict:
            return

        # Expected layout (per the original author): submit_host:integer:timestamp
        condor_parent = self.environmentdict.get('CONDOR_PARENT_ID', '')
        if not condor_parent:
            raise Exception(
                'CONDOR_PARENT_ID not defined in Job Environment string.')

        submit_time = condor_parent.rsplit(':', 1)[-1]
        submit_host, _, _ = condorutils.parse_globaljobid(self.GlobalJobId)
        self.DAGManJobId = '%s#%s.0#%s' % (submit_host, cluster_id, submit_time)