def _condorJobsInQueue(self, cdata):
    '''Return the number of jobs still in the queue for a cluster.

    Runs ``condor_q`` for the cluster in ``cdata['clusterid']`` (adding
    ``-name cdata['queue']`` when that entry is set) and counts the output
    lines that start with the cluster ID.

    Returns the count as an int, or None when the command wrote anything
    to stderr or finished with a non-zero return code.
    '''
    jobCount = None
    cluster_id = cdata.get('clusterid')
    queue_name = cdata.get('queue')
    # condor_q -name q1@`hostname` -f "%d\n" ClusterID 11292
    # We don't care if jobs are in the queue in the C or X state. So filter
    # those out with a constraint.
    cmd = ['condor_q']
    if queue_name:
        cmd.extend(['-name', queue_name])
    # The list is flattened into a single command string below, so the
    # constraint has to carry its own quotes whether or not a queue name is
    # given; left unquoted it is no longer one argument after the join.
    cmd.extend([
        '-f', '"%d\\n"', 'ClusterID',
        '-c', '"JobStatus != 3 && JobStatus != 4"',
        cluster_id,
    ])
    command_line = ' '.join(cmd)
    logging.info('[cleaner] ...running: %s' % command_line)
    (return_code, stdout_value, stderr_value) = util.runCommand2(command_line)
    # Case #8380: Job directories are being deleted when jobs remain in the queue
    # Pre Condor 7.2.2 it's not enough to just check the return code. Condor < 7.2.2 would often set
    # the return code to 0 and write error notes to stderr. So we have to check that stderr is
    # empty as well.
    if len(stderr_value) > 0:
        logging.error('[cleaner] ...got error running command: %s' % stderr_value)
    elif return_code == 0:
        # Count the lines in the output that have the cluster ID in them.
        # That's the number of jobs in the queue still. The ID is escaped
        # and must not be followed by another digit, so cluster 11 is not
        # counted for an output line reading 112.
        repat = re.compile(r"^\s*%s(?!\d)" % re.escape(cluster_id), re.M)
        jobCount = len(repat.findall(stdout_value))
    return jobCount
示例#2
0
def doCondorSubmit(submitFile, queueName):
    """Submit a Condor submit file and return the resulting cluster ID.

    Runs ``condor_submit`` with the directory of ``submitFile`` as working
    directory. ``-name queueName`` is added when ``queueName`` is set, and
    so are any extra arguments listed (comma-separated) in the
    CONDOR_AGENT_SUBMIT_PROXY_ADDITIONAL_ARGUMENTS configuration value.

    Returns the cluster ID parsed from the condor_submit output (a string).

    Raises Exception when condor_submit returns a non-zero code or when no
    cluster ID can be parsed from its output; any exception raised while
    running the command is re-raised. In each of these cases
    cleanSubmissionDir() is called on the submission directory first.
    """
    clusterId = None
    # The command is run from the directory that holds the submit file.
    submission_directory = os.path.dirname(submitFile)

    logging.debug("CondorAgent.post_submit.doCondorSubmit(): submission file: %s" % submitFile)
    # TODO: make sure condor_submit is in the path and is available.
    submit_cmd = ['condor_submit']
    if queueName:
        submit_cmd.append('-name')
        submit_cmd.append('%s' % queueName)
    # Case 7108: Add a new configuration option that allows users to pass along custom command
    # line arguments to insert in to the condor_submit call made by Condor Agent.
    # The syntax for the option is a comma-separated list. With each value in the list being
    # an element in the command line argument
    additional_arguments = []
    add_str = CondorAgent.util.getCondorConfigVal('CONDOR_AGENT_SUBMIT_PROXY_ADDITIONAL_ARGUMENTS')
    if add_str:
        # Add cleaned up versions of the arguments to our array, skipping
        # entries that are empty once stripped (e.g. from "a,,b").
        stripped = [piece.strip() for piece in add_str.split(',')]
        additional_arguments = [piece for piece in stripped if piece]
    if additional_arguments:
        logging.debug("CondorAgent.post_submit.doCondorSubmit(): Adding additional, user supplied arguments: %s" % ' '.join(additional_arguments))
        submit_cmd.extend(additional_arguments)
    submit_cmd.append('%s' % submitFile)
    logging.debug("CondorAgent.post_submit.doCondorSubmit(): condor_submit command: %s" % ' '.join(submit_cmd))

    # Set the umask to be liberal so files that get created can be edited by anyone
    current_umask = os.umask(0)

    try:
        (retcode, submit_out, submit_err) = util.runCommand2(cmd=' '.join(submit_cmd), cwd=submission_directory)
    except:
        # Bare except is deliberate: the cleanup below has to run for any
        # failure, and the exception is re-raised unchanged afterwards.
        # No command output exists at this point (the assignment above never
        # completed), so only the exception itself can be reported.
        # TODO: determine exact error condition check. Possible that there are still warnings we should log.
        logging.error("CondorAgent.post_submit.doCondorSubmit(): Unexpected error: %s, %s" % (sys.exc_info()[0], str(sys.exc_info()[1])))
        os.umask(current_umask)
        cleanSubmissionDir(submission_directory)
        raise
    os.umask(current_umask)
    logging.debug("CondorAgent.post_submit.doCondorSubmit(): retcode:    %d" % retcode)
    logging.debug("CondorAgent.post_submit.doCondorSubmit(): submit_out: %s" % submit_out)
    logging.debug("CondorAgent.post_submit.doCondorSubmit(): submit_err: %s" % submit_err)
    if retcode == 0:
        match = re.search(r"submitted to cluster (\d+)", submit_out)
        if match is None:
            logging.error('CondorAgent.post_submit.doCondorSubmit(): Unable to parse submission details from condor_submit output')
            cleanSubmissionDir(submission_directory)
            raise Exception("Failed to parse cluster id from output:\n%s" % submit_out)
        clusterId = match.group(1)
    else:
        # TODO: parse the error to figure out what happened.
        logging.error('CondorAgent.post_submit.doCondorSubmit(): Condor submission failed')
        cleanSubmissionDir(submission_directory)
        raise Exception("Failed to submit jobs to condor with error:\n%s" % submit_err)
    logging.info('CondorAgent.post_submit.doCondorSubmit(): Returning cluster ID: %s' % str(clusterId))
    return clusterId
示例#3
0
def doCondorSubmit(submitFile, queueName):
    """Submit a Condor submit file and return the resulting cluster ID.

    Runs ``condor_submit`` with the directory of ``submitFile`` as working
    directory. ``-name queueName`` is added when ``queueName`` is set, and
    so are any extra arguments listed (comma-separated) in the
    CONDOR_AGENT_SUBMIT_PROXY_ADDITIONAL_ARGUMENTS configuration value.

    Returns the cluster ID parsed from the condor_submit output (a string).

    Raises Exception when condor_submit returns a non-zero code or when no
    cluster ID can be parsed from its output; any exception raised while
    running the command is re-raised. In each of these cases
    cleanSubmissionDir() is called on the submission directory first.
    """
    clusterId = None
    # The command is run from the directory that holds the submit file.
    submission_directory = os.path.dirname(submitFile)

    logging.debug("CondorAgent.post_submit.doCondorSubmit(): submission file: %s" % submitFile)
    # TODO: make sure condor_submit is in the path and is available.
    submit_cmd = ['condor_submit']
    if queueName:
        submit_cmd.append('-name')
        submit_cmd.append('%s' % queueName)
    # Case 7108: Add a new configuration option that allows users to pass along custom command
    # line arguments to insert in to the condor_submit call made by Condor Agent.
    # The syntax for the option is a comma-separated list. With each value in the list being
    # an element in the command line argument
    additional_arguments = []
    add_str = CondorAgent.util.getCondorConfigVal('CONDOR_AGENT_SUBMIT_PROXY_ADDITIONAL_ARGUMENTS')
    if add_str:
        # Add cleaned up versions of the arguments to our array, skipping
        # entries that are empty once stripped (e.g. from "a,,b").
        stripped = [piece.strip() for piece in add_str.split(',')]
        additional_arguments = [piece for piece in stripped if piece]
    if additional_arguments:
        logging.debug("CondorAgent.post_submit.doCondorSubmit(): Adding additional, user supplied arguments: %s" % ' '.join(additional_arguments))
        submit_cmd.extend(additional_arguments)
    submit_cmd.append('%s' % submitFile)
    logging.debug("CondorAgent.post_submit.doCondorSubmit(): condor_submit command: %s" % ' '.join(submit_cmd))

    # Set the umask to be liberal so files that get created can be edited by anyone
    current_umask = os.umask(0)

    try:
        (retcode, submit_out, submit_err) = util.runCommand2(cmd=' '.join(submit_cmd), cwd=submission_directory)
    except:
        # Bare except is deliberate: the cleanup below has to run for any
        # failure, and the exception is re-raised unchanged afterwards.
        # No command output exists at this point (the assignment above never
        # completed), so only the exception itself can be reported.
        # TODO: determine exact error condition check. Possible that there are still warnings we should log.
        logging.error("CondorAgent.post_submit.doCondorSubmit(): Unexpected error: %s, %s" % (sys.exc_info()[0], str(sys.exc_info()[1])))
        os.umask(current_umask)
        cleanSubmissionDir(submission_directory)
        raise
    os.umask(current_umask)
    logging.debug("CondorAgent.post_submit.doCondorSubmit(): retcode:    %d" % retcode)
    logging.debug("CondorAgent.post_submit.doCondorSubmit(): submit_out: %s" % submit_out)
    logging.debug("CondorAgent.post_submit.doCondorSubmit(): submit_err: %s" % submit_err)
    if retcode == 0:
        match = re.search(r"submitted to cluster (\d+)", submit_out)
        if match is None:
            logging.error('CondorAgent.post_submit.doCondorSubmit(): Unable to parse submission details from condor_submit output')
            cleanSubmissionDir(submission_directory)
            raise Exception("Failed to parse cluster id from output:\n%s" % submit_out)
        clusterId = match.group(1)
    else:
        # TODO: parse the error to figure out what happened.
        logging.error('CondorAgent.post_submit.doCondorSubmit(): Condor submission failed')
        cleanSubmissionDir(submission_directory)
        raise Exception("Failed to submit jobs to condor with error:\n%s" % submit_err)
    logging.info('CondorAgent.post_submit.doCondorSubmit(): Returning cluster ID: %s' % str(clusterId))
    return clusterId
示例#4
0
 def _condorJobsInQueue(self, cdata):
     '''Return the number of jobs still in the queue for a cluster.

     Runs ``condor_q`` for the cluster in ``cdata['clusterid']`` (adding
     ``-name cdata['queue']`` when that entry is set) and counts the output
     lines that start with the cluster ID.

     Returns the count as an int, or None when the command wrote anything
     to stderr or finished with a non-zero return code.
     '''
     jobCount = None
     cluster_id = cdata.get('clusterid')
     queue_name = cdata.get('queue')
     # condor_q -name q1@`hostname` -f "%d\n" ClusterID 11292
     # We don't care if jobs are in the queue in the C or X state. So filter
     # those out with a constraint.
     cmd = ['condor_q']
     if queue_name:
         cmd.extend(['-name', queue_name])
     # The list is flattened into a single command string below, so the
     # constraint has to carry its own quotes whether or not a queue name is
     # given; left unquoted it is no longer one argument after the join.
     cmd.extend([
         '-f', '"%d\\n"', 'ClusterID',
         '-c', '"JobStatus != 3 && JobStatus != 4"',
         cluster_id,
     ])
     command_line = ' '.join(cmd)
     logging.info('[cleaner] ...running: %s' % command_line)
     (return_code, stdout_value,
      stderr_value) = util.runCommand2(command_line)
     # Case #8380: Job directories are being deleted when jobs remain in the queue
     # Pre Condor 7.2.2 it's not enough to just check the return code. Condor < 7.2.2 would often set
     # the return code to 0 and write error notes to stderr. So we have to check that stderr is
     # empty as well.
     if len(stderr_value) > 0:
         logging.error('[cleaner] ...got error running command: %s' %
                       stderr_value)
     elif return_code == 0:
         # Count the lines in the output that have the cluster ID in them.
         # That's the number of jobs in the queue still. The ID is escaped
         # and must not be followed by another digit, so cluster 11 is not
         # counted for an output line reading 112.
         repat = re.compile(r"^\s*%s(?!\d)" % re.escape(cluster_id), re.M)
         jobCount = len(repat.findall(stdout_value))
     return jobCount
 def testDoRunCommand2(self):
     """Run "ls -al" through util.runCommand2 and check its three results.

     Expects a zero return value, non-empty standard output and an empty
     standard error string.
     """
     result = util.runCommand2("ls -al")
     exit_status, listing, error_text = result
     self.assertEqual(0, exit_status)
     self.assertNotEqual(listing, '')
     self.assertEqual('', error_text)
示例#6
0
 def testDoRunCommand2(self):
     """Run "ls -al" through util.runCommand2 and check its three results.

     Expects a zero return value, non-empty standard output and an empty
     standard error string.
     """
     result = util.runCommand2("ls -al")
     exit_status, listing, error_text = result
     self.assertEqual(0, exit_status)
     self.assertNotEqual(listing, '')
     self.assertEqual('', error_text)