def run(self):
     '''Runs the thread in a loop, looking for clusters that can have their
     submit directories wiped from disk because they are no longer in the
     queue. Sleeps after each pass on the submit directory list.'''
     
     logging.info('[cleaner] Local submission cleanup thread starting up')
     
     # TODO Should we warn the user if they're running as root? That's dangerous.
     # TODO Should we context switch to CONDOR_IDS automatically if we're root?
     
     while not self._stopevent.isSet():
         # Case #8236: Sleep when the cleanup thread starts. Gives the schedds a
         # chance to start up and avoids a race condition that can delete the directories
          # of jobs that may actually still be in the queue.
         logging.info('[cleaner] Sleeping for %d seconds' % self._sleeptime)
         self._stopevent.wait(self._sleeptime)            
         submitDir = util.getCondorConfigVal('CONDOR_AGENT_SUBMIT_DIR', default='""').replace('"', '')
         if submitDir == '':
             logging.error('[cleaner] Could not find a CONDOR_AGENT_SUBMIT_DIR setting for this host -- no cleanup performed')
         elif util.getCondorConfigVal('CONDOR_AGENT_SKIP_CLEANUP', default=False):
             logging.info('[cleaner] CONDOR_AGENT_SKIP_CLEANUP is True. Skipping cleanup')
         else:
             logging.info('[cleaner] Scanning submit directory \'%s\' for *.cluster files...' % submitDir)
             for c in self._locate(pattern='*.cluster', root=submitDir):
                 # I never want this thread to exit because of an exception so we'll blanket trap
                 # everything at this level and just report it back as an error.
                 try:
                     self._safeRemoveClusterFiles(c)
                 except Exception, e:
                     logging.error('[cleaner] Caught unhandled exception: %s' % (str(e)))
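
# A minimal sketch (not part of the original source) of how a cleanup thread
# with a run() loop like the one above could be wired up. The class name
# LocalCleanerThread, the sleeptime default, and the stop() helper are
# assumptions for illustration only.
import threading

class LocalCleanerThread(threading.Thread):
    def __init__(self, sleeptime=300):
        threading.Thread.__init__(self)
        self._sleeptime = sleeptime          # seconds to wait between passes
        self._stopevent = threading.Event()  # set() asks the run() loop to exit

    def stop(self, timeout=None):
        '''Signal the run() loop to exit and wait for the thread to finish.'''
        self._stopevent.set()
        self.join(timeout)
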
Example #2
    def run(self):
        '''Runs the thread in a loop, looking for clusters that can have their
        submit directories wiped from disk because they are no longer in the
        queue. Sleeps after each pass on the submit directory list.'''

        logging.info('[cleaner] Local submission cleanup thread starting up')

        # TODO Should we warn the user if they're running as root? That's dangerous.
        # TODO Should we context switch to CONDOR_IDS automatically if we're root?

        while not self._stopevent.isSet():
            # Case #8236: Sleep when the cleanup thread starts. Gives the schedds a
            # chance to start up and avoids a race condition that can delete the directories
            # of jobs that may actually still be in the queue.
            logging.info('[cleaner] Sleeping for %d seconds' % self._sleeptime)
            self._stopevent.wait(self._sleeptime)
            submitDir = util.getCondorConfigVal('CONDOR_AGENT_SUBMIT_DIR',
                                                default='""').replace('"', '')
            if submitDir == '':
                logging.error(
                    '[cleaner] Could not find a CONDOR_AGENT_SUBMIT_DIR setting for this host -- no cleanup performed'
                )
            else:
                logging.info(
                    '[cleaner] Scanning submit directory \'%s\' for *.cluster files...'
                    % submitDir)
                for c in self._locate(pattern='*.cluster', root=submitDir):
                    # I never want this thread to exit because of an exception so we'll blanket trap
                    # everything at this level and just report it back as an error.
                    try:
                        self._safeRemoveClusterFiles(c)
                    except Exception, e:
                        logging.error(
                            '[cleaner] Caught unhandled exception: %s' %
                            (str(e)))
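
# For context, a rough sketch (an assumption, not the project's real helper)
# of what a lookup such as util.getCondorConfigVal could boil down to: ask
# condor_config_val for an attribute and fall back to a default when it is
# unset. The daemon-specific form used elsewhere in this file ("schedd" plus
# a daemon name) is omitted here for brevity.
import subprocess

def getCondorConfigValSketch(attr, default=None):
    try:
        out = subprocess.check_output(['condor_config_val', attr])
    except Exception:
        # condor_config_val exits non-zero when the attribute is undefined
        return default
    out = out.strip()
    return out if out else default
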
def cleanSubmissionDir(submission_dir):
    '''
    Cleans up after a failed submission attempt so disk pollution does not
    build up. Does some minimal checking on submission_dir to make sure it
    does not do something dangerous like deleting /.
    '''
    # Check if cleanup is disabled
    if util.getCondorConfigVal('CONDOR_AGENT_SKIP_CLEANUP', default=False):
        logging.warn('CONDOR_AGENT_SKIP_CLEANUP is true. Not cleaning failed submission.')
        return

    submit_dir_expected_prefix = CondorAgent.util.getCondorConfigVal("CONDOR_AGENT_SUBMIT_DIR").replace('"', '')
    if submit_dir_expected_prefix and submit_dir_expected_prefix != '' and len(submit_dir_expected_prefix) > 3:
        # That's a bit of a lame check, >3 -- it could be better. On !Windows it should suffice to
        # stop someone from accidentally removing /. On Windows it should suffice to stop someone
        # from accidentally removing something like C:\. In truth this path should always be longer
        # than 3 characters...so for now we'll say it's good enough.
        # TODO Improve the safety checks before we try to delete things off of disk
        if submission_dir.find(submit_dir_expected_prefix) > -1:
            # We will trust that the user wasn't stupid enough to use / as their
            # submission dir.
            if os.path.isdir(submission_dir):
                logging.info('Cleaning up directory %s after failed submission' % submission_dir)
                try:
                    shutil.rmtree(submission_dir)
                except Exception, e:
                    logging.error('Unable to clean up %s after failed submission: %s' % (submission_dir, str(e)))
            else:
                logging.warning('Skipped cleanup of %s after failed submission, path is not a directory' % submission_dir)
        else:
            logging.warning('Skipped cleanup of %s after failed submission, path does not start with expected string "%s"' % (submission_dir, submit_dir_expected_prefix))
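
# A small usage sketch (not in the original source): call cleanSubmissionDir
# when a submission attempt raises so the partially written directory does not
# linger on disk. submitWithCleanup and submitCluster are illustrative names,
# and the module's existing logging import is assumed.
def submitWithCleanup(submit_dir, submitCluster):
    try:
        return submitCluster(submit_dir)
    except Exception:
        logging.error('Submission from %s failed, cleaning up' % submit_dir)
        cleanSubmissionDir(submit_dir)
        raise
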
Example #4
 def getHistory(self, completed_since, jobs):
     '''Returns a tuple of history, new_completed_since.  The new value for
     completed_since comes from the last job (with a non-zero
     CompletionDate) that was read. This ensures that the next time
     the client reads we resume from the last value we read in the
     file. (Previously we used the current timestamp.)'''
     new_completed_since = completed_since
     history_file = util.getCondorConfigVal("HISTORY", "schedd",
                                            self.scheddName)
     if history_file is None:
         raise Exception("History is not enabled on this scheduler")
     # Case 5416: HISTORY should be a file, not a directory
     if os.path.isdir(history_file):
         raise Exception("The HISTORY setting is a directory")
     # Case 5458: Consider an empty string value for HISTORY to be the same as None
     # and raise an exception.
     if len(history_file.strip()) == 0:
         raise Exception("The HISTORY setting is an empty string")
     history_file = os.path.normpath(history_file)
     logging.info("History file for daemon %s: %s" %
                  (self.scheddName, history_file))
     files = glob.glob(history_file + "*")
     history_data = ''
     for f in files:
         if os.path.isfile(f):
             mod = os.path.getmtime(f)
             # allow for some overlap when testing
             # note: we don't skip the current file based on its timestamp because we don't
             # want to rely on that being updated properly
             if mod >= (completed_since - COMPLETED_SINCE_OVERLAP
                        ) or os.path.normpath(f) == history_file:
                 # each output from condor_history has a trailing newline so we can
                 # just concatenate them
                 if jobs != "":
                     history_data = history_data + self.getItemizedHistoryFromFile(
                         completed_since, jobs, f)
                 else:
                     new_data, new_time = self.getHistoryFromFile(
                         completed_since, f)
                     # keep the latest we've seen
                     new_completed_since = max(new_time,
                                               new_completed_since)
                     logging.debug("New CompletedSince: %s" %
                                   new_completed_since)
                     history_data = history_data + new_data
             else:
                 logging.info(
                     "History file %s was last modified before given completedSince, skipped"
                     % os.path.basename(f))
     return (history_data, new_completed_since)
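
# A minimal polling sketch (not in the original source) showing how a caller
# might carry completed_since forward between reads: persisting the returned
# value and passing it back in on the next call means each poll resumes from
# the newest CompletionDate already seen (minus the deliberate overlap
# window). pollHistory and the schedd argument are illustrative names.
def pollHistory(schedd, completed_since, jobs=''):
    data, completed_since = schedd.getHistory(completed_since, jobs)
    return data, completed_since
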
def cleanSubmissionDir(submission_dir):
    '''
    Cleans up after a failed submission attempt so disk pollution does not
    build up. Does some minimal checking on submission_dir to make sure it
    does not do something dangerous like deleting /.
    '''
    # Check if cleanup is disabled
    if util.getCondorConfigVal('CONDOR_AGENT_SKIP_CLEANUP', default=False):
        logging.warn(
            'CONDOR_AGENT_SKIP_CLEANUP is true. Not cleaning failed submission.'
        )
        return

    submit_dir_expected_prefix = CondorAgent.util.getCondorConfigVal(
        "CONDOR_AGENT_SUBMIT_DIR").replace('"', '')
    if submit_dir_expected_prefix and submit_dir_expected_prefix != '' and len(
            submit_dir_expected_prefix) > 3:
        # That's a bit of a lame check, >3 -- it could be better. On !Windows it should suffice to
        # stop someone from accidentally removing /. On Windows it should suffice to stop someone
        # from accidentally removing something like C:\. In truth this path should always be longer
        # than 3 characters...so for now we'll say it's good enough.
        # TODO Improve the safety checks before we try to delete things off of disk
        if submission_dir.find(submit_dir_expected_prefix) > -1:
            # We will trust that the user wasn't stupid enough to use / as their
            # submission dir.
            if os.path.isdir(submission_dir):
                logging.info(
                    'Cleaning up directory %s after failed submission' %
                    submission_dir)
                try:
                    shutil.rmtree(submission_dir)
                except Exception, e:
                    logging.error(
                        'Unable to clean up %s after failed submission: %s' %
                        (submission_dir, str(e)))
            else:
                logging.warning(
                    'Skipped cleanup of %s after failed submission, path is not a directory'
                    % submission_dir)
        else:
            logging.warning(
                'Skipped cleanup of %s after failed submission, path does not start with expected string "%s"'
                % (submission_dir, submit_dir_expected_prefix))
Example #6
    def getHistory(self, completed_since, jobs):
        '''Returns a tuple of history, new_completed_since.  The new value for
        completed_since comes from the last job (with a non-zero
        CompletionDate) that was read. This ensures that the next time
        the client reads we resume from the last value we read in the
        file. (Previously we used the current timestamp.)'''
        new_completed_since = completed_since
        history_file = util.getCondorConfigVal("HISTORY", "schedd", self.scheddName)
        if history_file is None:
            raise Exception("History is not enabled on this scheduler")
        # Case 5416: HISTORY should be a file, not a directory
        if os.path.isdir(history_file):
            raise Exception("The HISTORY setting is a directory")
        # Case 5458: Consider an empty string value for HISTORY to be the same as None
        # and raise an exception.
        if len(history_file.strip()) == 0:
            raise Exception("The HISTORY setting is an empty string")
        history_file = os.path.normpath(history_file)
        logging.info("History file for daemon %s: %s"%(self.scheddName, history_file))
        files        = glob.glob(history_file + "*")
        history_data = ''
        for f in files:
            if os.path.isfile(f):
                mod = os.path.getmtime(f)
                # allow for some overlap when testing
                # note: we don't skip the current file based on its timestamp because we don't 
                # want to rely on that being updated properly
                if mod >= (completed_since - COMPLETED_SINCE_OVERLAP) or os.path.normpath(f) == history_file:
                    # each output from condor_history has a trailing newline so we can
                    # just concatenate them
                    if jobs != "":
                        history_data = history_data + self.getItemizedHistoryFromFile(completed_since, jobs, f)
                    else:
                        new_data, new_time = self.getHistoryFromFile(completed_since, f)
                        # keep the latest we've seen
                        new_completed_since = max(new_time, new_completed_since)
                        logging.debug("New CompletedSince: %s" % new_completed_since)
                        history_data = history_data + new_data
                else:
                    logging.info("History file %s was last modified before given completedSince, skipped" % os.path.basename(f))
        return (history_data, new_completed_since)
Example #7
 def getHistory(self, completed_since, jobs):
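     '''Returns concatenated condor_history output (as one string) for jobs
     completed since completed_since, scanning the schedd's HISTORY file and
     any rotated copies modified recently enough to matter.'''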
     history_file = util.getCondorConfigVal("HISTORY", "schedd", self.scheddName)
     if history_file is None:
         raise Exception("History is not enabled on this scheduler")
     # Case 5458: Consider an empty string value for HISTORY to be the same as None
     # and raise an exception.
     if len(history_file.strip()) == 0:
         raise Exception("The HISTORY setting is an empty string")
     logging.info("History file for daemon %s: %s"%(self.scheddName, history_file))
     files        = glob.glob(history_file + "*")
     history_data = ''
     for f in files:
         if os.path.isfile(f):
             mod = os.path.getmtime(f)
             # allow for some overlap when testing
             if mod >= (completed_since - COMPLETED_SINCE_OVERLAP):
                 # each output from condor_history has a trailing newline so we can
                 # just concatenate them
                 history_data = history_data + self.getHistoryFromFile(completed_since, jobs, f)
             else:
                 logging.info("History file %s was last modified before given completedSince, skipped" % os.path.basename(f))
     return history_data
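
# COMPLETED_SINCE_OVERLAP is referenced above but defined elsewhere in the
# module; an illustrative stand-in (the value shown is an assumption, not the
# project's actual setting) would be a module-level constant such as:
COMPLETED_SINCE_OVERLAP = 3600  # seconds of mtime slack so recently rotated
                                # history files are still scanned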