示例#1
0
class Sweeper(AgentBase):
    """Agent that kills workers flagged for termination and sweeps old ones.

    Every cycle of ``run`` makes two passes. First it asks the DB proxy for
    workers to kill and calls ``kill_worker`` of the queue's sweeper plugin
    on each of them. Then it asks for workers eligible for cleanup, calls
    ``sweep_worker`` on each, and deletes the worker from the DB when the
    sweep reports success.
    """

    # constructor
    def __init__(self, queue_config_mapper, single_mode=False):
        """Create the DB proxy and the plugin factory used by the main loop.

        :param queue_config_mapper: object providing ``has_queue`` and
            ``get_queue`` to look up the configuration of a queue.
        :param single_mode: passed through unchanged to ``AgentBase.__init__``.
        """
        AgentBase.__init__(self, single_mode)
        self.dbProxy = DBProxy()
        self.queueConfigMapper = queue_config_mapper
        self.pluginFactory = PluginFactory()

    # main loop
    def run(self):
        """Run kill/cleanup cycles until ``self.terminated`` returns true."""
        lockedBy = 'sweeper-{0}'.format(self.ident)
        while True:
            mainLog = core_utils.make_logger(_logger, 'id={0}'.format(lockedBy), method_name='run')
            mainLog.debug('try to get workers to kill')
            # get workers to kill
            workersToKill = self.dbProxy.get_workers_to_kill(harvester_config.sweeper.maxWorkers,
                                                             harvester_config.sweeper.checkInterval)
            mainLog.debug('got {0} queues to kill workers'.format(len(workersToKill)))
            # loop over all workers, grouped by queue name
            for queueName, workSpecs in iteritems(workersToKill):
                # get sweeper; queues without a known config are skipped
                if not self.queueConfigMapper.has_queue(queueName):
                    mainLog.error('queue config for {0} not found'.format(queueName))
                    continue
                queueConfig = self.queueConfigMapper.get_queue(queueName)
                sweeperCore = self.pluginFactory.get_plugin(queueConfig.sweeper)
                for workSpec in workSpecs:
                    tmpLog = core_utils.make_logger(_logger, 'workerID={0}'.format(workSpec.workerID),
                                                    method_name='run')
                    tmpLog.debug('start killing')
                    tmpStat, tmpOut = sweeperCore.kill_worker(workSpec)
                    tmpLog.debug('done with status={0} diag={1}'.format(tmpStat, tmpOut))
            mainLog.debug('done kill')
            # timeout for missed; the option may be absent from the config,
            # in which case 24 is used
            try:
                keepMissed = harvester_config.sweeper.keepMissed
            except Exception:
                # Exception rather than a bare except, so that SystemExit and
                # KeyboardInterrupt are never swallowed by this fallback
                keepMissed = 24
            # get workers for cleanup; the map gives one timeout per worker status
            statusTimeoutMap = {'finished': harvester_config.sweeper.keepFinished,
                                'failed': harvester_config.sweeper.keepFailed,
                                'cancelled': harvester_config.sweeper.keepCancelled,
                                'missed': keepMissed
                                }
            workersForCleanup = self.dbProxy.get_workers_for_cleanup(harvester_config.sweeper.maxWorkers,
                                                                     statusTimeoutMap)
            mainLog.debug('got {0} queues for workers cleanup'.format(len(workersForCleanup)))
            for queueName, workSpecs in iteritems(workersForCleanup):
                # get sweeper; queues without a known config are skipped
                if not self.queueConfigMapper.has_queue(queueName):
                    mainLog.error('queue config for {0} not found'.format(queueName))
                    continue
                queueConfig = self.queueConfigMapper.get_queue(queueName)
                sweeperCore = self.pluginFactory.get_plugin(queueConfig.sweeper)
                for workSpec in workSpecs:
                    tmpLog = core_utils.make_logger(_logger, 'workerID={0}'.format(workSpec.workerID),
                                                    method_name='run')
                    tmpLog.debug('start cleanup')
                    tmpStat, tmpOut = sweeperCore.sweep_worker(workSpec)
                    tmpLog.debug('done with status={0} diag={1}'.format(tmpStat, tmpOut))
                    if tmpStat:
                        # delete from DB only when the sweep reported success
                        self.dbProxy.delete_worker(workSpec.workerID)
            mainLog.debug('done cleanup')
            # check if being terminated
            if self.terminated(harvester_config.sweeper.sleepTime):
                mainLog.debug('terminated')
                return
示例#2
0
class Sweeper(AgentBase):
    """Agent that kills workers, cleans up old workers, and deletes old jobs.

    Each cycle of ``run`` has three stages: a killing stage, a cleanup stage,
    and an old-job-deletion stage. The per-worker work is delegated to the
    sweeper and messenger plugins configured for each queue/configID pair.
    """

    # constructor
    def __init__(self, queue_config_mapper, single_mode=False):
        """Create the DB proxy and the plugin factory used by the main loop.

        :param queue_config_mapper: object providing ``has_queue`` and
            ``get_queue`` to look up a queue configuration by name and configID.
        :param single_mode: passed through unchanged to ``AgentBase.__init__``.
        """
        AgentBase.__init__(self, single_mode)
        self.dbProxy = DBProxy()
        self.queueConfigMapper = queue_config_mapper
        self.pluginFactory = PluginFactory()


    # main loop
    def run(self):
        """Run sweeper cycles until ``self.terminated`` returns true.

        Exceptions raised by plugin calls inside the per-worker loops are
        logged through ``core_utils.dump_error_message`` and do not stop the
        cycle.
        """
        lockedBy = 'sweeper-{0}'.format(self.get_pid())
        while True:
            # stopwatch for the whole cycle
            sw_main = core_utils.get_stopwatch()
            mainLog = self.make_logger(_logger, 'id={0}'.format(lockedBy), method_name='run')
            # killing stage
            sw_kill = core_utils.get_stopwatch()
            mainLog.debug('try to get workers to kill')
            # get workers to kill
            workersToKill = self.dbProxy.get_workers_to_kill(harvester_config.sweeper.maxWorkers,
                                                             harvester_config.sweeper.checkInterval)
            mainLog.debug('got {0} queues to kill workers'.format(len(workersToKill)))
            # loop over all workers, grouped by queue name and then by configID
            # sw is reset for every queue/configID pair to time each batch
            sw = core_utils.get_stopwatch()
            for queueName, configIdWorkSpecList in iteritems(workersToKill):
                for configID, workspec_list in iteritems(configIdWorkSpecList):
                    # get sweeper; pairs without a known queue config are skipped
                    if not self.queueConfigMapper.has_queue(queueName, configID):
                        mainLog.error('queue config for {0}/{1} not found'.format(queueName, configID))
                        continue
                    queueConfig = self.queueConfigMapper.get_queue(queueName, configID)
                    sweeperCore = self.pluginFactory.get_plugin(queueConfig.sweeper)
                    sw.reset()
                    n_workers = len(workspec_list)
                    try:
                        # try bulk method
                        tmpLog = self.make_logger(_logger, 'id={0}'.format(lockedBy), method_name='run')
                        tmpLog.debug('start killing')
                        tmpList = sweeperCore.kill_workers(workspec_list)
                    except AttributeError:
                        # fall back to single-worker method
                        # NOTE(review): an AttributeError raised inside kill_workers
                        # itself also lands here, not only a missing bulk method
                        for workspec in workspec_list:
                            tmpLog = self.make_logger(_logger, 'workerID={0}'.format(workspec.workerID),
                                                      method_name='run')
                            try:
                                tmpLog.debug('start killing one worker')
                                tmpStat, tmpOut = sweeperCore.kill_worker(workspec)
                                tmpLog.debug('done killing with status={0} diag={1}'.format(tmpStat, tmpOut))
                            except Exception:
                                # log and move on to the next worker
                                core_utils.dump_error_message(tmpLog)
                    except Exception:
                        core_utils.dump_error_message(mainLog)
                    else:
                        # bulk method
                        # tmpList is consumed as one (status, diag) pair per workspec,
                        # in the same order as workspec_list
                        n_killed = 0
                        for workspec, (tmpStat, tmpOut) in zip(workspec_list, tmpList):
                            tmpLog.debug('done killing workerID={0} with status={1} diag={2}'.format(
                                            workspec.workerID, tmpStat, tmpOut))
                            if tmpStat:
                                n_killed += 1
                        tmpLog.debug('killed {0}/{1} workers'.format(n_killed, n_workers))
                    mainLog.debug('done killing {0} workers'.format(n_workers) + sw.get_elapsed_time())
            mainLog.debug('done all killing' + sw_kill.get_elapsed_time())
            # cleanup stage
            sw_cleanup = core_utils.get_stopwatch()
            # timeout for missed; falls back to 24 when the option cannot be read
            try:
                keepMissed = harvester_config.sweeper.keepMissed
            except Exception:
                keepMissed = 24
            # timeout for pending; same fallback
            try:
                keepPending = harvester_config.sweeper.keepPending
            except Exception:
                keepPending = 24
            # get workers for cleanup; the map gives one timeout per worker status
            statusTimeoutMap = {'finished': harvester_config.sweeper.keepFinished,
                                'failed': harvester_config.sweeper.keepFailed,
                                'cancelled': harvester_config.sweeper.keepCancelled,
                                'missed': keepMissed,
                                'pending': keepPending
                                }
            workersForCleanup = self.dbProxy.get_workers_for_cleanup(harvester_config.sweeper.maxWorkers,
                                                                     statusTimeoutMap)
            mainLog.debug('got {0} queues for workers cleanup'.format(len(workersForCleanup)))
            sw = core_utils.get_stopwatch()
            for queueName, configIdWorkSpecList in iteritems(workersForCleanup):
                for configID, workspec_list in iteritems(configIdWorkSpecList):
                    # get sweeper; pairs without a known queue config are skipped
                    if not self.queueConfigMapper.has_queue(queueName, configID):
                        mainLog.error('queue config for {0}/{1} not found'.format(queueName, configID))
                        continue
                    queueConfig = self.queueConfigMapper.get_queue(queueName, configID)
                    sweeperCore = self.pluginFactory.get_plugin(queueConfig.sweeper)
                    messenger = self.pluginFactory.get_plugin(queueConfig.messenger)
                    sw.reset()
                    n_workers = len(workspec_list)
                    # make sure workers to clean up are all terminated
                    # the results of these kill calls are not inspected
                    mainLog.debug('making sure workers to clean up are all terminated')
                    try:
                        # try bulk method
                        tmpList = sweeperCore.kill_workers(workspec_list)
                    except AttributeError:
                        # fall back to single-worker method
                        for workspec in workspec_list:
                            tmpLog = self.make_logger(_logger, 'workerID={0}'.format(workspec.workerID),
                                                      method_name='run')
                            try:
                                tmpStat, tmpOut = sweeperCore.kill_worker(workspec)
                            except Exception:
                                core_utils.dump_error_message(tmpLog)
                    except Exception:
                        core_utils.dump_error_message(mainLog)
                    mainLog.debug('made sure workers to clean up are all terminated')
                    # start cleanup
                    for workspec in workspec_list:
                        tmpLog = self.make_logger(_logger, 'workerID={0}'.format(workspec.workerID),
                                                  method_name='run')
                        try:
                            tmpLog.debug('start cleaning up one worker')
                            # sweep worker
                            tmpStat, tmpOut = sweeperCore.sweep_worker(workspec)
                            tmpLog.debug('swept_worker with status={0} diag={1}'.format(tmpStat, tmpOut))
                            tmpLog.debug('start messenger cleanup')
                            mc_tmpStat, mc_tmpOut = messenger.clean_up(workspec)
                            tmpLog.debug('messenger cleaned up with status={0} diag={1}'.format(mc_tmpStat, mc_tmpOut))
                            # deletion depends on the sweep status only; the messenger
                            # status is logged but not checked
                            if tmpStat:
                                self.dbProxy.delete_worker(workspec.workerID)
                        except Exception:
                            core_utils.dump_error_message(tmpLog)
                    mainLog.debug('done cleaning up {0} workers'.format(n_workers) + sw.get_elapsed_time())
            mainLog.debug('done all cleanup' + sw_cleanup.get_elapsed_time())
            # old-job-deletion stage
            sw_delete = core_utils.get_stopwatch()
            mainLog.debug('delete old jobs')
            # one more than the largest worker timeout
            # NOTE(review): presumably so jobs outlive their workers' records -- confirm
            jobTimeout = max(statusTimeoutMap.values()) + 1
            self.dbProxy.delete_old_jobs(jobTimeout)
            # delete orphaned job info
            self.dbProxy.delete_orphaned_job_info()
            mainLog.debug('done deletion of old jobs' + sw_delete.get_elapsed_time())
            # time the cycle
            mainLog.debug('done a sweeper cycle' + sw_main.get_elapsed_time())
            # check if being terminated
            if self.terminated(harvester_config.sweeper.sleepTime):
                mainLog.debug('terminated')
                return
示例#3
0
class Sweeper(AgentBase):
    """Agent that handles kill commands, kills and cleans up workers, and deletes old jobs.

    Each cycle of ``run`` has four stages: command handling, a killing stage,
    a cleanup stage, and an old-job-deletion stage. The per-worker work is
    delegated to the sweeper and messenger plugins configured for each
    queue/configID pair.
    """

    # constructor
    def __init__(self, queue_config_mapper, single_mode=False):
        """Create the DB proxy and the plugin factory used by the main loop.

        :param queue_config_mapper: object providing ``has_queue`` and
            ``get_queue`` to look up a queue configuration by name and configID.
        :param single_mode: passed through unchanged to ``AgentBase.__init__``.
        """
        AgentBase.__init__(self, single_mode)
        self.dbProxy = DBProxy()
        self.queueConfigMapper = queue_config_mapper
        self.pluginFactory = PluginFactory()

    # main loop
    def run(self):
        """Run sweeper cycles until ``self.terminated`` returns true.

        Exceptions raised by plugin calls inside the per-worker loops are
        logged through ``core_utils.dump_error_message`` and do not stop the
        cycle.
        """
        lockedBy = 'sweeper-{0}'.format(self.get_pid())
        while True:
            # stopwatch for the whole cycle
            sw_main = core_utils.get_stopwatch()
            mainLog = self.make_logger(_logger,
                                       'id={0}'.format(lockedBy),
                                       method_name='run')
            # get commands to kill
            sw_getcomm = core_utils.get_stopwatch()
            mainLog.debug('try to get commands')
            comStr = CommandSpec.COM_killWorkers
            commandSpecs = self.dbProxy.get_commands_for_receiver(
                'sweeper', comStr)
            mainLog.debug('got {0} {1} commands'.format(
                len(commandSpecs), comStr))
            # each command's params are handed to the DB proxy as a query;
            # the returned count is only logged here
            for commandSpec in commandSpecs:
                n_to_kill = self.dbProxy.kill_workers_by_query(
                    commandSpec.params)
                mainLog.debug('will kill {0} workers with {1}'.format(
                    n_to_kill, commandSpec.params))
            mainLog.debug('done handling commands' +
                          sw_getcomm.get_elapsed_time())
            # killing stage
            sw_kill = core_utils.get_stopwatch()
            mainLog.debug('try to get workers to kill')
            # get workers to kill
            workersToKill = self.dbProxy.get_workers_to_kill(
                harvester_config.sweeper.maxWorkers,
                harvester_config.sweeper.checkInterval)
            mainLog.debug('got {0} queues to kill workers'.format(
                len(workersToKill)))
            # loop over all workers, grouped by queue name and then by configID
            # sw is reset for every queue/configID pair to time each batch
            sw = core_utils.get_stopwatch()
            for queueName, configIdWorkSpecList in iteritems(workersToKill):
                for configID, workspec_list in iteritems(configIdWorkSpecList):
                    # get sweeper; pairs without a known queue config are skipped
                    if not self.queueConfigMapper.has_queue(
                            queueName, configID):
                        mainLog.error(
                            'queue config for {0}/{1} not found'.format(
                                queueName, configID))
                        continue
                    queueConfig = self.queueConfigMapper.get_queue(
                        queueName, configID)
                    sweeperCore = self.pluginFactory.get_plugin(
                        queueConfig.sweeper)
                    sw.reset()
                    n_workers = len(workspec_list)
                    try:
                        # try bulk method
                        tmpLog = self.make_logger(_logger,
                                                  'id={0}'.format(lockedBy),
                                                  method_name='run')
                        tmpLog.debug('start killing')
                        tmpList = sweeperCore.kill_workers(workspec_list)
                    except AttributeError:
                        # fall back to single-worker method
                        # NOTE(review): an AttributeError raised inside kill_workers
                        # itself also lands here, not only a missing bulk method
                        for workspec in workspec_list:
                            tmpLog = self.make_logger(_logger,
                                                      'workerID={0}'.format(
                                                          workspec.workerID),
                                                      method_name='run')
                            try:
                                tmpLog.debug('start killing one worker')
                                tmpStat, tmpOut = sweeperCore.kill_worker(
                                    workspec)
                                tmpLog.debug(
                                    'done killing with status={0} diag={1}'.
                                    format(tmpStat, tmpOut))
                            except Exception:
                                # log and move on to the next worker
                                core_utils.dump_error_message(tmpLog)
                    except Exception:
                        core_utils.dump_error_message(mainLog)
                    else:
                        # bulk method
                        # tmpList is consumed as one (status, diag) pair per workspec,
                        # in the same order as workspec_list
                        n_killed = 0
                        for workspec, (tmpStat,
                                       tmpOut) in zip(workspec_list, tmpList):
                            tmpLog.debug(
                                'done killing workerID={0} with status={1} diag={2}'
                                .format(workspec.workerID, tmpStat, tmpOut))
                            if tmpStat:
                                n_killed += 1
                        tmpLog.debug('killed {0}/{1} workers'.format(
                            n_killed, n_workers))
                    mainLog.debug(
                        'done killing {0} workers'.format(n_workers) +
                        sw.get_elapsed_time())
            mainLog.debug('done all killing' + sw_kill.get_elapsed_time())
            # cleanup stage
            sw_cleanup = core_utils.get_stopwatch()
            # timeout for missed; falls back to 24 when the option cannot be read
            try:
                keepMissed = harvester_config.sweeper.keepMissed
            except Exception:
                keepMissed = 24
            # timeout for pending; same fallback
            try:
                keepPending = harvester_config.sweeper.keepPending
            except Exception:
                keepPending = 24
            # get workers for cleanup; the map gives one timeout per worker status
            statusTimeoutMap = {
                'finished': harvester_config.sweeper.keepFinished,
                'failed': harvester_config.sweeper.keepFailed,
                'cancelled': harvester_config.sweeper.keepCancelled,
                'missed': keepMissed,
                'pending': keepPending
            }
            workersForCleanup = self.dbProxy.get_workers_for_cleanup(
                harvester_config.sweeper.maxWorkers, statusTimeoutMap)
            mainLog.debug('got {0} queues for workers cleanup'.format(
                len(workersForCleanup)))
            sw = core_utils.get_stopwatch()
            for queueName, configIdWorkSpecList in iteritems(
                    workersForCleanup):
                for configID, workspec_list in iteritems(configIdWorkSpecList):
                    # get sweeper; pairs without a known queue config are skipped
                    if not self.queueConfigMapper.has_queue(
                            queueName, configID):
                        mainLog.error(
                            'queue config for {0}/{1} not found'.format(
                                queueName, configID))
                        continue
                    queueConfig = self.queueConfigMapper.get_queue(
                        queueName, configID)
                    sweeperCore = self.pluginFactory.get_plugin(
                        queueConfig.sweeper)
                    messenger = self.pluginFactory.get_plugin(
                        queueConfig.messenger)
                    sw.reset()
                    n_workers = len(workspec_list)
                    # make sure workers to clean up are all terminated
                    # the results of these kill calls are not inspected
                    mainLog.debug(
                        'making sure workers to clean up are all terminated')
                    try:
                        # try bulk method
                        tmpList = sweeperCore.kill_workers(workspec_list)
                    except AttributeError:
                        # fall back to single-worker method
                        for workspec in workspec_list:
                            tmpLog = self.make_logger(_logger,
                                                      'workerID={0}'.format(
                                                          workspec.workerID),
                                                      method_name='run')
                            try:
                                tmpStat, tmpOut = sweeperCore.kill_worker(
                                    workspec)
                            except Exception:
                                core_utils.dump_error_message(tmpLog)
                    except Exception:
                        core_utils.dump_error_message(mainLog)
                    mainLog.debug(
                        'made sure workers to clean up are all terminated')
                    # start cleanup
                    for workspec in workspec_list:
                        tmpLog = self.make_logger(_logger,
                                                  'workerID={0}'.format(
                                                      workspec.workerID),
                                                  method_name='run')
                        try:
                            tmpLog.debug('start cleaning up one worker')
                            # sweep worker
                            tmpStat, tmpOut = sweeperCore.sweep_worker(
                                workspec)
                            tmpLog.debug(
                                'swept_worker with status={0} diag={1}'.format(
                                    tmpStat, tmpOut))
                            tmpLog.debug('start messenger cleanup')
                            mc_tmpStat, mc_tmpOut = messenger.clean_up(
                                workspec)
                            tmpLog.debug(
                                'messenger cleaned up with status={0} diag={1}'
                                .format(mc_tmpStat, mc_tmpOut))
                            # deletion depends on the sweep status only; the messenger
                            # status is logged but not checked
                            if tmpStat:
                                self.dbProxy.delete_worker(workspec.workerID)
                        except Exception:
                            core_utils.dump_error_message(tmpLog)
                    mainLog.debug(
                        'done cleaning up {0} workers'.format(n_workers) +
                        sw.get_elapsed_time())
            mainLog.debug('done all cleanup' + sw_cleanup.get_elapsed_time())
            # old-job-deletion stage
            sw_delete = core_utils.get_stopwatch()
            mainLog.debug('delete old jobs')
            # one more than the largest worker timeout
            # NOTE(review): presumably so jobs outlive their workers' records -- confirm
            jobTimeout = max(statusTimeoutMap.values()) + 1
            self.dbProxy.delete_old_jobs(jobTimeout)
            # delete orphaned job info
            self.dbProxy.delete_orphaned_job_info()
            mainLog.debug('done deletion of old jobs' +
                          sw_delete.get_elapsed_time())
            # time the cycle
            mainLog.debug('done a sweeper cycle' + sw_main.get_elapsed_time())
            # check if being terminated
            if self.terminated(harvester_config.sweeper.sleepTime):
                mainLog.debug('terminated')
                return
示例#4
0
class Sweeper(AgentBase):
    # constructor
    def __init__(self, queue_config_mapper, single_mode=False):
        """Initialise the agent base and the objects the sweeper works with.

        :param queue_config_mapper: stored as ``self.queueConfigMapper``.
        :param single_mode: passed through unchanged to ``AgentBase.__init__``.
        """
        AgentBase.__init__(self, single_mode)
        # keep the mapper handed in by the caller
        self.queueConfigMapper = queue_config_mapper
        # objects created here: DB access first, then the plugin factory
        self.dbProxy = DBProxy()
        self.pluginFactory = PluginFactory()

    # main loop
    def run(self):
        lockedBy = 'sweeper-{0}'.format(self.get_pid())
        while True:
            sw_main = core_utils.get_stopwatch()
            mainLog = self.make_logger(_logger,
                                       'id={0}'.format(lockedBy),
                                       method_name='run')
            # get commands to kill
            sw_getcomm = core_utils.get_stopwatch()
            mainLog.debug('try to get commands')
            comStr = CommandSpec.COM_killWorkers
            commandSpecs = self.dbProxy.get_commands_for_receiver(
                'sweeper', comStr)
            mainLog.debug('got {0} {1} commands'.format(
                len(commandSpecs), comStr))
            for commandSpec in commandSpecs:
                n_to_kill = self.dbProxy.kill_workers_by_query(
                    commandSpec.params)
                mainLog.debug('will kill {0} workers with {1}'.format(
                    n_to_kill, commandSpec.params))
            mainLog.debug('done handling commands' +
                          sw_getcomm.get_elapsed_time())
            # killing stage
            sw_kill = core_utils.get_stopwatch()
            mainLog.debug('try to get workers to kill')
            # get workers to kill
            workersToKill = self.dbProxy.get_workers_to_kill(
                harvester_config.sweeper.maxWorkers,
                harvester_config.sweeper.checkInterval)
            mainLog.debug('got {0} queues to kill workers'.format(
                len(workersToKill)))
            # loop over all workers
            sw = core_utils.get_stopwatch()
            for queueName, configIdWorkSpecList in iteritems(workersToKill):
                for configID, workspec_list in iteritems(configIdWorkSpecList):
                    # get sweeper
                    if not self.queueConfigMapper.has_queue(
                            queueName, configID):
                        mainLog.error(
                            'queue config for {0}/{1} not found'.format(
                                queueName, configID))
                        continue
                    queueConfig = self.queueConfigMapper.get_queue(
                        queueName, configID)
                    try:
                        sweeperCore = self.pluginFactory.get_plugin(
                            queueConfig.sweeper)
                    except Exception:
                        mainLog.error(
                            'failed to launch sweeper plugin for {0}/{1}'.
                            format(queueName, configID))
                        core_utils.dump_error_message(mainLog)
                        continue
                    sw.reset()
                    n_workers = len(workspec_list)
                    try:
                        # try bulk method
                        tmpLog = self.make_logger(_logger,
                                                  'id={0}'.format(lockedBy),
                                                  method_name='run')
                        tmpLog.debug('start killing')
                        tmpList = sweeperCore.kill_workers(workspec_list)
                    except AttributeError:
                        # fall back to single-worker method
                        for workspec in workspec_list:
                            tmpLog = self.make_logger(_logger,
                                                      'workerID={0}'.format(
                                                          workspec.workerID),
                                                      method_name='run')
                            try:
                                tmpLog.debug('start killing one worker')
                                tmpStat, tmpOut = sweeperCore.kill_worker(
                                    workspec)
                                tmpLog.debug(
                                    'done killing with status={0} diag={1}'.
                                    format(tmpStat, tmpOut))
                            except Exception:
                                core_utils.dump_error_message(tmpLog)
                    except Exception:
                        core_utils.dump_error_message(mainLog)
                    else:
                        # bulk method
                        n_killed = 0
                        for workspec, (tmpStat,
                                       tmpOut) in zip(workspec_list, tmpList):
                            tmpLog.debug(
                                'done killing workerID={0} with status={1} diag={2}'
                                .format(workspec.workerID, tmpStat, tmpOut))
                            if tmpStat:
                                n_killed += 1
                        tmpLog.debug('killed {0}/{1} workers'.format(
                            n_killed, n_workers))
                    mainLog.debug(
                        'done killing {0} workers'.format(n_workers) +
                        sw.get_elapsed_time())
            mainLog.debug('done all killing' + sw_kill.get_elapsed_time())
            # cleanup stage
            sw_cleanup = core_utils.get_stopwatch()
            # timeout for missed
            try:
                keepMissed = harvester_config.sweeper.keepMissed
            except Exception:
                keepMissed = 24
            try:
                keepPending = harvester_config.sweeper.keepPending
            except Exception:
                keepPending = 24
            # get workers for cleanup
            statusTimeoutMap = {
                'finished': harvester_config.sweeper.keepFinished,
                'failed': harvester_config.sweeper.keepFailed,
                'cancelled': harvester_config.sweeper.keepCancelled,
                'missed': keepMissed,
                'pending': keepPending
            }
            workersForCleanup = self.dbProxy.get_workers_for_cleanup(
                harvester_config.sweeper.maxWorkers, statusTimeoutMap)
            mainLog.debug('got {0} queues for workers cleanup'.format(
                len(workersForCleanup)))
            sw = core_utils.get_stopwatch()
            for queueName, configIdWorkSpecList in iteritems(
                    workersForCleanup):
                for configID, workspec_list in iteritems(configIdWorkSpecList):
                    # get sweeper
                    if not self.queueConfigMapper.has_queue(
                            queueName, configID):
                        mainLog.error(
                            'queue config for {0}/{1} not found'.format(
                                queueName, configID))
                        continue
                    queueConfig = self.queueConfigMapper.get_queue(
                        queueName, configID)
                    sweeperCore = self.pluginFactory.get_plugin(
                        queueConfig.sweeper)
                    messenger = self.pluginFactory.get_plugin(
                        queueConfig.messenger)
                    sw.reset()
                    n_workers = len(workspec_list)
                    # make sure workers to clean up are all terminated
                    mainLog.debug(
                        'making sure workers to clean up are all terminated')
                    try:
                        # try bulk method
                        tmpList = sweeperCore.kill_workers(workspec_list)
                    except AttributeError:
                        # fall back to single-worker method
                        for workspec in workspec_list:
                            tmpLog = self.make_logger(_logger,
                                                      'workerID={0}'.format(
                                                          workspec.workerID),
                                                      method_name='run')
                            try:
                                tmpStat, tmpOut = sweeperCore.kill_worker(
                                    workspec)
                            except Exception:
                                core_utils.dump_error_message(tmpLog)
                    except Exception:
                        core_utils.dump_error_message(mainLog)
                    mainLog.debug(
                        'made sure workers to clean up are all terminated')
                    # start cleanup
                    for workspec in workspec_list:
                        tmpLog = self.make_logger(_logger,
                                                  'workerID={0}'.format(
                                                      workspec.workerID),
                                                  method_name='run')
                        try:
                            tmpLog.debug('start cleaning up one worker')
                            # sweep worker
                            tmpStat, tmpOut = sweeperCore.sweep_worker(
                                workspec)
                            tmpLog.debug(
                                'swept_worker with status={0} diag={1}'.format(
                                    tmpStat, tmpOut))
                            tmpLog.debug('start messenger cleanup')
                            mc_tmpStat, mc_tmpOut = messenger.clean_up(
                                workspec)
                            tmpLog.debug(
                                'messenger cleaned up with status={0} diag={1}'
                                .format(mc_tmpStat, mc_tmpOut))
                            if tmpStat:
                                self.dbProxy.delete_worker(workspec.workerID)
                        except Exception:
                            core_utils.dump_error_message(tmpLog)
                    mainLog.debug(
                        'done cleaning up {0} workers'.format(n_workers) +
                        sw.get_elapsed_time())
            mainLog.debug('done all cleanup' + sw_cleanup.get_elapsed_time())
            # old-job-deletion stage
            sw_delete = core_utils.get_stopwatch()
            mainLog.debug('delete old jobs')
            jobTimeout = max(statusTimeoutMap.values()) + 1
            self.dbProxy.delete_old_jobs(jobTimeout)
            # delete orphaned job info
            self.dbProxy.delete_orphaned_job_info()
            mainLog.debug('done deletion of old jobs' +
                          sw_delete.get_elapsed_time())
            # disk cleanup
            if hasattr(harvester_config.sweeper, 'diskCleanUpInterval') and \
                    hasattr(harvester_config.sweeper, 'diskHighWatermark'):
                locked = self.dbProxy.get_process_lock(
                    'sweeper', self.get_pid(),
                    harvester_config.sweeper.diskCleanUpInterval * 60 * 60)
                if locked:
                    try:
                        all_active_files = None
                        for item in harvester_config.sweeper.diskHighWatermark.split(
                                ','):
                            # dir name and watermark in GB
                            dir_name, watermark = item.split('|')
                            mainLog.debug(
                                'checking {0} for cleanup with watermark {1} GB'
                                .format(dir_name, watermark))
                            watermark = int(watermark) * 10**9
                            total_size = 0
                            file_dict = {}
                            # scan dir
                            for root, dirs, filenames in walk(dir_name):
                                for base_name in filenames:
                                    full_name = os.path.join(root, base_name)
                                    f_size = os.path.getsize(full_name)
                                    total_size += f_size
                                    mtime = os.path.getmtime(full_name)
                                    file_dict.setdefault(mtime, set())
                                    file_dict[mtime].add(
                                        (base_name, full_name, f_size))
                            # delete if necessary
                            if total_size < watermark:
                                mainLog.debug(
                                    'skip cleanup {0} due to total_size {1} GB < watermark {2} GB'
                                    .format(dir_name, total_size // (10**9),
                                            watermark // (10**9)))
                            else:
                                mainLog.debug(
                                    'cleanup {0} due to total_size {1} GB >= watermark {2} GB'
                                    .format(dir_name, total_size // (10**9),
                                            watermark // (10**9)))
                                # get active input files
                                if all_active_files is None:
                                    all_active_files = self.dbProxy.get_all_active_input_files(
                                    )
                                deleted_size = 0
                                mtimes = sorted(file_dict.keys())
                                for mtime in mtimes:
                                    for base_name, full_name, f_size in file_dict[
                                            mtime]:
                                        # keep if active
                                        if base_name in all_active_files:
                                            continue
                                        try:
                                            os.remove(full_name)
                                        except Exception:
                                            core_utils.dump_error_message(
                                                mainLog)
                                        deleted_size += f_size
                                        if total_size - deleted_size < watermark:
                                            break
                                    if total_size - deleted_size < watermark:
                                        break
                    except Exception:
                        core_utils.dump_error_message(mainLog)
            # time the cycle
            mainLog.debug('done a sweeper cycle' + sw_main.get_elapsed_time())
            # check if being terminated
            if self.terminated(harvester_config.sweeper.sleepTime):
                mainLog.debug('terminated')
                return