def post_collect(self, config): self._executor = TwistedExecutor(self.conn_params['consess']) d = maybeDeferred(self._connect) d.addCallbacks(self.connectCallback, self.connectionFailed) # do not return internal connection handling deferred, but global # tasks deferred. self._task_defer = Deferred() return self._task_defer
def __init__(self, taskName, configId, scheduleIntervalSeconds, taskConfig): """ @param taskName: the unique identifier for this task @type taskName: string @param configId: configuration to watch @type configId: string @param scheduleIntervalSeconds: the interval at which this task will be collected @type scheduleIntervalSeconds: int @param taskConfig: the configuration for this task """ super(SshPerformanceCollectionTask, self).__init__(taskName, configId, scheduleIntervalSeconds, taskConfig) # Needed for interface self.name = taskName self.configId = configId self.state = TaskStates.STATE_IDLE self.interval = scheduleIntervalSeconds # The taskConfig corresponds to a DeviceProxy self._device = taskConfig self._devId = self._device.id self._manageIp = self._device.manageIp self._dataService = zope.component.queryUtility(IDataService) self._eventService = zope.component.queryUtility(IEventService) self._preferences = zope.component.queryUtility( ICollectorPreferences, COLLECTOR_NAME) self._lastErrorMsg = '' self._maxbackoffseconds = self._preferences.options.maxbackoffminutes * 60 self._concurrentSessions = taskConfig.zSshConcurrentSessions self._executor = TwistedExecutor(self._concurrentSessions) self._useSsh = taskConfig.datasources[0].useSsh self._connection = None self._datasources = taskConfig.datasources self.pool = getPool('SSH Connections') self.executed = 0 self._task_defer = None
def __init__(self, callableTaskFactory=CallableTaskFactory()): self._loopingCalls = {} self._tasks = {} self._taskCallback = {} self._taskStats = {} self._callableTaskFactory = callableTaskFactory self._shuttingDown = False # create a cleanup task that will periodically sweep the # cleanup dictionary for tasks that need to be cleaned self._tasksToCleanup = KeyedSet(getConfigId) self._cleanupTask = task.LoopingCall(self._cleanupTasks) self._cleanupTask.start(Scheduler.CLEANUP_TASKS_INTERVAL) self._executor = TwistedExecutor(1) # Ensure that we can cleanly shutdown all of our tasks reactor.addSystemEventTrigger('before', 'shutdown', self.shutdown, 'before') reactor.addSystemEventTrigger('during', 'shutdown', self.shutdown, 'during') reactor.addSystemEventTrigger('after', 'shutdown', self.shutdown, 'after')
def __init__(self, taskName, configId, scheduleIntervalSeconds, taskConfig): """ @param taskName: the unique identifier for this task @type taskName: string @param configId: configuration to watch @type configId: string @param scheduleIntervalSeconds: the interval at which this task will be collected @type scheduleIntervalSeconds: int @param taskConfig: the configuration for this task """ super(SshPerformanceCollectionTask, self).__init__(taskName, configId, scheduleIntervalSeconds, taskConfig) # Needed for interface self.name = taskName self.configId = configId self.state = TaskStates.STATE_IDLE self.interval = scheduleIntervalSeconds # The taskConfig corresponds to a DeviceProxy self._device = taskConfig self._devId = taskConfig.id self._manageIp = taskConfig.manageIp self._datasources = taskConfig.datasources self._useSsh = taskConfig.datasources[0].useSsh client = MySshClient if self._useSsh else None self._runner = partial(runner.getRunner, taskConfig, client) self._connector = self._runner() self._dataService = zope.component.queryUtility(IDataService) self._eventService = zope.component.queryUtility(IEventService) preferences = zope.component.queryUtility(ICollectorPreferences, COLLECTOR_NAME) self._maxbackoffseconds = preferences.options.maxbackoffminutes * 60 self._showfullcommand = preferences.options.showfullcommand self._chosenDatasource = preferences.options.datasource self._executor = TwistedExecutor(taskConfig.zSshConcurrentSessions) self.executed = 0 self._lastErrorMsg = '' self.manage_ip_event = { 'eventClass': Cmd_Fail, 'device': self._devId, 'summary': 'IP address not set, collection will be attempted\ with host name', 'component': COLLECTOR_NAME, 'eventKey': 'Empty_IP_address' }
def post_collect(self, device, log): """ Should get called from collect in subclass before finishing. """ log.debug("%s: DDN collection for device", device.id) # prepare the connection d = self._internal_defer = maybeDeferred(self._connect) d.addCallbacks(self.connectCallback, self.connectionFailed) # prepare concurrent executor context self._executor = TwistedExecutor(self._conn_params['consess']) return self._task_defer
def __init__(self, taskName, configId, scheduleIntervalSeconds, taskConfig): """ @param taskName: the unique identifier for this task @type taskName: string @param configId: configuration to watch @type configId: string @param scheduleIntervalSeconds: the interval at which this task will be collected @type scheduleIntervalSeconds: int @param taskConfig: the configuration for this task """ super(SshPerformanceCollectionTask, self).__init__( taskName, configId, scheduleIntervalSeconds, taskConfig ) # Needed for interface self.name = taskName self.configId = configId self.state = TaskStates.STATE_IDLE self.interval = scheduleIntervalSeconds # The taskConfig corresponds to a DeviceProxy self._device = taskConfig self._devId = self._device.id self._manageIp = self._device.manageIp self._dataService = zope.component.queryUtility(IDataService) self._eventService = zope.component.queryUtility(IEventService) self._preferences = zope.component.queryUtility(ICollectorPreferences, COLLECTOR_NAME) self._lastErrorMsg = '' self._maxbackoffseconds = self._preferences.options.maxbackoffminutes * 60 self._concurrentSessions = taskConfig.zSshConcurrentSessions self._executor = TwistedExecutor(self._concurrentSessions) self._useSsh = taskConfig.datasources[0].useSsh self._connection = None self._datasources = taskConfig.datasources self.pool = getPool('SSH Connections') self.executed = 0 self._task_defer = None
def __init__(self, callableTaskFactory=CallableTaskFactory()): self._loopingCalls = {} self._tasks = {} self._taskCallback = {} self._taskStats = {} self._callableTaskFactory = callableTaskFactory self._shuttingDown = False # create a cleanup task that will periodically sweep the # cleanup dictionary for tasks that need to be cleaned self._tasksToCleanup = set() self._cleanupTask = task.LoopingCall(self._cleanupTasks) self._cleanupTask.start(Scheduler.CLEANUP_TASKS_INTERVAL) self._executor = TwistedExecutor(1) # Ensure that we can cleanly shutdown all of our tasks reactor.addSystemEventTrigger('before', 'shutdown', self.shutdown, 'before') reactor.addSystemEventTrigger('during', 'shutdown', self.shutdown, 'during') reactor.addSystemEventTrigger('after', 'shutdown', self.shutdown, 'after')
class Scheduler(object): """ A simple interval-based scheduler that makes use of the Twisted framework's LoopingCall construct. """ zope.interface.implements(IScheduler) CLEANUP_TASKS_INTERVAL = 10 # seconds def __init__(self, callableTaskFactory=CallableTaskFactory()): self._loopingCalls = {} self._tasks = {} self._taskCallback = {} self._taskStats = {} self._callableTaskFactory = callableTaskFactory self._shuttingDown = False # create a cleanup task that will periodically sweep the # cleanup dictionary for tasks that need to be cleaned self._tasksToCleanup = set() self._cleanupTask = task.LoopingCall(self._cleanupTasks) self._cleanupTask.start(Scheduler.CLEANUP_TASKS_INTERVAL) self._executor = TwistedExecutor(1) # Ensure that we can cleanly shutdown all of our tasks reactor.addSystemEventTrigger('before', 'shutdown', self.shutdown, 'before') reactor.addSystemEventTrigger('during', 'shutdown', self.shutdown, 'during') reactor.addSystemEventTrigger('after', 'shutdown', self.shutdown, 'after') def __contains__(self, task): """ Returns True if the task has been added to the scheduler. Otherwise False is returned. """ # If task has no 'name' attribute, assume the task name was passed in. name = getattr(task, "name", task) return name in self._tasks def shutdown(self, phase): """ The reactor shutdown has three phases for event types: before - tasks can shut down safely at this time with access to all services (eg EventService, their own services) during - EventService and other services are gone before this starts after - not a lot left -- be careful Tasks that have the attribute 'stopPhase' can set the state for which they should be run, otherwise they will be shut down in the 'before' phase. Tasks that have the attribute 'stopOrder' can set the order in which they are shut down (lowest first). A stopOrder of 0 (zero) is assumed for tasks which do not declare a stopOrder. Returns a list of deferreds to wait on. """ self._shuttingDown = True doomedTasks = [] stopQ = {} log.debug("In shutdown stage %s", phase) for (taskName, taskWrapper) in self._tasks.iteritems(): task = taskWrapper.task stopPhase = getattr(task, 'stopPhase', 'before') if stopPhase in ('before', 'after', 'during') and \ stopPhase != phase: continue stopOrder = getattr(task, 'stopOrder', 0) queue = stopQ.setdefault(stopOrder, []) queue.append( (taskName, taskWrapper, task) ) for stopOrder in sorted(stopQ): for (taskName, taskWrapper, task) in stopQ[stopOrder]: loopTask = self._loopingCalls[taskName] if loopTask.running: log.debug("Stopping running task %s", taskName) loopTask.stop() log.debug("Removing task %s", taskName) doomedTasks.append(taskName) self.taskRemoved(taskWrapper) for taskName in doomedTasks: self._tasksToCleanup.add(self._tasks[taskName].task) del self._loopingCalls[taskName] del self._tasks[taskName] del self._taskStats[taskName] cleanupList = self._cleanupTasks() return defer.DeferredList(cleanupList) @property def executor(self): return self._executor def _getMaxTasks(self): return self._executor.getMax() def _setMaxTasks(self, max): return self._executor.setMax(max) maxTasks = property(_getMaxTasks, _setMaxTasks) def _ltCallback(self, result, task_name): """last call back in the chain, if it gets called as an errBack the looping will stop - shouldn't be called since CallableTask doesn't return a deferred, here for sanity and debug""" if task_name in self._loopingCalls: loopingCall = self._loopingCalls[task_name] log.debug("call finished %s : %s" %(loopingCall, result)) if isinstance(result, Failure): log.warn("Failure in looping call, will not reschedule %s" % task_name) log.error("%s" % result) def _startTask(self, result, task_name, interval): """start the task using a callback so that its put at the bottom of the Twisted event queue, to allow other processing to continue and to support a task start-time jitter""" if task_name in self._loopingCalls: loopingCall = self._loopingCalls[task_name] if not loopingCall.running: log.debug("Task %s starting on %d second intervals", task_name, interval) d = loopingCall.start(interval) d.addBoth(self._ltCallback, task_name) def addTask(self, newTask, callback=None, now=False): """ Add a new IScheduledTask to the scheduler for execution. @param newTask the new task to schedule @type newTask IScheduledTask @param callback a callback to be notified each time the task completes @type callback a Python callable """ if newTask.name in self._tasks: raise ValueError("Task %s already exists" % newTask.name) log.debug("add task %s, %s using %s second interval", newTask.name, newTask, newTask.interval) callableTask = self._callableTaskFactory.getCallableTask(newTask, self) loopingCall = task.LoopingCall(callableTask) self._loopingCalls[newTask.name] = loopingCall self._tasks[newTask.name] = callableTask self._taskCallback[newTask.name] = callback self.taskAdded(callableTask) d = defer.Deferred() d.addCallback(self._startTask, newTask.name, newTask.interval) startDelay = getattr(newTask, 'startDelay', None) if startDelay is None: startDelay = 0 if now else self._getStartDelay(newTask) # explicitly set, next expected call in case task was never executed/schedule loopingCall._expectNextCallAt = time.time() + startDelay reactor.callLater(startDelay, d.callback, None) # just in case someone does not implement scheduled, lets be careful scheduled = getattr(newTask, 'scheduled', lambda x: None) scheduled(self) def _getStartDelay(self, task): """ amount of time to delay the start of a task. Prevents bunching up of task execution when a large amount of tasks are scheduled at the same time. """ #simple delay of random number between 0 and half the task interval delay = random.randint(0, int(task.interval/2)) return delay def taskAdded(self, taskWrapper): """ Called whenever the scheduler adds a task. """ task = taskWrapper.task # watch the task's attribute changes task.attachAttributeObserver('state', self._taskStateChangeListener) task.attachAttributeObserver('interval', self._taskIntervalChangeListener) # create the statistics data for this task self._taskStats[task.name] = TaskStatistics(task) taskWrapper.taskStats = self._taskStats[task.name] def taskRemoved(self, taskWrapper): """ Called whenever the scheduler removes a task. """ task = taskWrapper.task task.detachAttributeObserver('state', self._taskStateChangeListener) task.detachAttributeObserver('interval', self._taskIntervalChangeListener) def taskPaused(self, taskWrapper): """ Called whenever the scheduler pauses a task. """ pass def taskResumed(self, taskWrapper): """ Called whenever the scheduler resumes a task. """ pass def getTasksForConfig(self, configId): """ Get all tasks associated with the specified identifier. """ tasks = [] for (taskName, taskWrapper) in self._tasks.iteritems(): task = taskWrapper.task if task.configId == configId: tasks.append(task) return tasks def getNextExpectedRun(self, taskName): """ Get the next expected execution time for given task """ loopingCall = self._loopingCalls.get(taskName, None) if loopingCall: return loopingCall._expectNextCallAt def removeTasks(self, taskNames): """ Remove tasks """ doomedTasks = [] # child ids are any task that are children of the current task being # removed childIds = [] for taskName in taskNames: taskWrapper = self._tasks[taskName] task = taskWrapper.task subIds = getattr(task, "childIds", None) if subIds: childIds.extend(subIds) log.debug("Stopping task %s, %s", taskName, task) if self._loopingCalls[taskName].running: self._loopingCalls[taskName].stop() doomedTasks.append(taskName) self.taskRemoved(taskWrapper) for taskName in doomedTasks: task = self._tasks[taskName].task self._tasksToCleanup.add(task) del self._loopingCalls[taskName] del self._tasks[taskName] self._displayTaskStatistics(task) del self._taskStats[taskName] # TODO: ponder task statistics and keeping them around? map(self.removeTasksForConfig, childIds) # TODO: don't let any tasks for the same config start until # these old tasks are really gone def removeTasksForConfig(self, configId): """ Remove all tasks associated with the specified identifier. @paramater configId: the identifier to search for @type configId: string """ self.removeTasks(taskName for taskName, taskWrapper in self._tasks.iteritems() if taskWrapper.task.configId == configId) def pauseTasksForConfig(self, configId): for (taskName, taskWrapper) in self._tasks.items(): task = taskWrapper.task if task.configId == configId: log.debug("Pausing task %s", taskName) taskWrapper.paused = True self.taskPaused(taskWrapper) def resumeTasksForConfig(self, configId): for (taskName, taskWrapper) in self._tasks.iteritems(): task = taskWrapper.task if task.configId == configId: log.debug("Resuming task %s", taskName) taskWrapper.paused = False self.taskResumed(taskWrapper) def taskDone(self, taskName): if callable(self._taskCallback[taskName]): self._taskCallback[taskName](taskName=taskName) def _taskStateChangeListener(self, observable, attrName, oldValue, newValue): task = observable log.debug("Task %s changing state from %s to %s", task.name, oldValue, newValue) taskStat = self._taskStats[task.name] taskStat.trackStateChange(oldValue, newValue) def _taskIntervalChangeListener(self, observable, attrName, oldValue, newValue): """ Allows tasks to change their collection interval on the fly """ task = observable log.debug("Task %s changing run interval from %s to %s", task.name, oldValue, newValue) loopingCall = task._dataService._scheduler._loopingCalls[task.name] loopingCall.interval = newValue def displayStatistics(self, verbose): totalRuns = 0 totalFailedRuns = 0 totalMissedRuns = 0 totalTasks = 0 stateStats = {} for taskWrapper in self._tasks.itervalues(): task = taskWrapper.task taskStats = self._taskStats[task.name] totalTasks += 1 totalRuns += taskStats.totalRuns totalFailedRuns += taskStats.failedRuns totalMissedRuns += taskStats.missedRuns for state, stats in taskStats.states.iteritems(): if state in stateStats: totalStateStats = stateStats[state] else: totalStateStats = StateStatistics(state) stateStats[state] = totalStateStats totalStateStats.totalElapsedTime += stats.totalElapsedTime totalStateStats.totalElapsedTimeSquared += stats.totalElapsedTimeSquared totalStateStats.totalCalls += stats.totalCalls totalStateStats.minElapsedTime = min(totalStateStats.minElapsedTime, stats.minElapsedTime) totalStateStats.maxElapsedTime = max(totalStateStats.maxElapsedTime, stats.maxElapsedTime) log.info("Tasks: %d Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d " \ "Queued_Tasks: %d Running_Tasks: %d ", totalTasks, totalRuns, totalFailedRuns, totalMissedRuns, self.executor.queued, self.executor.running) if verbose: buffer = "Task States Summary:\n" buffer = self._displayStateStatistics(buffer, stateStats) buffer += "\nTasks:\n" for taskWrapper in self._tasks.itervalues(): task = taskWrapper.task taskStats = self._taskStats[task.name] buffer += "%s Current State: %s Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d\n" \ % (task.name, task.state, taskStats.totalRuns, taskStats.failedRuns, taskStats.missedRuns) buffer += "\nDetailed Task States:\n" for taskWrapper in self._tasks.itervalues(): task = taskWrapper.task taskStats = self._taskStats[task.name] if not taskStats.states: # Hasn't run yet continue buffer = self._displayStateStatistics(buffer, taskStats.states, "%s " % task.name) if hasattr(task, 'displayStatistics'): buffer += task.displayStatistics() buffer += "\n" log.info("Detailed Scheduler Statistics:\n%s", buffer) # TODO: the above logic doesn't print out any stats for the 'current' # state... i.e. enter the PAUSED state and wait there an hour, and # you'll see no data # TODO: should we reset the statistics here, or do it on a periodic # interval, like once an hour or day or something? def _displayTaskStatistics(self, task): verbose = log.isEnabledFor(logging.DEBUG) if not verbose: return taskStats = self._taskStats[task.name] buffer = "%s Current State: %s Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d\n" \ % (task.name, task.state, taskStats.totalRuns, taskStats.failedRuns, taskStats.missedRuns) buffer += "\nDetailed Task States:\n" if taskStats.states: # Hasn't run yet buffer = self._displayStateStatistics(buffer, taskStats.states, "%s " % task.name) if hasattr(task, 'displayStatistics'): buffer += task.displayStatistics() buffer += "\n" log.info("Detailed Task Statistics:\n%s", buffer) def _displayStateStatistics(self, buffer, stateStats, prefix=''): for state, stats in stateStats.iteritems(): buffer += "%sState: %s Total: %d Total Elapsed: %.4f Min: %.4f Max: %.4f Mean: %.4f StdDev: %.4f\n" \ % (prefix, state, stats.totalCalls, stats.totalElapsedTime, stats.minElapsedTime, stats.maxElapsedTime, stats.mean, stats.stddev) return buffer def _cleanupTasks(self): """ Periodically cleanup tasks that have been queued up for cleaning. """ # Build a list of the tasks that need to be cleaned up so that there # is no issue with concurrent modifications to the _tasksToCleanup # dictionary when tasks are quickly cleaned up. if self._tasksToCleanup: log.debug("tasks to clean %s" % self._tasksToCleanup) todoList = [task for task in self._tasksToCleanup if self._isTaskCleanable(task)] cleanupWaitList = [] for task in todoList: # changing the state of the task will keep the next cleanup run # from processing it again task.state = TaskStates.STATE_CLEANING if self._shuttingDown: #let the task know the scheduler is shutting down task.state = TaskStates.STATE_SHUTDOWN log.debug("Cleanup on task %s %s", task.name, task) d = defer.maybeDeferred(task.cleanup) d.addBoth(self._cleanupTaskComplete, task) cleanupWaitList.append(d) return cleanupWaitList def _cleanupTaskComplete(self, result, task): """ Twisted callback to remove a task from the cleanup queue once it has completed its cleanup work. """ log.debug("Scheduler._cleanupTaskComplete: result=%s task.name=%s" % (result, task.name)) self._tasksToCleanup.discard(task) return result def _isTaskCleanable(self, task): """ Determines if a task is able to be cleaned up or not. """ return task.state in (TaskStates.STATE_IDLE, TaskStates.STATE_PAUSED, TaskStates.STATE_COMPLETED)
class Scheduler(object): """ A simple interval-based scheduler that makes use of the Twisted framework's LoopingCall construct. """ zope.interface.implements(IScheduler) CLEANUP_TASKS_INTERVAL = 10 # seconds def __init__(self, callableTaskFactory=CallableTaskFactory()): self._loopingCalls = {} self._tasks = {} self._taskCallback = {} self._taskStats = {} self._callableTaskFactory = callableTaskFactory self._shuttingDown = False # create a cleanup task that will periodically sweep the # cleanup dictionary for tasks that need to be cleaned self._tasksToCleanup = KeyedSet(getConfigId) self._cleanupTask = task.LoopingCall(self._cleanupTasks) self._cleanupTask.start(Scheduler.CLEANUP_TASKS_INTERVAL) self._executor = TwistedExecutor(1) # Ensure that we can cleanly shutdown all of our tasks reactor.addSystemEventTrigger('before', 'shutdown', self.shutdown, 'before') reactor.addSystemEventTrigger('during', 'shutdown', self.shutdown, 'during') reactor.addSystemEventTrigger('after', 'shutdown', self.shutdown, 'after') def __contains__(self, task): """ Returns True if the task has been added to the scheduler. Otherwise False is returned. """ # If task has no 'name' attribute, assume the task name was passed in. name = getattr(task, "name", task) return name in self._tasks def shutdown(self, phase): """ The reactor shutdown has three phases for event types: before - tasks can shut down safely at this time with access to all services (eg EventService, their own services) during - EventService and other services are gone before this starts after - not a lot left -- be careful Tasks that have the attribute 'stopPhase' can set the state for which they should be run, otherwise they will be shut down in the 'before' phase. Tasks that have the attribute 'stopOrder' can set the order in which they are shut down (lowest first). A stopOrder of 0 (zero) is assumed for tasks which do not declare a stopOrder. Returns a list of deferreds to wait on. """ self._shuttingDown = True doomedTasks = [] stopQ = {} log.debug("In shutdown stage %s", phase) for (taskName, taskWrapper) in self._tasks.iteritems(): task = taskWrapper.task stopPhase = getattr(task, 'stopPhase', 'before') if stopPhase in ('before', 'after', 'during') and \ stopPhase != phase: continue stopOrder = getattr(task, 'stopOrder', 0) queue = stopQ.setdefault(stopOrder, []) queue.append((taskName, taskWrapper, task)) for stopOrder in sorted(stopQ): for (taskName, taskWrapper, task) in stopQ[stopOrder]: loopTask = self._loopingCalls[taskName] if loopTask.running: log.debug("Stopping running task %s", taskName) loopTask.stop() log.debug("Removing task %s", taskName) doomedTasks.append(taskName) self.taskRemoved(taskWrapper) for taskName in doomedTasks: self._tasksToCleanup.add(self._tasks[taskName].task) del self._loopingCalls[taskName] del self._tasks[taskName] del self._taskStats[taskName] cleanupList = self._cleanupTasks() return defer.DeferredList(cleanupList) @property def executor(self): return self._executor def _getMaxTasks(self): return self._executor.getMax() def _setMaxTasks(self, max): return self._executor.setMax(max) maxTasks = property(_getMaxTasks, _setMaxTasks) def _ltCallback(self, result, task_name): """last call back in the chain, if it gets called as an errBack the looping will stop - shouldn't be called since CallableTask doesn't return a deferred, here for sanity and debug""" if task_name in self._loopingCalls: loopingCall = self._loopingCalls[task_name] log.debug("call finished %s : %s" % (loopingCall, result)) if isinstance(result, Failure): log.warn("Failure in looping call, will not reschedule %s" % task_name) log.error("%s" % result) def _startTask(self, result, task_name, interval, configId, delayed): """start the task using a callback so that its put at the bottom of the Twisted event queue, to allow other processing to continue and to support a task start-time jitter""" if task_name in self._loopingCalls: loopingCall = self._loopingCalls[task_name] if not loopingCall.running: if self._tasksToCleanup.has_key(configId): delay = random.randint(0, int(interval / 2)) delayed = delayed + delay log.debug( "Waiting for cleanup of %s. Task %s postponing its start %d seconds (%d so far)", configId, task_name, delay, delayed) d = defer.Deferred() d.addCallback(self._startTask, task_name, interval, configId, delayed) reactor.callLater(delay, d.callback, None) else: log.debug( "Task %s starting (waited %d seconds) on %d second intervals", task_name, delayed, interval) d = loopingCall.start(interval) d.addBoth(self._ltCallback, task_name) def addTask(self, newTask, callback=None, now=False): """ Add a new IScheduledTask to the scheduler for execution. @param newTask the new task to schedule @type newTask IScheduledTask @param callback a callback to be notified each time the task completes @type callback a Python callable """ if newTask.name in self._tasks: raise ValueError("Task %s already exists" % newTask.name) log.debug("add task %s, %s using %s second interval", newTask.name, newTask, newTask.interval) callableTask = self._callableTaskFactory.getCallableTask(newTask, self) loopingCall = task.LoopingCall(callableTask) self._loopingCalls[newTask.name] = loopingCall self._tasks[newTask.name] = callableTask self._taskCallback[newTask.name] = callback self.taskAdded(callableTask) startDelay = getattr(newTask, 'startDelay', None) if startDelay is None: startDelay = 0 if now else self._getStartDelay(newTask) d = defer.Deferred() d.addCallback(self._startTask, newTask.name, newTask.interval, newTask.configId, startDelay) reactor.callLater(startDelay, d.callback, None) # just in case someone does not implement scheduled, lets be careful scheduled = getattr(newTask, 'scheduled', lambda x: None) scheduled(self) def _getStartDelay(self, task): """ amount of time to delay the start of a task. Prevents bunching up of task execution when a large amount of tasks are scheduled at the same time. """ #simple delay of random number between 0 and half the task interval delay = random.randint(0, int(task.interval / 2)) return delay def taskAdded(self, taskWrapper): """ Called whenever the scheduler adds a task. """ task = taskWrapper.task # watch the task's attribute changes task.attachAttributeObserver('state', self._taskStateChangeListener) task.attachAttributeObserver('interval', self._taskIntervalChangeListener) # create the statistics data for this task self._taskStats[task.name] = TaskStatistics(task) taskWrapper.taskStats = self._taskStats[task.name] def taskRemoved(self, taskWrapper): """ Called whenever the scheduler removes a task. """ task = taskWrapper.task task.detachAttributeObserver('state', self._taskStateChangeListener) task.detachAttributeObserver('interval', self._taskIntervalChangeListener) def taskPaused(self, taskWrapper): """ Called whenever the scheduler pauses a task. """ pass def taskResumed(self, taskWrapper): """ Called whenever the scheduler resumes a task. """ pass def getTasksForConfig(self, configId): """ Get all tasks associated with the specified identifier. """ tasks = [] for (taskName, taskWrapper) in self._tasks.iteritems(): task = taskWrapper.task if task.configId == configId: tasks.append(task) return tasks def removeTasksForConfig(self, configId): """ Remove all tasks associated with the specified identifier. @paramater configId: the identifier to search for @type configId: string """ doomedTasks = [] # child ids are any task that are children of the current task being # removed childIds = [] for (taskName, taskWrapper) in self._tasks.iteritems(): task = taskWrapper.task if task.configId == configId: subIds = getattr(task, "childIds", None) if subIds: childIds.extend(subIds) log.debug("Stopping task %s, %s", taskName, task) if self._loopingCalls[taskName].running: self._loopingCalls[taskName].stop() doomedTasks.append(taskName) self.taskRemoved(taskWrapper) for taskName in doomedTasks: task = self._tasks[taskName].task self._tasksToCleanup.add(task) del self._loopingCalls[taskName] del self._tasks[taskName] self._displayTaskStatistics(task) del self._taskStats[taskName] # TODO: ponder task statistics and keeping them around? for childId in childIds: self.removeTasksForConfig(childId) def pauseTasksForConfig(self, configId): for (taskName, taskWrapper) in self._tasks.items(): task = taskWrapper.task if task.configId == configId: log.debug("Pausing task %s", taskName) taskWrapper.paused = True self.taskPaused(taskWrapper) def resumeTasksForConfig(self, configId): for (taskName, taskWrapper) in self._tasks.iteritems(): task = taskWrapper.task if task.configId == configId: log.debug("Resuming task %s", taskName) taskWrapper.paused = False self.taskResumed(taskWrapper) def taskDone(self, taskName): if callable(self._taskCallback[taskName]): self._taskCallback[taskName](taskName=taskName) def _taskStateChangeListener(self, observable, attrName, oldValue, newValue): task = observable log.debug("Task %s changing state from %s to %s", task.name, oldValue, newValue) taskStat = self._taskStats[task.name] taskStat.trackStateChange(oldValue, newValue) def _taskIntervalChangeListener(self, observable, attrName, oldValue, newValue): """ Allows tasks to change their collection interval on the fly """ task = observable log.debug("Task %s changing run interval from %s to %s", task.name, oldValue, newValue) #TODO: should this be... # loopingCall = self._loopingCalls[task.name] loopingCall = task._dataService._scheduler._loopingCalls[task.name] loopingCall.interval = newValue @property def missedRuns(self): totalMissedRuns = 0 for taskWrapper in self._tasks.itervalues(): task = taskWrapper.task taskStats = self._taskStats[task.name] totalMissedRuns += taskStats.missedRuns return totalMissedRuns @property def failedRuns(self): totalFailedRuns = 0 for taskWrapper in self._tasks.itervalues(): task = taskWrapper.task taskStats = self._taskStats[task.name] totalFailedRuns += taskStats.failedRuns return totalFailedRuns def displayStatistics(self, verbose): totalRuns = 0 totalFailedRuns = 0 totalMissedRuns = 0 totalTasks = 0 stateStats = {} for taskWrapper in self._tasks.itervalues(): task = taskWrapper.task taskStats = self._taskStats[task.name] totalTasks += 1 totalRuns += taskStats.totalRuns totalFailedRuns += taskStats.failedRuns totalMissedRuns += taskStats.missedRuns for state, stats in taskStats.states.iteritems(): if state in stateStats: totalStateStats = stateStats[state] else: totalStateStats = StateStatistics(state) stateStats[state] = totalStateStats totalStateStats.totalElapsedTime += stats.totalElapsedTime totalStateStats.totalElapsedTimeSquared += stats.totalElapsedTimeSquared totalStateStats.totalCalls += stats.totalCalls totalStateStats.minElapsedTime = min( totalStateStats.minElapsedTime, stats.minElapsedTime) totalStateStats.maxElapsedTime = max( totalStateStats.maxElapsedTime, stats.maxElapsedTime) log.info("Tasks: %d Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d " \ "Queued_Tasks: %d Running_Tasks: %d ", totalTasks, totalRuns, totalFailedRuns, totalMissedRuns, self.executor.queued, self.executor.running) if verbose: buffer = "Task States Summary:\n" buffer = self._displayStateStatistics(buffer, stateStats) buffer += "\nTasks:\n" for taskWrapper in self._tasks.itervalues(): task = taskWrapper.task taskStats = self._taskStats[task.name] buffer += "%s Current State: %s Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d\n" \ % (task.name, task.state, taskStats.totalRuns, taskStats.failedRuns, taskStats.missedRuns) buffer += "\nDetailed Task States:\n" for taskWrapper in self._tasks.itervalues(): task = taskWrapper.task taskStats = self._taskStats[task.name] if not taskStats.states: # Hasn't run yet continue buffer = self._displayStateStatistics(buffer, taskStats.states, "%s " % task.name) if hasattr(task, 'displayStatistics'): buffer += task.displayStatistics() buffer += "\n" log.info("Detailed Scheduler Statistics:\n%s", buffer) # TODO: the above logic doesn't print out any stats for the 'current' # state... i.e. enter the PAUSED state and wait there an hour, and # you'll see no data # TODO: should we reset the statistics here, or do it on a periodic # interval, like once an hour or day or something? def _displayTaskStatistics(self, task): verbose = log.isEnabledFor(logging.DEBUG) if not verbose: return taskStats = self._taskStats[task.name] buffer = "%s Current State: %s Successful_Runs: %d Failed_Runs: %d Missed_Runs: %d\n" \ % (task.name, task.state, taskStats.totalRuns, taskStats.failedRuns, taskStats.missedRuns) buffer += "\nDetailed Task States:\n" if taskStats.states: # Hasn't run yet buffer = self._displayStateStatistics(buffer, taskStats.states, "%s " % task.name) if hasattr(task, 'displayStatistics'): buffer += task.displayStatistics() buffer += "\n" log.info("Detailed Task Statistics:\n%s", buffer) def _displayStateStatistics(self, buffer, stateStats, prefix=''): for state, stats in stateStats.iteritems(): buffer += "%sState: %s Total: %d Total Elapsed: %.4f Min: %.4f Max: %.4f Mean: %.4f StdDev: %.4f\n" \ % (prefix, state, stats.totalCalls, stats.totalElapsedTime, stats.minElapsedTime, stats.maxElapsedTime, stats.mean, stats.stddev) return buffer def _cleanupTasks(self): """ Periodically cleanup tasks that have been queued up for cleaning. """ # Build a list of the tasks that need to be cleaned up so that there # is no issue with concurrent modifications to the _tasksToCleanup # dictionary when tasks are quickly cleaned up. if self._tasksToCleanup: log.debug("tasks to clean %s", self._tasksToCleanup) todoList = [ task for task in self._tasksToCleanup if self._isTaskCleanable(task) ] cleanupWaitList = [] for task in todoList: # changing the state of the task will keep the next cleanup run # from processing it again task.state = TaskStates.STATE_CLEANING if self._shuttingDown: #let the task know the scheduler is shutting down task.state = TaskStates.STATE_SHUTDOWN log.debug("Cleanup on task %s %s", task.name, task) d = defer.maybeDeferred(task.cleanup) d.addBoth(self._cleanupTaskComplete, task) cleanupWaitList.append(d) return cleanupWaitList def _cleanupTaskComplete(self, result, task): """ Twisted callback to remove a task from the cleanup queue once it has completed its cleanup work. """ log.debug("Scheduler._cleanupTaskComplete: result=%s task.name=%s" % (result, task.name)) self._tasksToCleanup.discard(task) return result def _isTaskCleanable(self, task): """ Determines if a task is able to be cleaned up or not. """ return task.state in (TaskStates.STATE_IDLE, TaskStates.STATE_PAUSED, TaskStates.STATE_COMPLETED)
class SshPerformanceCollectionTask(BaseTask): """ A task that performs periodic performance collection for devices providing data via SSH connections. """ zope.interface.implements(IScheduledTask) STATE_CONNECTING = 'CONNECTING' STATE_FETCH_DATA = 'FETCH_DATA' STATE_PARSE_DATA = 'PARSING_DATA' STATE_STORE_PERF = 'STORE_PERF_DATA' def __init__(self, taskName, configId, scheduleIntervalSeconds, taskConfig): """ @param taskName: the unique identifier for this task @type taskName: string @param configId: configuration to watch @type configId: string @param scheduleIntervalSeconds: the interval at which this task will be collected @type scheduleIntervalSeconds: int @param taskConfig: the configuration for this task """ super(SshPerformanceCollectionTask, self).__init__( taskName, configId, scheduleIntervalSeconds, taskConfig ) # Needed for interface self.name = taskName self.configId = configId self.state = TaskStates.STATE_IDLE self.interval = scheduleIntervalSeconds # The taskConfig corresponds to a DeviceProxy self._device = taskConfig self._devId = self._device.id self._manageIp = self._device.manageIp self._dataService = zope.component.queryUtility(IDataService) self._eventService = zope.component.queryUtility(IEventService) self._preferences = zope.component.queryUtility(ICollectorPreferences, COLLECTOR_NAME) self._lastErrorMsg = '' self._maxbackoffseconds = self._preferences.options.maxbackoffminutes * 60 self._concurrentSessions = taskConfig.zSshConcurrentSessions self._executor = TwistedExecutor(self._concurrentSessions) self._useSsh = taskConfig.datasources[0].useSsh self._connection = None self._datasources = taskConfig.datasources self.pool = getPool('SSH Connections') self.executed = 0 self._task_defer = None def __str__(self): return "COMMAND schedule Name: %s configId: %s Datasources: %d" % ( self.name, self.configId, len(self._datasources)) def cleanup(self): self._cleanUpPool() self._close() def _getPoolKey(self): """ Get the key under which the client should be stored in the pool. """ username = self._device.zCommandUsername password = self._device.zCommandPassword ip = self._manageIp port = self._device.zCommandPort return hash((username, password, ip, port)) def _cleanUpPool(self): """ Close the connection currently associated with this task. """ poolkey = self._getPoolKey() if poolkey in self.pool: client = self.pool[poolkey] if not client._taskList: # No other tasks, so safe to clean up transport = client.transport if transport: transport.loseConnection() del self.pool[poolkey] def doTask(self): """ Contact to one device and return a deferred which gathers data from the device. @return: Deferred actions to run against a device configuration @rtype: Twisted deferred object """ # Create a deferred wrapper for the scheduler since the connection # deferred is shared among many tasks. We have to initialize this # first to satisfy race condition with _connectCallback d = self._task_defer = defer.Deferred() d.addCallback(self._fetchPerf) # Call _finished for both success and error scenarios d.addBoth(self._finished) #---------------------------------------------------------------------- # See if we need to connect first before doing any collection d = defer.maybeDeferred(self._connect) d.addCallbacks(self._connectCallback, self.connectionFailed) # Wait until the Deferred actually completes return self._task_defer def _connect(self): """ If a local datasource executor, do nothing. If an SSH datasource executor, create a connection to object the remote device. Make a new SSH connection object if there isn't one available. This doesn't actually connect to the device. """ if not self._useSsh: return defer.succeed(None) connection = self.pool.get(self._getPoolKey(), None) if connection is None: self.state = SshPerformanceCollectionTask.STATE_CONNECTING log.debug("Creating connection object to %s", self._devId) username = self._device.zCommandUsername password = self._device.zCommandPassword loginTimeout = self._device.zCommandLoginTimeout commandTimeout = self._device.zCommandCommandTimeout keypath = self._device.zKeyPath options = SshOptions(username, password, loginTimeout, commandTimeout, keypath, self._concurrentSessions) connection = MySshClient(self._devId, self._manageIp, self._device.zCommandPort, options=options) connection.sendEvent = self._eventService.sendEvent # Opens SSH connection to device d = connection.run() self.pool[self._getPoolKey()] = d return d return connection def _close(self): """ If a local datasource executor, do nothing. If an SSH datasource executor, relinquish a connection to the remote device. """ if self._connection: self._connection._taskList.discard(self) if not self._connection._taskList: if self._getPoolKey() in self.pool: client = self.pool[self._getPoolKey()] client.clientFinished() self._connection = None def connectionFailed(self, msg): """ This method is called by the SSH client when the connection fails. @parameter msg: message indicating the cause of the problem @type msg: string """ # Note: Raising an exception and then catching it doesn't work # as it appears that the exception is discarded in PBDaemon.py self.state = TaskStates.STATE_PAUSED log.error("Pausing task %s as %s [%s] connection failure: %s", self.name, self._devId, self._manageIp, msg.getErrorMessage()) self._eventService.sendEvent(STATUS_EVENT, device=self._devId, summary=msg.getErrorMessage(), component=COLLECTOR_NAME, severity=Event.Error) self._task_defer.errback(msg) return msg def _connectCallback(self, connection): """ Callback called after a successful connect to the remote device. """ if self._useSsh: self._connection = connection self._connection._taskList.add(self) msg = "Connected to %s [%s]" % (self._devId, self._manageIp) self._connection.sendEvent(STATUS_EVENT, device=self._devId, summary=msg, component=COLLECTOR_NAME, severity=Clear) else: msg = "Running command(s) locally" log.debug(msg) self._task_defer.callback(connection) return connection def _addDatasource(self, datasource): """ Add a new instantiation of ProcessRunner or SshRunner for every datasource. """ if self._preferences.options.showfullcommand: log.info("Datasource %s command: %s", datasource.name, datasource.command) if self._useSsh: runner = SshRunner(self._connection) else: runner = ProcessRunner() d = runner.start(datasource) datasource.lastStart = time.time() d.addBoth(datasource.processCompleted) return d def _fetchPerf(self, ignored): """ Get performance data for all the monitored components on a device @parameter ignored: required to keep Twisted's callback chain happy @type ignored: result of previous callback """ self.state = SshPerformanceCollectionTask.STATE_FETCH_DATA # The keys are the datasource commands, which are by definition unique # to the command run. cacheableDS = {} # Bundle up the list of tasks deferredCmds = [] for datasource in self._datasources: datasource.deviceConfig = self._device if datasource.command in cacheableDS: cacheableDS[datasource.command].append(datasource) continue cacheableDS[datasource.command] = [] task = self._executor.submit(self._addDatasource, datasource) deferredCmds.append(task) # Run the tasks dl = defer.DeferredList(deferredCmds, consumeErrors=True) dl.addCallback(self._parseResults, cacheableDS) dl.addCallback(self._storeResults) dl.addCallback(self._updateStatus) # Save the list in case we need to cancel the commands self._commandsToExecute = dl return dl def _parseResults(self, resultList, cacheableDS): """ Interpret the results retrieved from the commands and pass on the datapoint values and events. @parameter resultList: results of running the commands in a DeferredList @type resultList: array of (boolean, datasource) @parameter cacheableDS: other datasources that can use the same results @type cacheableDS: dictionary of arrays of datasources """ self.state = SshPerformanceCollectionTask.STATE_PARSE_DATA parseableResults = [] for success, datasource in resultList: results = ParsedResults() if not success: # In this case, our datasource is actually a defer.Failure reason = datasource datasource, = reason.value.args msg = "Datasource %s command timed out" % ( datasource.name) ev = self._makeCmdEvent(datasource, msg, event_key='Timeout') results.events.append(ev) else: # clear our timeout event msg = "Datasource %s command timed out" % datasource.name ev = self._makeCmdEvent(datasource, msg, severity=Clear, event_key='Timeout') # Re-use our results for any similar datasources cachedDsList = cacheableDS.get(datasource.command) if cachedDsList: for ds in cachedDsList: ds.result = copy(datasource.result) results = ParsedResults() self._processDatasourceResults(ds, results) parseableResults.append( (ds, results) ) results = ParsedResults() results.events.append(ev) self._processDatasourceResults(datasource, results) parseableResults.append( (datasource, results) ) return parseableResults def _makeParser(self, datasource, eventList): """ Create a parser object to process data @parameter datasource: datasource containg information @type datasource: Cmd object @parameter eventList: list of events @type eventList: list of dictionaries """ parser = None try: parser = datasource.parser.create() except Exception: msg = "Error loading parser %s" % datasource.parser log.exception("%s %s %s", self.name, datasource.name, msg) ev = self._makeCmdEvent(datasource, msg) ev['message'] = traceback.format_exc() eventList.append(ev) return parser def _processDatasourceResults(self, datasource, results): """ Process a single datasource's results @parameter datasource: datasource containg information @type datasource: Cmd object @parameter results: empty results object @type results: ParsedResults object """ showcommand = self._preferences.options.showfullcommand result = datasource.result if result.exitCode == 0 and not result.output.strip(): msg = "No data returned for command" if showcommand: msg += ": %s" % datasource.command log.warn("%s %s %s", self.name, datasource.name, msg) ev = self._makeCmdEvent(datasource, msg) if showcommand: ev['command'] = datasource.command results.events.append(ev) return parser = self._makeParser(datasource, results.events) if not parser: return try: parser.preprocessResults(datasource, log) parser.processResults(datasource, results) if not results.events and parser.createDefaultEventUsingExitCode: # Add a failsafe event guessing at the error codes self._addDefaultEvent(datasource, results) if datasource.result.stderr: self._addStderrMsg(datasource.result.stderr, results.events) except Exception: msg = "Error running parser %s" % datasource.parser log.exception("%s %s %s", self.name, datasource.name, msg) ev = self._makeCmdEvent(datasource, msg) ev['message'] = traceback.format_exc() ev['output'] = datasource.result.output results.events.append(ev) def _addDefaultEvent(self, datasource, results): """ If there is no event, send one based on the exit code. """ exitCode = datasource.result.exitCode if exitCode == 0: msg = '' severity = 0 else: msg = 'Datasource: %s - Code: %s - Msg: %s' % ( datasource.name, exitCode, getExitMessage(exitCode)) severity = datasource.severity ev = self._makeCmdEvent(datasource, msg, severity) results.events.append(ev) def _addStderrMsg(self, stderrMsg, eventList): """ Add the stderr output to error events. @parameter stderrMsg: stderr output from the command @type stderrMsg: string @parameter eventList: list of events @type eventList: list of dictionaries """ for event in eventList: if event['severity'] not in ('Clear', 'Info', 'Debug'): event['stderr'] = stderrMsg def _storeResults(self, resultList): """ Store the values in RRD files @parameter resultList: results of running the commands @type resultList: array of (datasource, dictionary) """ self.state = SshPerformanceCollectionTask.STATE_STORE_PERF for datasource, results in resultList: for dp, value in results.values: threshData = { 'eventKey': datasource.getEventKey(dp), 'component': dp.component, } self._dataService.writeRRD( dp.rrdPath, value, dp.rrdType, dp.rrdCreateCommand, datasource.cycleTime, dp.rrdMin, dp.rrdMax, threshData) return resultList def _updateStatus(self, resultList): """ Send any accumulated events @parameter resultList: results of running the commands @type resultList: array of (datasource, dictionary) """ for datasource, results in resultList: self._clearEvent(datasource, results.events) for ev in results.events: self._eventService.sendEvent(ev, device=self._devId) return resultList def _clearEvent(self, datasource, eventList): """ Ensure that a CLEAR event is sent for any command that successfully completes. """ # If the result is a Failure, no exitCode exists exitCode = getattr(datasource.result, 'exitCode', -1) # Don't send if non-zero exit code or no output returned if exitCode is None or exitCode != 0 or not datasource.result.output.strip(): return clearEvents = [ev for ev in eventList if ev['severity'] == Clear] if not clearEvents: msg = 'Datasource %s command completed successfully' % ( datasource.name) ev = self._makeCmdEvent(datasource, msg, severity=Clear) eventList.append(ev) def _makeCmdEvent(self, datasource, msg, severity=None, event_key=None): """ Create an event using the info in the Cmd object. """ severity = datasource.severity if severity is None else severity event_key = datasource.eventKey if event_key is None else event_key ev = dict( device=self._devId, component=datasource.component, eventClass=datasource.eventClass, eventKey=event_key, severity=severity, summary=msg ) return ev def _finished(self, result): """ Callback activated when the task is complete @parameter result: results of the task @type result: deferred object """ if not isinstance(result, Failure): self._returnToNormalSchedule() try: self._close() except Exception, ex: log.warn("Failed to close device %s: error %s" % (self._devId, str(ex))) # Return the result so the framework can track success/failure return result
class SshPerformanceCollectionTask(BaseTask): """ A task that performs periodic performance collection for devices providing data via SSH connections. """ zope.interface.implements(IScheduledTask) STATE_CONNECTING = 'CONNECTING' STATE_FETCH_DATA = 'FETCH_DATA' STATE_PARSE_DATA = 'PARSING_DATA' STATE_STORE_PERF = 'STORE_PERF_DATA' def __init__(self, taskName, configId, scheduleIntervalSeconds, taskConfig): """ @param taskName: the unique identifier for this task @type taskName: string @param configId: configuration to watch @type configId: string @param scheduleIntervalSeconds: the interval at which this task will be collected @type scheduleIntervalSeconds: int @param taskConfig: the configuration for this task """ super(SshPerformanceCollectionTask, self).__init__(taskName, configId, scheduleIntervalSeconds, taskConfig) # Needed for interface self.name = taskName self.configId = configId self.state = TaskStates.STATE_IDLE self.interval = scheduleIntervalSeconds # The taskConfig corresponds to a DeviceProxy self._device = taskConfig self._devId = self._device.id self._manageIp = self._device.manageIp self._dataService = zope.component.queryUtility(IDataService) self._eventService = zope.component.queryUtility(IEventService) self._preferences = zope.component.queryUtility( ICollectorPreferences, COLLECTOR_NAME) self._lastErrorMsg = '' self._maxbackoffseconds = self._preferences.options.maxbackoffminutes * 60 self._concurrentSessions = taskConfig.zSshConcurrentSessions self._executor = TwistedExecutor(self._concurrentSessions) self._useSsh = taskConfig.datasources[0].useSsh self._connection = None self._datasources = taskConfig.datasources self.pool = getPool('SSH Connections') self.executed = 0 self._task_defer = None def __str__(self): return "COMMAND schedule Name: %s configId: %s Datasources: %d" % ( self.name, self.configId, len(self._datasources)) def cleanup(self): self._cleanUpPool() self._close() def _getPoolKey(self): """ Get the key under which the client should be stored in the pool. """ username = self._device.zCommandUsername password = self._device.zCommandPassword ip = self._manageIp port = self._device.zCommandPort return hash((username, password, ip, port)) def _cleanUpPool(self): """ Close the connection currently associated with this task. """ poolkey = self._getPoolKey() if poolkey in self.pool: client = self.pool[poolkey] if not client._taskList: # No other tasks, so safe to clean up transport = client.transport if transport: transport.loseConnection() del self.pool[poolkey] def doTask(self): """ Contact to one device and return a deferred which gathers data from the device. @return: Deferred actions to run against a device configuration @rtype: Twisted deferred object """ # Create a deferred wrapper for the scheduler since the connection # deferred is shared among many tasks. We have to initialize this # first to satisfy race condition with _connectCallback d = self._task_defer = defer.Deferred() d.addCallback(self._fetchPerf) # Call _finished for both success and error scenarios d.addBoth(self._finished) #---------------------------------------------------------------------- # See if we need to connect first before doing any collection d = defer.maybeDeferred(self._connect) d.addCallbacks(self._connectCallback, self.connectionFailed) # Wait until the Deferred actually completes return self._task_defer def _connect(self): """ If a local datasource executor, do nothing. If an SSH datasource executor, create a connection to object the remote device. Make a new SSH connection object if there isn't one available. This doesn't actually connect to the device. """ if not self._useSsh: return defer.succeed(None) connection = self.pool.get(self._getPoolKey(), None) if connection is None: self.state = SshPerformanceCollectionTask.STATE_CONNECTING log.debug("Creating connection object to %s", self._devId) username = self._device.zCommandUsername password = self._device.zCommandPassword loginTimeout = self._device.zCommandLoginTimeout commandTimeout = self._device.zCommandCommandTimeout keypath = self._device.zKeyPath options = SshOptions(username, password, loginTimeout, commandTimeout, keypath, self._concurrentSessions) connection = MySshClient(self._devId, self._manageIp, self._device.zCommandPort, options=options) connection.sendEvent = self._eventService.sendEvent # Opens SSH connection to device d = connection.run() self.pool[self._getPoolKey()] = d return d return connection def _close(self): """ If a local datasource executor, do nothing. If an SSH datasource executor, relinquish a connection to the remote device. """ if self._connection: self._connection._taskList.discard(self) if not self._connection._taskList: if self._getPoolKey() in self.pool: client = self.pool[self._getPoolKey()] client.clientFinished() self._connection = None def connectionFailed(self, msg): """ This method is called by the SSH client when the connection fails. @parameter msg: message indicating the cause of the problem @type msg: string """ # Note: Raising an exception and then catching it doesn't work # as it appears that the exception is discarded in PBDaemon.py self.state = TaskStates.STATE_PAUSED log.error("Pausing task %s as %s [%s] connection failure: %s", self.name, self._devId, self._manageIp, msg.getErrorMessage()) self._eventService.sendEvent(STATUS_EVENT, device=self._devId, summary=msg.getErrorMessage(), component=COLLECTOR_NAME, severity=Event.Error) self._task_defer.errback(msg) return msg def _connectCallback(self, connection): """ Callback called after a successful connect to the remote device. """ if self._useSsh: self._connection = connection self._connection._taskList.add(self) msg = "Connected to %s [%s]" % (self._devId, self._manageIp) self._connection.sendEvent(STATUS_EVENT, device=self._devId, summary=msg, component=COLLECTOR_NAME, severity=Clear) else: msg = "Running command(s) locally" log.debug(msg) self._task_defer.callback(connection) return connection def _addDatasource(self, datasource): """ Add a new instantiation of ProcessRunner or SshRunner for every datasource. """ if self._preferences.options.showfullcommand: log.info("Datasource %s command: %s", datasource.name, datasource.command) if self._useSsh: runner = SshRunner(self._connection) else: runner = ProcessRunner() d = runner.start(datasource) datasource.lastStart = time.time() d.addBoth(datasource.processCompleted) return d def _fetchPerf(self, ignored): """ Get performance data for all the monitored components on a device @parameter ignored: required to keep Twisted's callback chain happy @type ignored: result of previous callback """ self.state = SshPerformanceCollectionTask.STATE_FETCH_DATA # The keys are the datasource commands, which are by definition unique # to the command run. cacheableDS = {} # Bundle up the list of tasks deferredCmds = [] for datasource in self._datasources: datasource.deviceConfig = self._device if datasource.command in cacheableDS: cacheableDS[datasource.command].append(datasource) continue cacheableDS[datasource.command] = [] task = self._executor.submit(self._addDatasource, datasource) deferredCmds.append(task) # Run the tasks dl = defer.DeferredList(deferredCmds, consumeErrors=True) dl.addCallback(self._parseResults, cacheableDS) dl.addCallback(self._storeResults) dl.addCallback(self._updateStatus) # Save the list in case we need to cancel the commands self._commandsToExecute = dl return dl def _parseResults(self, resultList, cacheableDS): """ Interpret the results retrieved from the commands and pass on the datapoint values and events. @parameter resultList: results of running the commands in a DeferredList @type resultList: array of (boolean, datasource) @parameter cacheableDS: other datasources that can use the same results @type cacheableDS: dictionary of arrays of datasources """ self.state = SshPerformanceCollectionTask.STATE_PARSE_DATA parseableResults = [] for success, datasource in resultList: results = ParsedResults() if not success: # In this case, our datasource is actually a defer.Failure reason = datasource datasource, = reason.value.args msg = "Datasource %s command timed out" % (datasource.name) ev = self._makeCmdEvent(datasource, msg, event_key='Timeout') results.events.append(ev) else: # clear our timeout event msg = "Datasource %s command timed out" % datasource.name ev = self._makeCmdEvent(datasource, msg, severity=Clear, event_key='Timeout') # Re-use our results for any similar datasources cachedDsList = cacheableDS.get(datasource.command) if cachedDsList: for ds in cachedDsList: ds.result = copy(datasource.result) results = ParsedResults() self._processDatasourceResults(ds, results) parseableResults.append((ds, results)) results = ParsedResults() results.events.append(ev) self._processDatasourceResults(datasource, results) parseableResults.append((datasource, results)) return parseableResults def _makeParser(self, datasource, eventList): """ Create a parser object to process data @parameter datasource: datasource containg information @type datasource: Cmd object @parameter eventList: list of events @type eventList: list of dictionaries """ parser = None try: parser = datasource.parser.create() except Exception: msg = "Error loading parser %s" % datasource.parser log.exception("%s %s %s", self.name, datasource.name, msg) ev = self._makeCmdEvent(datasource, msg) ev['message'] = traceback.format_exc() eventList.append(ev) return parser def _processDatasourceResults(self, datasource, results): """ Process a single datasource's results @parameter datasource: datasource containg information @type datasource: Cmd object @parameter results: empty results object @type results: ParsedResults object """ showcommand = self._preferences.options.showfullcommand result = datasource.result if result.exitCode == 0 and not result.output.strip(): msg = "No data returned for command" if showcommand: msg += ": %s" % datasource.command log.warn("%s %s %s", self.name, datasource.name, msg) ev = self._makeCmdEvent(datasource, msg) if showcommand: ev['command'] = datasource.command results.events.append(ev) return parser = self._makeParser(datasource, results.events) if not parser: return try: parser.preprocessResults(datasource, log) parser.processResults(datasource, results) if not results.events and parser.createDefaultEventUsingExitCode: # Add a failsafe event guessing at the error codes self._addDefaultEvent(datasource, results) if datasource.result.stderr: self._addStderrMsg(datasource.result.stderr, results.events) except Exception: msg = "Error running parser %s" % datasource.parser log.exception("%s %s %s", self.name, datasource.name, msg) ev = self._makeCmdEvent(datasource, msg) ev['message'] = traceback.format_exc() ev['output'] = datasource.result.output results.events.append(ev) def _addDefaultEvent(self, datasource, results): """ If there is no event, send one based on the exit code. """ exitCode = datasource.result.exitCode if exitCode == 0: msg = '' severity = 0 else: msg = 'Datasource: %s - Code: %s - Msg: %s' % ( datasource.name, exitCode, getExitMessage(exitCode)) severity = datasource.severity ev = self._makeCmdEvent(datasource, msg, severity) results.events.append(ev) def _addStderrMsg(self, stderrMsg, eventList): """ Add the stderr output to error events. @parameter stderrMsg: stderr output from the command @type stderrMsg: string @parameter eventList: list of events @type eventList: list of dictionaries """ for event in eventList: if event['severity'] not in ('Clear', 'Info', 'Debug'): event['stderr'] = stderrMsg def _storeResults(self, resultList): """ Store the values in RRD files @parameter resultList: results of running the commands @type resultList: array of (datasource, dictionary) """ self.state = SshPerformanceCollectionTask.STATE_STORE_PERF for datasource, results in resultList: for dp, value in results.values: threshData = { 'eventKey': datasource.getEventKey(dp), 'component': dp.component, } self._dataService.writeRRD(dp.rrdPath, value, dp.rrdType, dp.rrdCreateCommand, datasource.cycleTime, dp.rrdMin, dp.rrdMax, threshData) return resultList def _updateStatus(self, resultList): """ Send any accumulated events @parameter resultList: results of running the commands @type resultList: array of (datasource, dictionary) """ for datasource, results in resultList: self._clearEvent(datasource, results.events) for ev in results.events: self._eventService.sendEvent(ev, device=self._devId) return resultList def _clearEvent(self, datasource, eventList): """ Ensure that a CLEAR event is sent for any command that successfully completes. """ # If the result is a Failure, no exitCode exists exitCode = getattr(datasource.result, 'exitCode', -1) # Don't send if non-zero exit code or no output returned if exitCode is None or exitCode != 0 or not datasource.result.output.strip( ): return clearEvents = [ev for ev in eventList if ev['severity'] == Clear] if not clearEvents: msg = 'Datasource %s command completed successfully' % ( datasource.name) ev = self._makeCmdEvent(datasource, msg, severity=Clear) eventList.append(ev) def _makeCmdEvent(self, datasource, msg, severity=None, event_key=None): """ Create an event using the info in the Cmd object. """ severity = datasource.severity if severity is None else severity event_key = datasource.eventKey if event_key is None else event_key ev = dict(device=self._devId, component=datasource.component, eventClass=datasource.eventClass, eventKey=event_key, severity=severity, summary=msg) return ev def _finished(self, result): """ Callback activated when the task is complete @parameter result: results of the task @type result: deferred object """ if not isinstance(result, Failure): self._returnToNormalSchedule() try: self._close() except Exception, ex: log.warn("Failed to close device %s: error %s" % (self._devId, str(ex))) # Return the result so the framework can track success/failure return result