class DefaultManager(object):
    """Default storage manager used by all shadow classes.

    Delegates most of the actual work to helper methods defined on the
    managed Shadow subclass itself.
    """
    _logger = ipdevpoll.ContextLogger()

    def __init__(self, cls, containers):
        """Initializes a storage manager.

        :param cls: The Shadow subclass this instance will work with.
        :param containers: A ContainerRepository instance.
        """
        self.cls = cls
        self.containers = containers

    def prepare(self):
        """Prepares managed shadows in containers"""
        self.cls.prepare_for_save(self.containers)

    def save(self):
        """Saves managed shadows in containers"""
        for shadow in self.get_managed():
            shadow.save(self.containers)

    def cleanup(self):
        """Runs any necessary cleanup hooks after save is done"""
        self.cls.cleanup_after_save(self.containers)

    def get_managed(self):
        """Returns the list of container objects managed by this instance"""
        if self.cls not in self.containers:
            return []
        return self.containers[self.cls].values()

    def __repr__(self):
        manager_name = self.__class__.__name__
        repo_name = self.containers.__class__.__name__
        return "%s(%r, %r(...))" % (manager_name, self.cls, repo_name)
def __init__(mcs, name, bases, dct):
    """Initializes a new shadow class.

    Mirrors the Django model fields of the class' ``__shadowclass__``
    (when one is declared) onto the new class, and registers the new
    class in ``MetaShadow.shadowed_classes``.
    """
    # Ensure the attribute exists in the class dict; default to None
    shadowclass = dct.setdefault('__shadowclass__', None)

    super(MetaShadow, mcs).__init__(name, bases, dct)

    if shadowclass is None:
        # Not tied to a Django model; nothing more to set up
        return

    meta = shadowclass._meta
    mcs._meta = meta
    mcs._fields = [field.name for field in meta.fields]
    # Pre-declare every model field as a class attribute defaulting to None
    for field_name in mcs._fields:
        setattr(mcs, field_name, None)
    mcs._logger = ipdevpoll.ContextLogger()

    MetaShadow.shadowed_classes[shadowclass] = mcs
class NetboxJobScheduler(object):
    """Netbox job schedule handler.

    An instance of this class takes care of scheduling, running and
    rescheduling of a single JobHandler for a single netbox.

    """
    # Class-level state shared between all instances:
    # job name -> number of currently running jobs of that name
    job_counters = {}
    # job name -> FIFO of schedulers waiting for a per-job intensity slot
    job_queues = {}
    # FIFO of schedulers waiting because the global job limit was reached
    global_job_queue = []
    # Upper bound on concurrently running jobs across all job types
    global_intensity = config.ipdevpoll_conf.getint('ipdevpoll',
                                                    'max_concurrent_jobs')
    _logger = ipdevpoll.ContextLogger()

    def __init__(self, job, netbox, pool):
        """Initializes a scheduler for one job on one netbox.

        :param job: a job descriptor (provides name, interval, plugins and
                    intensity).
        :param netbox: the netbox this job will run against.
        :param pool: the worker pool used to execute the job.
        """
        self.job = job
        self.netbox = netbox
        self.pool = pool
        self._log_context = dict(job=job.name, sysname=netbox.sysname)
        self._logger.debug("initializing %r job scheduling for %s",
                           job.name, netbox.sysname)
        self.cancelled = False
        # Fired (with this instance) when the scheduler is cancelled
        self._deferred = Deferred()
        self._next_call = None
        self._last_job_started_at = 0
        self.running = False
        self._start_time = None
        self._current_job = None
        # Bound as an instance attribute so it can be substituted per instance
        self.callLater = reactor.callLater

    def get_current_runtime(self):
        """Returns time elapsed since the start of the job as a timedelta."""
        return datetime.datetime.now() - self._start_time

    def start(self):
        """Start polling schedule.

        :returns: a Deferred that fires with this instance when the
                  scheduler is cancelled.
        """
        self._next_call = self.callLater(0, self.run_job)
        return self._deferred

    def cancel(self):
        """Cancel scheduling of this job for this box.

        Future runs will not be scheduled after this.

        """
        if self.cancelled:
            self._logger.debug("cancel: Job %r already cancelled for %s",
                               self.job.name, self.netbox.sysname)
            return

        if self._next_call.active():
            self._next_call.cancel()
            self._logger.debug("cancel: Job %r cancelled for %s",
                               self.job.name, self.netbox.sysname)
        else:
            self._logger.debug(
                "cancel: Job %r cancelled for %s, "
                "though no next run was scheduled",
                self.job.name, self.netbox.sysname)

        self.cancelled = True
        self.cancel_running_job()
        # Fire the deferred returned by start() to signal completion
        self._deferred.callback(self)

    def cancel_running_job(self):
        # Cancels the currently executing job in the worker pool, if any
        if self._current_job:
            self._logger.debug('Cancelling running job')
            self.pool.cancel(self._current_job)

    def run_job(self, dummy=None):
        """Runs the job, unless a previous run is still in progress or an
        intensity limit is in effect.

        When a per-job or global limit is reached, the scheduler queues
        itself and is restarted later by unqueue_next_job() /
        unqueue_next_global_job().
        """
        if self.is_running():
            self._logger.info("Previous %r job is still running for %s, "
                              "not running again now.",
                              self.job.name, self.netbox.sysname)
            return

        if self.is_job_limit_reached():
            self._logger.debug("intensity limit reached for %s - waiting to "
                               "run for %s",
                               self.job.name, self.netbox.sysname)
            self.queue_myself(self.get_job_queue())
            return

        if self.is_global_limit_reached():
            self._logger.debug("global intensity limit reached - waiting to "
                               "run for %s", self.netbox.sysname)
            self.queue_myself(self.global_job_queue)
            return

        # We're ok to start a polling run.
        try:
            self._start_time = datetime.datetime.now()
            deferred = self.pool.execute_job(self.job.name,
                                             self.netbox.id,
                                             plugins=self.job.plugins,
                                             interval=self.job.interval)
            self._current_job = deferred
        except Exception:
            self._log_unhandled_error(Failure())
            # Could not even submit the job; retry in a minute
            self.reschedule(60)
            return

        self.count_job()
        self._last_job_started_at = time.time()

        deferred.addErrback(self._adjust_intensity_on_snmperror)
        deferred.addCallbacks(self._reschedule_on_success,
                              self._reschedule_on_failure)
        deferred.addErrback(self._log_unhandled_error)
        deferred.addCallback(self._unregister_handler)

    def is_running(self):
        """Returns True if this scheduler's job is currently executing."""
        return self.running

    @classmethod
    def _adjust_intensity_on_snmperror(cls, failure):
        """Lowers the global intensity limit to ~90% of the current number
        of open SNMP sessions when a job was aborted by an SNMP error.

        Passes the failure through unchanged for further errbacks.
        """
        if (failure.check(AbortedJobError)
                and isinstance(failure.value.cause, SnmpError)):

            open_sessions = AgentProxy.count_open_sessions()
            new_limit = int(ceil(open_sessions * 0.90))
            if new_limit < cls.global_intensity:
                cls._logger.warning("Setting global intensity limit to %d",
                                    new_limit)
                cls.global_intensity = new_limit
        return failure

    def _update_counters(self, success):
        """Increments this job's success or failure counter metric.

        NOTE(review): any falsy `success` value (including the None passed
        by _reschedule_on_success for jobs that did nothing) increments the
        failure counter -- confirm this is intended.
        """
        prefix = metric_prefix_for_ipdevpoll_job(self.netbox.sysname,
                                                 self.job.name)
        counter_path = (prefix
                        + (".success-count" if success
                           else ".failure-count"))
        _COUNTERS.increment(counter_path)
        _COUNTERS.start()

    def _reschedule_on_success(self, result):
        """Reschedules the next normal run of this job."""
        delay = max(0, self.job.interval - self.get_runtime())
        self.reschedule(delay)
        if result:
            self._log_finished_job(True)
        else:
            self._logger.debug("job did nothing")
        # NOTE(review): None ("did nothing") is falsy and thus counted as
        # a failure by _update_counters -- verify this is intended
        self._update_counters(True if result else None)
        return result

    def _reschedule_on_failure(self, failure):
        """Examines the job failure and reschedules the job if needed."""
        if failure.check(SuggestedReschedule):
            delay = int(failure.value.delay)
        else:
            # within 5-10 minutes, but no longer than set interval
            delay = min(self.job.interval, randint(5 * 60, 10 * 60))
        self.reschedule(delay)
        self._log_finished_job(False)
        self._update_counters(False)
        # Anything other than AbortedJobError propagates to the next errback
        failure.trap(AbortedJobError)

    def _log_finished_job(self, success=True):
        """Logs the completion status, runtime and next scheduled run of
        this job.
        """
        status = "completed" if success else "failed"
        runtime = datetime.timedelta(seconds=self.get_runtime())
        next_time = self.get_time_to_next_run()
        if next_time is not None:
            if next_time <= 0:
                delta = "right now"
            else:
                delta = "in %s" % datetime.timedelta(seconds=next_time)
            self._logger.info("%s for %s %s in %s. next run %s.",
                              self.job.name, self.netbox.sysname,
                              status, runtime, delta)
        else:
            self._logger.info("%s in %s. no next run scheduled",
                              status, runtime)

    def get_runtime(self):
        """Returns the number of seconds passed since the start of last job"""
        return time.time() - self._last_job_started_at

    def get_time_to_next_run(self):
        """Returns the number of seconds until the next job starts, or
        None when no next run is currently scheduled.
        """
        if self._next_call.active():
            return self._next_call.getTime() - time.time()

    def reschedule(self, delay):
        """Reschedules the next run of this job"""
        if self.cancelled:
            self._logger.debug("ignoring request to reschedule cancelled job")
            return
        next_time = (datetime.datetime.now()
                     + datetime.timedelta(seconds=delay))

        self._logger.debug("Next %r job for %s will be in %d seconds (%s)",
                           self.job.name, self.netbox.sysname,
                           delay, next_time)

        if self._next_call.active():
            self._next_call.reset(delay)
        else:
            self._next_call = self.callLater(delay, self.run_job)

    def _log_unhandled_error(self, failure):
        # Reset DB connections are handled elsewhere; log everything else
        if not failure.check(db.ResetDBConnectionError):
            log_unhandled_failure(self._logger,
                                  failure,
                                  "Unhandled exception raised by JobHandler")

    def _unregister_handler(self, result):
        """Remove a JobHandler from internal data structures."""
        if self.running:
            self.uncount_job()
            self.unqueue_next_job()
            self.unqueue_next_global_job()
        return result

    def count_job(self):
        """Increments the running-job counter for this job's name and marks
        this scheduler as running.
        """
        current_count = self.__class__.job_counters.get(self.job.name, 0)
        current_count += 1
        self.__class__.job_counters[self.job.name] = current_count
        self.running = True

    def uncount_job(self):
        """Decrements the running-job counter for this job's name (never
        below zero) and marks this scheduler as no longer running.
        """
        current_count = self.__class__.job_counters.get(self.job.name, 0)
        current_count -= 1
        self.__class__.job_counters[self.job.name] = max(current_count, 0)
        self.running = False
        self._current_job = None

    def get_job_count(self):
        """Returns the number of currently running jobs with this job's name"""
        return self.__class__.job_counters.get(self.job.name, 0)

    def is_job_limit_reached(self):
        "Returns True if the number of jobs >= the job intensity limit"
        return (self.job.intensity > 0
                and self.get_job_count() >= self.job.intensity)

    @classmethod
    def is_global_limit_reached(cls):
        "Returns True if the global number of jobs >= global intensity limit"
        return cls.get_global_job_count() >= cls.global_intensity

    @classmethod
    def get_global_job_count(cls):
        """Returns the total number of running jobs across all job names"""
        if cls.job_counters:
            return sum(cls.job_counters.values())
        else:
            return 0

    def queue_myself(self, queue):
        # Appends this scheduler to a wait queue for a later restart
        queue.append(self)

    def unqueue_next_job(self):
        "Unqueues the next waiting job"
        queue = self.get_job_queue()
        if queue and not self.is_job_limit_reached():
            handler = queue.pop(0)
            return handler.start()

    @classmethod
    def unqueue_next_global_job(cls):
        "Unqueues the next job waiting because of the global intensity setting"
        if not cls.is_global_limit_reached():
            # Start the first queued handler that isn't also blocked by its
            # own per-job limit
            for index, handler in enumerate(cls.global_job_queue):
                if not handler.is_job_limit_reached():
                    del cls.global_job_queue[index]
                    return handler.start()

    def get_job_queue(self):
        """Returns (creating, if necessary) the wait queue for this job's
        name.
        """
        if self.job.name not in self.job_queues:
            self.job_queues[self.job.name] = []
        return self.job_queues[self.job.name]
class JobScheduler(object):
    """Schedules a single job type for all applicable netboxes.

    A JobScheduler periodically reloads the list of netboxes to poll and
    maintains one NetboxJobScheduler per netbox for its job descriptor.
    """
    # All JobScheduler instances ever created (one per job descriptor)
    active_schedulers = set()
    # Class-wide LoopingCall that periodically logs running jobs
    job_logging_loop = None
    netbox_reload_interval = 2 * 60.0  # seconds
    # Per-instance LoopingCall reloading the netbox list (class-level default)
    netbox_reload_loop = None
    _logger = ipdevpoll.ContextLogger()

    def __init__(self, job, pool):
        """Initializes a job schedule from the job descriptor.

        :param job: a job descriptor.
        :param pool: the worker pool used to execute jobs.
        """
        self._log_context = dict(job=job.name)
        self.job = job
        self.pool = pool
        self.netboxes = NetboxLoader()
        self.active_netboxes = {}

        self.active_schedulers.add(self)

    @classmethod
    def initialize_from_config_and_run(cls, pool, onlyjob=None):
        """Creates and runs a scheduler for each configured job descriptor.

        :param pool: the worker pool handed to every scheduler.
        :param onlyjob: if given, only the job with this name is scheduled.
        """
        descriptors = config.get_jobs()
        # Use cls rather than a hardcoded class name so subclasses are
        # instantiated correctly from this classmethod
        schedulers = [
            cls(d, pool)
            for d in descriptors
            if not onlyjob or (d.name == onlyjob)
        ]
        for scheduler in schedulers:
            scheduler.run()

    def run(self):
        """Initiate scheduling of this job."""
        signals.netbox_type_changed.connect(self.on_netbox_type_changed)
        self._setup_active_job_logging()
        self._start_netbox_reload_loop()

    def _start_netbox_reload_loop(self):
        # (Re)starts the looping call that reloads the netbox list,
        # triggering an immediate first reload
        if not self.netbox_reload_loop:
            self.netbox_reload_loop = task.LoopingCall(self._reload_netboxes)
        if self.netbox_reload_loop.running:
            self.netbox_reload_loop.stop()
        self.netbox_reload_loop.start(interval=self.netbox_reload_interval,
                                      now=True)

    def on_netbox_type_changed(self, netbox_id, new_type, **_kwargs):
        """Performs various cleanup and reload actions on a netbox type
        change signal.

        The netbox' data are cleaned up, and the next netbox data reload
        is scheduled to take place immediately.

        """
        # Explicit conditional instead of the fragile `cond and a or b`
        # idiom; falls back to the ID when the loaded sysname is empty
        if netbox_id in self.netboxes:
            sysname = self.netboxes[netbox_id].sysname or str(netbox_id)
        else:
            sysname = str(netbox_id)
        self._logger.info("Cancelling all jobs for %s due to type change.",
                          sysname)
        self.cancel_netbox_scheduler(netbox_id)
        df = db.run_in_thread(shadows.Netbox.cleanup_replaced_netbox,
                              netbox_id, new_type)
        return df.addCallback(lambda x: self._start_netbox_reload_loop())

    def _setup_active_job_logging(self):
        # Starts the class-wide job logging loop the first time any
        # scheduler runs
        if self.__class__.job_logging_loop is None:
            loop = task.LoopingCall(self.__class__.log_active_jobs)
            self.__class__.job_logging_loop = loop
            loop.start(interval=5 * 60.0, now=False)

    def _reload_netboxes(self):
        """Reload the set of netboxes to poll and update schedules."""
        deferred = self.netboxes.load_all()
        deferred.addCallbacks(self._process_reloaded_netboxes,
                              self._handle_reload_failures)
        db.django_debug_cleanup()
        return deferred

    def _process_reloaded_netboxes(self, result):
        """Process the result of a netbox reload and update schedules."""
        (new_ids, removed_ids, changed_ids) = result

        # Deschedule removed and changed boxes
        for netbox_id in removed_ids.union(changed_ids):
            self.cancel_netbox_scheduler(netbox_id)

        # Schedule new and changed boxes, least recently updated first
        def _lastupdated(netboxid):
            return self.netboxes[netboxid].last_updated.get(
                self.job.name, datetime.datetime.min)

        new_and_changed = sorted(new_ids.union(changed_ids),
                                 key=_lastupdated)
        for netbox_id in new_and_changed:
            self.add_netbox_scheduler(netbox_id)

    def _handle_reload_failures(self, failure):
        # A reset DB connection is handled gracefully; any other failure
        # propagates
        failure.trap(db.ResetDBConnectionError)
        self._logger.error("Reloading the IP device list failed because the "
                           "database connection was reset")

    def add_netbox_scheduler(self, netbox_id):
        """Creates, registers and starts a NetboxJobScheduler for a netbox."""
        netbox = self.netboxes[netbox_id]
        scheduler = NetboxJobScheduler(self.job, netbox, self.pool)
        self.active_netboxes[netbox_id] = scheduler
        return scheduler.start()

    def cancel_netbox_scheduler(self, netbox_id):
        """Cancels and unregisters the scheduler for a netbox, if any."""
        if netbox_id not in self.active_netboxes:
            return
        scheduler = self.active_netboxes[netbox_id]
        scheduler.cancel()
        del self.active_netboxes[netbox_id]

    @classmethod
    def reload(cls):
        """Reload netboxes for all jobs"""
        for scheduler in cls.active_schedulers:
            scheduler._reload_netboxes()

    @classmethod
    def log_active_jobs(cls, level=logging.DEBUG):
        """Debug logs a list of running job handlers.

        The handlers will be sorted by descending runtime.

        """
        jobs = [(netbox_scheduler.netbox.sysname,
                 netbox_scheduler.job.name,
                 netbox_scheduler.get_current_runtime())
                for scheduler in cls.active_schedulers
                for netbox_scheduler in scheduler.active_netboxes.values()
                if netbox_scheduler.is_running()]
        jobs.sort(key=itemgetter(2), reverse=True)
        table_formatter = SimpleTableFormatter(jobs)

        logger = logging.getLogger("%s.joblist" % __name__)
        if jobs:
            logger.log(level, "currently active jobs (%d):\n%s",
                       len(jobs), table_formatter)
        else:
            logger.log(level, "no active jobs (%d JobHandlers)",
                       JobHandler.get_instance_count())
class NetboxLoader(dict):
    """Loads netboxes from the database, synchronously or asynchronously.

    Access as a dictionary to retrieve information about the loaded
    netboxes.  The dictionary keys are netbox table primary keys, the
    values are shadows.Netbox objects.

    """
    _logger = ipdevpoll.ContextLogger()

    def __init__(self):
        super(NetboxLoader, self).__init__()
        # Highest number of netboxes seen loaded at any one time
        self.peak_count = 0
        # touch _logger to initialize logging context right away
        # pylint: disable=W0104
        self._logger

    @autocommit
    def load_all_s(self):
        """Synchronously load netboxes from database.

        Returns:

          A three-tuple, (new_ids, lost_ids, changed_ids), whose elements
          are sets of netbox IDs.

          - The first set are IDs that are new since the last load
            operation.
          - The second is the set of IDs that have been removed since the
            last load operation.
          - The third is the set of IDs of netboxes whose information have
            changed in the database since the last load operation.

        """
        related = ('room__location', 'type__vendor', 'category',
                   'organization', 'device')
        # Subquery evaluates to true when the netbox has no unresolved
        # snmpAgentState alerts, i.e. its SNMP agent is considered up
        snmp_up_query = """SELECT COUNT(*) = 0
                           FROM alerthist
                           WHERE alerthist.netboxid = netbox.netboxid
                             AND eventtypeid='snmpAgentState'
                             AND end_time >= 'infinity'
                        """
        queryset = (manage.Netbox.objects.select_related(*related).extra(
            select={'snmp_up': snmp_up_query}))
        netbox_list = storage.shadowify_queryset(queryset)
        netbox_dict = dict((netbox.id, netbox) for netbox in netbox_list)

        # Attach per-job last-update timestamps to each netbox
        times = load_last_updated_times()
        for netbox in netbox_list:
            netbox.last_updated = times.get(netbox.id, {})

        django_debug_cleanup()

        # Diff the freshly loaded set against our current contents
        previous_ids = set(self.keys())
        current_ids = set(netbox_dict.keys())
        lost_ids = previous_ids.difference(current_ids)
        new_ids = current_ids.difference(previous_ids)
        same_ids = previous_ids.intersection(current_ids)
        changed_ids = set(i for i in same_ids
                          if is_netbox_changed(self[i], netbox_dict[i]))

        # update self
        for i in lost_ids:
            del self[i]
        for i in new_ids:
            self[i] = netbox_dict[i]
        for i in same_ids:
            self[i].copy(netbox_dict[i])

        self.peak_count = max(self.peak_count, len(self))

        # Log at info level only when something actually changed
        anything_changed = len(new_ids) or len(lost_ids) or len(changed_ids)
        log = self._logger.info if anything_changed else self._logger.debug
        log(
            "Loaded %d netboxes from database "
            "(%d new, %d removed, %d changed, %d peak)",
            len(netbox_dict), len(new_ids), len(lost_ids), len(changed_ids),
            self.peak_count)

        return (new_ids, lost_ids, changed_ids)

    def load_all(self):
        """Asynchronously load netboxes from database."""
        return run_in_thread(self.load_all_s)
class NetboxLoader(dict):
    """Loads netboxes from the database, synchronously or asynchronously.

    Access as a dictionary to retrieve information about the loaded
    netboxes.  The dictionary keys are netbox table primary keys, the
    values are shadows.Netbox objects.

    """
    _logger = ipdevpoll.ContextLogger()

    def __init__(self):
        super(NetboxLoader, self).__init__()
        # Highest number of netboxes seen loaded at any one time
        self.peak_count = 0
        # touch _logger to initialize logging context right away
        # pylint: disable=W0104
        self._logger

    def load_all_s(self):
        """Synchronously load netboxes from database.

        Returns:

          A three-tuple, (new_ids, lost_ids, changed_ids), whose elements
          are sets of netbox IDs.

          - The first set are IDs that are new since the last load
            operation.
          - The second is the set of IDs that have been removed since the
            last load operation.
          - The third is the set of IDs of netboxes whose information have
            changed in the database since the last load operation.

        """
        related = ('room__location', 'type__vendor', 'category',
                   'organization')
        # Netboxes with unresolved snmpAgentState alerts get their SNMP
        # agents flagged as down below
        snmp_down = set(
            event.AlertHistory.objects.unresolved(
                'snmpAgentState').values_list('netbox__id', flat=True))
        self._logger.debug("These netboxes have active snmpAgentStates: %r",
                           snmp_down)
        queryset = manage.Netbox.objects.filter(deleted_at__isnull=True)
        # Apply optional include/exclude filters on netbox group membership
        filter_groups_included = get_netbox_filter('groups_included')
        if filter_groups_included:
            queryset = queryset.filter(
                groups__id__in=filter_groups_included)
        filter_groups_excluded = get_netbox_filter('groups_excluded')
        if filter_groups_excluded:
            queryset = queryset.exclude(
                groups__id__in=filter_groups_excluded)
        queryset = list(queryset.select_related(*related))
        for netbox in queryset:
            netbox.snmp_up = netbox.id not in snmp_down
        netbox_list = storage.shadowify_queryset(queryset)
        netbox_dict = dict((netbox.id, netbox) for netbox in netbox_list)

        # Attach per-job last-update timestamps to each netbox
        times = load_last_updated_times()
        for netbox in netbox_list:
            netbox.last_updated = times.get(netbox.id, {})

        django_debug_cleanup()

        # Diff the freshly loaded set against our current contents
        previous_ids = set(self.keys())
        current_ids = set(netbox_dict.keys())
        lost_ids = previous_ids.difference(current_ids)
        new_ids = current_ids.difference(previous_ids)
        same_ids = previous_ids.intersection(current_ids)
        changed_ids = set(i for i in same_ids
                          if is_netbox_changed(self[i], netbox_dict[i]))

        # update self
        for i in lost_ids:
            del self[i]
        for i in new_ids:
            self[i] = netbox_dict[i]
        for i in same_ids:
            self[i].copy(netbox_dict[i])

        self.peak_count = max(self.peak_count, len(self))

        # Log at info level only when something actually changed
        anything_changed = len(new_ids) or len(lost_ids) or len(changed_ids)
        log = self._logger.info if anything_changed else self._logger.debug
        log(
            "Loaded %d netboxes from database "
            "(%d new, %d removed, %d changed, %d peak)",
            len(netbox_dict), len(new_ids), len(lost_ids), len(changed_ids),
            self.peak_count)

        return (new_ids, lost_ids, changed_ids)

    def load_all(self):
        """Asynchronously load netboxes from database."""
        return run_in_thread(self.load_all_s)