def watch_escalations(cls, alarm):
    now = datetime.datetime.now()
    for esc in cls.get_class_escalations(alarm.alarm_class):
        for e_item in esc.escalations:
            # Check administrative domain
            if (
                e_item.administrative_domain
                and e_item.administrative_domain.id not in alarm.adm_path
            ):
                continue
            # Check severity
            if e_item.min_severity and alarm.severity < e_item.min_severity:
                continue
            # Check selector
            if e_item.selector and not SelectorCache.is_in_selector(
                alarm.managed_object, e_item.selector
            ):
                continue
            logger.debug("[%s] Watch for %s after %s seconds", alarm.id, esc.name, e_item.delay)
            et = alarm.timestamp + datetime.timedelta(seconds=e_item.delay)
            if et > now:
                delay = (et - now).total_seconds()
            else:
                delay = None
            call_later(
                "noc.services.escalator.escalation.escalate",
                scheduler="escalator",
                pool=alarm.managed_object.escalator_shard,
                delay=delay,
                max_runs=esc.max_escalation_retries,
                alarm_id=alarm.id,
                escalation_id=esc.id,
                escalation_delay=e_item.delay,
            )
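# --- Editorial note (assumption, not part of the source corpus) ---
# Nearly every snippet below defers work through NOC's call_later helper
# (noc.core.defer.call_later). A minimal sketch of the signature these call
# sites appear to assume; the real implementation serializes the call into a
# scheduler job and may differ between NOC releases:
def call_later(name, delay=None, scheduler="scheduler", pool=None, max_runs=None, **kwargs):
    """
    Schedule the callable at dotted path *name* to run in a scheduler
    process after *delay* seconds (as soon as possible when None).
    Extra keyword arguments are stored with the job and passed to the
    callable when it fires.
    """
    ...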
def on_save(self):
    if hasattr(self, "_changed_fields") and (
        "direct_objects" in self._changed_fields
        or "direct_segments" in self._changed_fields
    ):
        self.update_affected_objects_maintenance()
    if hasattr(self, "_changed_fields") and "stop" in self._changed_fields:
        if not self.is_completed and self.auto_confirm:
            self.auto_confirm_maintenance()
    if self.escalate_managed_object:
        if not self.is_completed:
            call_later(
                "noc.services.escalator.maintenance.start_maintenance",
                delay=max(
                    (dateutil.parser.parse(self.start) - datetime.datetime.now()).total_seconds(),
                    60,
                ),
                scheduler="escalator",
                pool=self.escalate_managed_object.escalator_shard,
                maintenance_id=self.id,
            )
        else:
            call_later(
                "noc.services.escalator.maintenance.close_maintenance",
                scheduler="escalator",
                pool=self.escalate_managed_object.escalator_shard,
                maintenance_id=self.id,
            )
def check_down(alarm):
    """
    Try to detect an out-of-order pair of opening and closing events
    for a "ping failed" alarm
    :param alarm:
    :return:
    """
    status, last = alarm.managed_object.get_last_status()
    if not status or not last:
        return
    if alarm.timestamp >= last:
        # Note: status is already known to be truthy at this point,
        # so the original "or not status" in this check was redundant
        return
    # Out of order: dispose closing job
    logger.error(
        "[%s] Out of order closing event. Scheduling to close alarm after %d seconds",
        alarm.id,
        OO_CLOSE_DELAY,
    )
    metrics["oo_pings_detected"] += 1
    call_later(
        "noc.fm.handlers.alarm.status.close_oo_alarm",
        delay=OO_CLOSE_DELAY,
        scheduler="correlator",
        pool=alarm.managed_object.pool.name,
        alarm_id=alarm.id,
        timestamp=last,
    )
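# Illustrative reading of check_down() above (editorial note):
# if the object's last recorded status change is at 12:00:05 but the
# "ping failed" alarm carries timestamp 12:00:02, the closing (recovery)
# event was processed before the opening one, so instead of clearing
# immediately the alarm is scheduled to close after OO_CLOSE_DELAY
# seconds via the correlator scheduler.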
def api_delete(self, request, id):
    """
    Override the default delete method
    :param request:
    :param id:
    :return:
    """
    try:
        o = self.queryset(request).get(id=int(id))
    except self.model.DoesNotExist:
        return self.render_json(
            {"status": False, "message": "Not found"}, status=self.NOT_FOUND
        )
    if not o.has_access(request.user):
        return self.response_forbidden("Access denied")
    # Rename and unmanage the object, then run the wipe job
    # instead of deleting inline
    o.name = "wiping-%d" % o.id
    o.is_managed = False
    o.description = "Wiping! Do not touch!"
    o.save()
    call_later("noc.sa.wipe.managedobject.wipe", o=o.id)
    return HttpResponse(orjson.dumps({"status": True}), status=self.DELETED)
def apply_changes(changes):
    """
    :param changes: List of (datastream name, object id)
    :return:
    """
    if changes:
        call_later("noc.core.datastream.change.do_changes", changes=changes)
def on_save(self):
    if not hasattr(self, "_changed_fields") or "interface_profile" in self._changed_fields:
        call_later(
            "noc.sa.models.serviceprofile.refresh_interface_profiles",
            sp_id=self.id,
            ip_id=self.interface_profile.id if self.interface_profile else None,
        )
def auto_confirm_maintenance(self):
    stop = datetime.datetime.strptime(self.stop, "%Y-%m-%dT%H:%M:%S")
    now = datetime.datetime.now()
    if stop > now:
        # Schedule only while the stop time is still in the future;
        # keeping call_later inside the branch also avoids an unbound
        # "delay" when stop has already passed
        delay = (stop - now).total_seconds()
        call_later("noc.maintenance.models.maintenance.stop", delay, maintenance_id=self.id)
def schedule_discovery_config(event):
    """
    Reschedule the config discovery process
    """
    # Note: unlike the other call sites in this corpus, the first argument
    # here is a managed object id and a job_class is passed, which suggests
    # a job-scheduling variant of call_later rather than the dotted-path form
    call_later(
        event.managed_object.id,
        job_class=event.managed_object.BOX_DISCOVERY_JOB,
        _checks=["config"],
        max_runs=1,
        delay=config.correlator.discovery_delay,
    )
def alarm_escalation(alarm, mo, ctx):
    now = datetime.datetime.now()
    for esc in AlarmEscalation.get_class_escalations(alarm["alarm_class"]):
        for e_item in esc.escalations:
            # Check administrative domain
            if (
                e_item.administrative_domain
                and e_item.administrative_domain.id
                not in mo.object.administrative_domain.get_path()
            ):
                continue
            # Check severity
            if e_item.min_severity and alarm["severity"] < e_item.min_severity:
                continue
            # Check selector
            if e_item.selector and not SelectorCache.is_in_selector(mo.object, e_item.selector):
                continue
            logger.info(
                "%s Watch for %s after %s seconds", alarm["alarm_class"], esc.name, e_item.delay
            )
            et = now + datetime.timedelta(seconds=e_item.delay)
            if et > now:
                delay = (et - now).total_seconds()
            else:
                delay = None
            if e_item.notification_group:
                a = ActiveAlarm.objects.filter(
                    managed_object=mo.object, vars__path=alarm["vars"]["path"]
                ).first()
                if a:
                    logger.info("Alarm is already being sent")
                    break
                subject = e_item.template.render_subject(**ctx)
                body = e_item.template.render_body(**ctx)
                logger.debug("Notification message:\nSubject: %s\n%s", subject, body)
                call_later(
                    "noc.custom.handlers.thresholds.thresholdsnotification.threshold_escalation",
                    delay=delay,
                    scheduler="scheduler",
                    notification_group_id=e_item.notification_group.id,
                    subject=subject,
                    body=body,
                )
                return {
                    "notification_group": e_item.notification_group,
                    "clear_template": e_item.clear_template,
                    "message": "Sending message to: %s" % e_item.notification_group.name,
                }
            if e_item.stop_processing:
                logger.debug("Stopping processing")
                break
def on_save(self):
    from .managedobject import CREDENTIAL_CACHE_VERSION
    from noc.inv.models.link import Link

    box_changed = self.initial_data["enable_box_discovery"] != self.enable_box_discovery
    periodic_changed = (
        self.initial_data["enable_periodic_discovery"] != self.enable_periodic_discovery
    )
    access_changed = (
        (self.initial_data["access_preference"] != self.access_preference)
        or (self.initial_data["cli_privilege_policy"] != self.cli_privilege_policy)
        or (self.initial_data["beef_storage"] != self.beef_storage)
        or (self.initial_data["beef_path_template"] != self.beef_path_template)
        or (self.initial_data["snmp_rate_limit"] != self.snmp_rate_limit)
    )
    if box_changed or periodic_changed:
        call_later(
            "noc.sa.models.managedobjectprofile.apply_discovery_jobs",
            profile_id=self.id,
            box_changed=box_changed,
            periodic_changed=periodic_changed,
        )
    if access_changed:
        cache.delete_many(
            ["cred-%s" % x for x in self.managedobject_set.values_list("id", flat=True)],
            version=CREDENTIAL_CACHE_VERSION,
        )
    if (
        self.initial_data["enable_rca_downlink_merge"] != self.enable_rca_downlink_merge
        or self.initial_data["rca_downlink_merge_window"] != self.rca_downlink_merge_window
    ):
        # Reschedule uplink recalculation for every segment touched by
        # links of this profile's objects
        object_ids = set(self.managedobject_set.values_list("id", flat=True))
        for x in set(
            chain.from_iterable(
                Link.objects.filter(linked_objects__in=list(object_ids)).scalar("linked_segments")
            )
        ):
            call_later("noc.core.topology.segment.update_uplinks", 60, segment_id=x)
def on_save(self):
    self.update_affected_objects()
    if self.escalate_managed_object:
        if not self.is_completed:
            call_later(
                "noc.services.escalator.maintenance.start_maintenance",
                # total_seconds(), not .seconds: .seconds drops whole days
                # from the timedelta and misbehaves on negative deltas
                delay=(
                    dateutil.parser.parse(self.start) - datetime.datetime.now()
                ).total_seconds(),
                scheduler="escalator",
                pool=self.escalate_managed_object.escalator_shard,
                maintenance_id=self.id,
            )
        else:
            call_later(
                "noc.services.escalator.maintenance.close_maintenance",
                scheduler="escalator",
                pool=self.escalate_managed_object.escalator_shard,
                maintenance_id=self.id,
            )
def watch_escalations(cls, alarm, timestamp_policy="a"):
    now = datetime.datetime.now()
    for esc in cls.get_class_escalations(alarm.alarm_class):
        for e_item in esc.escalations:
            # Check administrative domain
            if (
                e_item.administrative_domain
                and e_item.administrative_domain.id not in alarm.adm_path
            ):
                continue
            # Check severity
            if e_item.min_severity and alarm.severity < e_item.min_severity:
                continue
            # Check selector
            if e_item.selector and not SelectorCache.is_in_selector(
                alarm.managed_object, e_item.selector
            ):
                continue
            logger.debug("[%s] Watch for %s after %s seconds", alarm.id, esc.name, e_item.delay)
            et = alarm.timestamp + datetime.timedelta(seconds=e_item.delay)
            if timestamp_policy == "c":
                # Escalation with current timestamp: shift the consequence
                # after the main escalation
                delay = max((et - now).total_seconds(), 120) + 120 if et > now else 120
                logger.info(
                    "[%s] Watch escalation with create-new-timestamp policy, after %s seconds",
                    alarm.id,
                    delay,
                )
            elif et > now:
                delay = (et - now).total_seconds()
            else:
                delay = None
            call_later(
                "noc.services.escalator.escalation.escalate",
                scheduler="escalator",
                pool=alarm.managed_object.escalator_shard,
                delay=delay,
                max_runs=esc.max_escalation_retries,
                alarm_id=alarm.id,
                escalation_id=esc.id,
                escalation_delay=e_item.delay,
                timestamp_policy=timestamp_policy,
            )
            if e_item.stop_processing:
                break
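# Worked example for the "c" (create-new-timestamp) policy above (editorial
# note): with e_item.delay = 300 and an alarm raised 60 seconds ago,
# et - now = 240 s, so delay = max(240, 120) + 120 = 360 s, i.e. the
# consequence is scheduled 120 seconds after the main escalation would fire.
# If et is already in the past, the consequence is still delayed a flat 120 s.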
def on_save(self):
    from .phonenumber import PhoneNumber

    # Borrow own phone numbers from parent
    if self.parent:
        for n in PhoneNumber.objects.filter(
            phone_range=self.parent.id,
            number__gte=self.from_number,
            number__lte=self.to_number,
        ):
            n.phone_range = self
            n.save()
    # Allocate numbers when necessary
    if self.to_allocate_numbers:
        call_later("noc.phone.models.phonerange.allocate_numbers", range_id=self.id)
    # Reparent nested ranges
    for r in PhoneRange.objects.filter(
        dialplan=self.dialplan,
        from_number__gte=self.from_number,
        from_number__lte=self.to_number,
        to_number__lte=self.to_number,
        id__ne=self.id,
    ):
        r.save()
def on_clear(cls, alarm):
    """
    Submit clear jobs
    :param alarm:
    :return:
    """
    cfg = defaultdict(list)
    for c in cls.get_class_diagnostics(alarm.alarm_class):
        if c.selector and not SelectorCache.is_in_selector(alarm.managed_object, c.selector):
            continue
        if c.only_root and alarm.root:
            continue
        if c.enable_on_clear:
            if c.on_clear_script:
                cfg[c.on_clear_delay] += [
                    {"script": c.on_clear_script, "header": c.on_clear_header}
                ]
            if c.on_clear_action:
                cfg[c.on_clear_delay] += [
                    {"action": c.on_clear_action.id, "header": c.on_clear_header}
                ]
            if c.on_clear_handler:
                cfg[c.on_clear_delay] += [
                    {"handler": c.on_clear_handler, "header": c.on_clear_header}
                ]
    # Submit on_clear job
    for delay in cfg:
        call_later(
            "noc.fm.models.alarmdiagnosticconfig.on_clear",
            scheduler="correlator",
            pool=alarm.managed_object.pool.name,
            delay=delay,
            alarm=alarm.id,
            cfg=cfg[delay],
        )
    AlarmDiagnostic.clear_diagnostics(alarm)
def handle_close(self, close_alarms=None, *args, **kwargs):
    close_alarms = close_alarms or []
    for a_id in close_alarms:
        alarm = get_alarm(a_id)
        if alarm and alarm.status == "A" and alarm.escalation_tt:
            self.print("Sending TT close for alarm %s to escalator" % alarm.id)
            call_later(
                "noc.services.escalator.escalation.notify_close",
                scheduler="escalator",
                pool=alarm.managed_object.escalator_shard,
                alarm_id=alarm.id,
                tt_id=alarm.escalation_tt,
                subject="Closed",
                body="Closed",
                notification_group_id=None,
                close_tt=False,
            )
        elif alarm:
            self.print("ERROR: Alarm %s is not escalated. Skipping" % alarm)
        else:
            # alarm is None here, so report the requested id instead
            self.print("ERROR: Alarm %s is not found. Skipping" % a_id)
def on_save(self):
    box_changed = self.initial_data["enable_box_discovery"] != self.enable_box_discovery
    periodic_changed = (
        self.initial_data["enable_periodic_discovery"] != self.enable_periodic_discovery
    )
    access_changed = (
        (self.initial_data["access_preference"] != self.access_preference)
        or (self.initial_data["cli_privilege_policy"] != self.cli_privilege_policy)
    )
    if box_changed or periodic_changed:
        call_later(
            "noc.sa.models.managedobjectprofile.apply_discovery_jobs",
            profile_id=self.id,
            box_changed=box_changed,
            periodic_changed=periodic_changed,
        )
    if access_changed:
        cache.delete_many(
            ["cred-%s" % x for x in self.managedobject_set.values_list("id", flat=True)]
        )
def on_enter_state(self, obj):
    """
    Called when object enters state
    :param obj:
    :return:
    """
    # Process on_enter handlers
    if self.on_enter_handlers:
        logger.debug("[%s|%s] Running on_enter_handlers", obj, obj.state.name)
        for hn in self.on_enter_handlers:
            try:
                h = get_handler(str(hn))
            except ImportError as e:
                logger.error("Error importing on_enter handler: %s", e)
                h = None
            if h:
                logger.debug("[%s|%s] Running %s", obj, self.name, hn)
                h(obj)  # @todo: Catch exceptions
            else:
                logger.debug("[%s|%s] Invalid handler %s, skipping", obj, self.name, hn)
    # Run job handler when necessary
    if self.job_handler:
        # Fixed: original passed obj.self.name, leaving the third
        # placeholder without an argument
        logger.debug("[%s|%s] Running job handler %s", obj, self.name, self.job_handler)
        try:
            h = get_handler(self.job_handler)
        except ImportError as e:
            logger.error("Error importing state job handler: %s", e)
            h = None
        if h:
            call_later(
                STATE_JOB,
                handler=self.job_handler,
                model=get_model_id(obj),
                object=str(obj.pk),
            )
        else:
            logger.debug(
                "[%s|%s] Invalid job handler %s, skipping", obj, self.name, self.job_handler
            )
def refresh(cls):
    call_later("noc.sa.models.selectorcache.refresh", delay=10)
def update_pop_links(self, delay: int = 20):
    call_later("noc.inv.util.pop_links.update_pop_links", delay, pop_id=self.id)
def update_uplinks(self):
    # if self.profile.is_persistent:
    call_later("noc.core.topology.segment.update_uplinks", 60, segment_id=self.id)
def update_affected_objects_maintenance(self):
    call_later(
        "noc.maintenance.models.maintenance.update_affected_objects",
        60,
        maintenance_id=self.id,
    )
def clear_alarm(self, message, ts=None, force=False):
    """
    Clear alarm
    :param message: Log clearing message
    :param ts: Clearing timestamp
    :param force: Clear even if wait_tt is set
    """
    ts = ts or datetime.datetime.now()
    if self.wait_tt and not force:
        # Wait for escalated TT to close
        if not self.wait_ts:
            self.wait_ts = ts
            self.log_message("Waiting for TT to close")
            call_later(
                "noc.services.escalator.wait_tt.wait_tt",
                scheduler="escalator",
                pool=self.managed_object.escalator_shard,
                alarm_id=self.id,
            )
        return
    if self.alarm_class.clear_handlers:
        # Process clear handlers
        for h in self.alarm_class.get_clear_handlers():
            try:
                h(self)
            except Exception:
                error_report()
    log = self.log + [AlarmLog(timestamp=ts, from_status="A", to_status="C", message=message)]
    a = ArchivedAlarm(
        id=self.id,
        timestamp=self.timestamp,
        clear_timestamp=ts,
        managed_object=self.managed_object,
        alarm_class=self.alarm_class,
        severity=self.severity,
        vars=self.vars,
        log=log,
        root=self.root,
        escalation_ts=self.escalation_ts,
        escalation_tt=self.escalation_tt,
        escalation_error=self.escalation_error,
        escalation_ctx=self.escalation_ctx,
        opening_event=self.opening_event,
        closing_event=self.closing_event,
        discriminator=self.discriminator,
        reopens=self.reopens,
        direct_services=self.direct_services,
        direct_subscribers=self.direct_subscribers,
        total_objects=self.total_objects,
        total_services=self.total_services,
        total_subscribers=self.total_subscribers,
        adm_path=self.adm_path,
        segment_path=self.segment_path,
        container_path=self.container_path,
        uplinks=self.uplinks,
    )
    ct = self.alarm_class.get_control_time(self.reopens)
    if ct:
        a.control_time = datetime.datetime.now() + datetime.timedelta(seconds=ct)
    a.save()
    # Send notifications (the original's dead "elif ct: pass" branch
    # has been dropped)
    if not a.root and not self.reopens:
        a.managed_object.event(
            a.managed_object.EV_ALARM_CLEARED,
            {
                "alarm": a,
                "subject": a.subject,
                "body": a.body,
                "symptoms": a.alarm_class.symptoms,
                "recommended_actions": a.alarm_class.recommended_actions,
                "probable_causes": a.alarm_class.probable_causes,
            },
        )
    # Set checks on all consequences
    for d in self._get_collection().find({"root": self.id}, {"_id": 1, "alarm_class": 1}):
        ac = AlarmClass.get_by_id(d["alarm_class"])
        if not ac:
            continue
        t = ac.recover_time
        if not t:
            continue
        call_later(
            "noc.services.correlator.check.check_close_consequence",
            scheduler="correlator",
            pool=self.managed_object.pool.name,
            delay=t,
            alarm_id=d["_id"],
        )
    # Clear alarm
    self.delete()
    # Close TT
    # MUST be after .delete() to prevent race conditions
    if a.escalation_tt or self.clear_template:
        if self.clear_template:
            ctx = {"alarm": a}
            subject = self.clear_template.render_subject(**ctx)
            body = self.clear_template.render_body(**ctx)
        else:
            subject = "Alarm cleared"
            body = "Alarm has been cleared"
        call_later(
            "noc.services.escalator.escalation.notify_close",
            scheduler="escalator",
            pool=self.managed_object.escalator_shard,
            max_runs=ALARM_CLOSE_RETRIES,
            alarm_id=self.id,
            tt_id=self.escalation_tt,
            subject=subject,
            body=body,
            notification_group_id=(
                self.clear_notification_group.id if self.clear_notification_group else None
            ),
            close_tt=self.close_tt,
        )
    # Gather diagnostics
    AlarmDiagnosticConfig.on_clear(a)
    # Return archived alarm
    return a
def apply_changes(self, changes: List[Tuple[str, Any]]):
    if changes:
        call_later("noc.core.datastream.change.do_changes", changes=changes)
def on_raise(cls, alarm):
    """
    Submit raise and periodic jobs
    :param alarm:
    :return:
    """
    r_cfg = defaultdict(list)
    p_cfg = defaultdict(list)
    for c in cls.get_class_diagnostics(alarm.alarm_class):
        if c.selector and not SelectorCache.is_in_selector(alarm.managed_object, c.selector):
            continue
        if c.only_root and alarm.root:
            continue
        if c.enable_on_raise:
            if c.on_raise_script:
                r_cfg[c.on_raise_delay] += [
                    {"script": c.on_raise_script, "header": c.on_raise_header}
                ]
            if c.on_raise_action:
                r_cfg[c.on_raise_delay] += [
                    {"action": c.on_raise_action.name, "header": c.on_raise_header}
                ]
            if c.on_raise_handler:
                r_cfg[c.on_raise_delay] += [
                    {"handler": c.on_raise_handler, "header": c.on_raise_header}
                ]
        if c.enable_periodic:
            if c.periodic_script:
                p_cfg[c.periodic_interval] += [
                    {"script": c.periodic_script, "header": c.periodic_header}
                ]
            if c.periodic_action:
                p_cfg[c.periodic_interval] += [
                    {"action": c.periodic_action.name, "header": c.periodic_header}
                ]
            if c.periodic_handler:
                p_cfg[c.periodic_interval] += [
                    {"handler": c.periodic_handler, "header": c.periodic_header}
                ]
    # Submit on_raise job
    for delay in r_cfg:
        call_later(
            "noc.fm.models.alarmdiagnosticconfig.on_raise",
            scheduler="correlator",
            pool=alarm.managed_object.pool.name,
            delay=delay,
            alarm=alarm.id,
            cfg=r_cfg[delay],
        )
    # Submit periodic job
    for delay in p_cfg:
        call_later(
            "noc.fm.models.alarmdiagnosticconfig.periodic",
            scheduler="correlator",
            max_runs=PERIODIC_JOB_MAX_RUNS,
            pool=alarm.managed_object.pool.name,
            delay=delay,
            alarm=alarm.id,
            cfg={"cfg": p_cfg[delay], "delay": delay},
        )
def refresh_object(cls, managed_object):
    # Accept either a ManagedObject instance or a raw id
    if hasattr(managed_object, "id"):
        managed_object = managed_object.id
    call_later(
        "noc.sa.models.servicesummary.refresh_object",
        delay=20,
        managed_object=managed_object,
    )
def on_save(self):
    call_later(
        "noc.core.clickhouse.ensure.ensure_all_pm_scopes",
        scheduler="scheduler",
        delay=30,
    )
def update_uplinks(self):
    call_later("noc.core.topology.segment.update_uplinks", 60, segment_id=self.id)