Exemplo n.º 1
0
    def topology_rca_uplink(self, alarm, alarms, seen=None, ts=None):
        """
        Trace topology RCA for `alarm` using uplinks and neighbors from ObjectData.

        When every uplink of the alarm's object has an alarm raised at or
        before `ts`, the first uplink alarm that fits the correlation window
        is stored in `alarm._trace_root`. The same procedure is then applied
        recursively to every neighboring alarm. Progress is reported
        through `self.print`.

        :param alarm: Alarm to process
        :param alarms: Mapping of object id (as found in uplinks/neighbors)
            to the alarm of that object
        :param seen: Set of alarm ids already visited, shared across the recursion
        :param ts: Upper bound for neighbor alarm timestamps,
            defaults to the timestamp of `alarm`
        :return: None
        """

        def within_window(newer, older):
            # A falsy window setting disables the time check entirely
            window = config.correlator.topology_rca_window
            if not window:
                return True
            return (newer.timestamp - older.timestamp).total_seconds() <= window

        if not ts:
            ts = alarm.timestamp
        if not seen:
            seen = set()
        visited = "{%s}" % ", ".join(str(x) for x in seen)
        self.print(">>> topology_rca(%s, %s)" % (alarm.id, visited))
        # Guard clauses: nothing to do for correlated or visited alarms
        if hasattr(alarm, "_trace_root"):
            self.print("<<< already correlated")
            return
        if alarm.id in seen:
            self.print("<<< already seen")
            return  # Already correlated
        seen.add(alarm.id)
        object_id = alarm.managed_object.id
        # Collect uplinks and the remaining neighbors of the object
        object_data = ObjectData.get_by_id(object=object_id)
        uplinks = object_data.uplinks if object_data and object_data.uplinks else []
        neighbors = set(uplinks)
        neighbors.update(ObjectData.get_neighbors(object_id))
        if not neighbors:
            self.print("<<< no neighbors")
            return
        # Keep only neighbors having an alarm raised not later than ts
        neighbor_alarms = {}
        for neighbor in neighbors:
            found = alarms.get(neighbor)
            if found and found.timestamp <= ts:
                neighbor_alarms[neighbor] = found
        described = []
        for oid, found in neighbor_alarms.items():
            prefix = "U:" if oid in uplinks else ""
            described.append("%s%s (%s)" % (prefix, found, ManagedObject.get_by_id(oid).name))
        self.print("    Neighbor alarms: %s" % ", ".join(described))
        uplink_names = [ManagedObject.get_by_id(u).name for u in uplinks]
        self.print("    Uplinks: %s" % ", ".join(uplink_names))
        if uplinks and all(u in neighbor_alarms for u in uplinks):
            # All uplinks are faulty.
            # Uplinks are ordered according to path length,
            # so the first one within the window becomes the root.
            for u in uplinks:
                candidate = neighbor_alarms[u]
                if within_window(alarm, candidate):
                    self.print("+++ SET ROOT %s -> %s" % (alarm.id, candidate.id))
                    alarm._trace_root = candidate.id
                    break
        # Proceed with the alarms of the neighbors
        for found in neighbor_alarms.values():
            self.topology_rca_uplink(found, alarms, seen, ts)
        self.print("<<< done")
Exemplo n.º 2
0
    def handle(self, alarm, delta, trace=False, *args, **kwargs):
        """
        Print a table of archived alarms around the given alarm and,
        optionally, trace topology RCA over them.

        The table has one row per managed object of the segment the alarm's
        object belongs to. Objects without an alarm in the time range get
        empty alarm columns.

        :param alarm: Sequence whose first item is the id of the archived alarm
        :param delta: Half-width of the time range around the alarm
            timestamp, in seconds
        :param trace: When set, additionally run `self.topology_rca` for
            every alarm of the table and dump the roots found
        :param args: Ignored
        :param kwargs: Ignored
        :return: None
        """

        def strip_suffix(name):
            # Drop everything starting from the first "#"
            return name.split("#", 1)[0]

        connect()
        # Choose the tracing strategy according to the configuration
        self.topology_rca = (
            self.topology_rca_neighbor
            if config.fm.enable_rca_neighbor_cache
            else self.topology_rca_uplink
        )
        try:
            a0 = ArchivedAlarm.objects.get(id=alarm[0])
        except ArchivedAlarm.DoesNotExist:
            self.die("Cannot find alarm")
        span = datetime.timedelta(seconds=delta)
        t0 = a0.timestamp - span
        t1 = a0.timestamp + span
        mos = list(a0.managed_object.segment.managed_objects)
        # Alarms of the segment within the time range, keyed by object id.
        # A later alarm of the same object replaces the earlier one.
        alarms = {
            a.managed_object.id: a
            for a in ArchivedAlarm.objects.filter(
                timestamp__gte=t0, timestamp__lte=t1, managed_object__in=[o.id for o in mos]
            )
        }
        # Enrich with roots

        # Get object segment data
        rows = []
        for mo in mos:
            names = []
            data = ObjectData.get_by_id(mo)
            if data:
                resolved = [ManagedObject.get_by_id(u) for u in data.uplinks]
                # Only the first two resolvable uplinks are shown
                names = [strip_suffix(u.name) for u in [x for x in resolved if x][:2]]
            uplink1 = names[0] if names else ""
            uplink2 = names[1] if len(names) > 1 else ""
            a = alarms.get(mo.id)
            if a:
                ts_text = a.timestamp.strftime("%Y-%m-%d %H:%M:%S")
                alarm_id = a.id
                root_id = a.root or ""
            else:
                ts_text, alarm_id, root_id = "", "", ""
            rows.append(
                Record(
                    timestamp=ts_text,
                    alarm_id=alarm_id,
                    root_id=root_id,
                    managed_object=strip_suffix(mo.name),
                    address=mo.address,
                    platform=mo.platform,
                    uplink1=uplink1,
                    uplink2=uplink2,
                )
            )
        row_format = "%19s | %24s | %24s | %16s | %15s | %20s | %16s | %16s"
        header = ("ts", "alarm", "root", "object", "address", "platform", "uplink1", "uplink2")
        self.print(row_format % header)
        # Rows are ordered by the formatted timestamp, rows without alarm go first
        ordered = sorted(rows, key=operator.attrgetter("timestamp"))
        for row in ordered:
            self.print(row_format % row)
        if not trace:
            return
        self.print("Time range: %s -- %s" % (t0, t1))
        if config.correlator.topology_rca_window:
            window_text = "%ss" % config.correlator.topology_rca_window
        else:
            window_text = "Disabled"
        self.print("Topology RCA Window: %s" % window_text)
        amap = {a.id: a for a in six.itervalues(alarms)}
        for row in ordered:
            if row.alarm_id:
                self.print("@@@ %s %s %s" % (row.timestamp, row.alarm_id, row.managed_object))
                self.topology_rca(amap[row.alarm_id], alarms)
        # Dump
        for alarm_id, traced in six.iteritems(amap):
            if hasattr(traced, "_trace_root"):
                self.print("%s -> %s" % (alarm_id, traced._trace_root))
Exemplo n.º 3
0
    def topology_rca_neighbor(self, alarm, alarms, ts=None):
        """
        Trace topology RCA using the `uplinks` and `rca_neighbors`
        stored on the alarms themselves.

        Prints the alarms listing the object of `alarm` among their
        `rca_neighbors`, then tries to set `_trace_root` on `alarm` and on
        every alarm having this object among its uplinks. Progress is
        reported through `self.print`.

        :param alarm: Alarm to process
        :param alarms: Mapping of object id (as found in alarm uplinks)
            to the alarm of that object
        :param ts: Only alarms raised at or before this moment are listed
            as neighbors, defaults to the timestamp of `alarm`
        :return: None
        """

        def within_window(child, parent):
            """
            Check whether two alarms fit the correlation window.
            A falsy window setting disables the check.

            :param child: Alarm to be correlated
            :param parent: Root cause candidate
            :return: True when the alarms may be correlated
            """
            window = config.correlator.topology_rca_window
            if not window:
                return True
            return (child.timestamp - parent.timestamp).total_seconds() <= window

        def find_root(child):
            """
            Find the root cause among uplink alarms.
            Applicable only when the alarm has uplinks and all of them
            are failed. Uplinks are ordered according to path length,
            the first one within the correlation window wins.

            :param child: Alarm to find the root for
            :return: Root alarm or None
            """
            links = child.uplinks
            if not links:
                return None
            if not all(u in alarms for u in links):
                return None
            for u in links:
                parent = alarms[u]
                if within_window(child, parent):
                    return parent
            return None

        def correlate(child):
            """
            Set the trace root of the alarm when a root cause is found.

            :param child: Alarm to correlate
            :return:
            """
            root = find_root(child)
            if root:
                self.print("+++ SET ROOT %s -> %s" % (child.id, root.id))
                child._trace_root = root.id

        if not ts:
            ts = alarm.timestamp
        self.print(">>> topology_rca(%s)" % alarm.id)
        if hasattr(alarm, "_trace_root"):
            self.print("<<< already correlated")
            return
        object_id = alarm.managed_object.id
        # Collect alarms naming this object as RCA neighbor, and their uplinks
        neighbor_alarms = {}
        neighbor_uplinks = set()
        for key, candidate in six.iteritems(alarms):
            if candidate and candidate.timestamp <= ts and object_id in candidate.rca_neighbors:
                neighbor_alarms[key] = candidate
                neighbor_uplinks.update(candidate.uplinks)
        described = []
        for key, candidate in six.iteritems(neighbor_alarms):
            prefix = "U:" if key in neighbor_uplinks else ""
            described.append("%s%s (%s)" % (prefix, candidate, ManagedObject.get_by_id(key).name))
        self.print("    Neighbor alarms: %s" % ", ".join(described))
        uplink_names = [ManagedObject.get_by_id(u).name for u in neighbor_uplinks]
        self.print("    Uplinks: %s" % ", ".join(uplink_names))
        # Correlate current alarm
        correlate(alarm)
        # Correlate all alarms having this object as uplink
        for other in six.itervalues(alarms):
            if other.uplinks and object_id in other.uplinks:
                correlate(other)
        self.print("<<< done")