示例#1
0
文件: node.py 项目: fossabot/noc
class Node(nosql.Document):
    """
    Workflow node, stored in the ``noc.wf.nodes`` collection.

    A node references its workflow and lane, names the handler to run
    and links to the nodes that may follow it.
    """
    meta = {"collection": "noc.wf.nodes", "allow_inheritance": False}

    # Owning workflow and lane
    workflow = nosql.PlainReferenceField(Workflow)
    lane = nosql.PlainReferenceField(Lane)
    # Identification and human-readable texts
    name = nosql.StringField()
    label = nosql.StringField()
    description = nosql.StringField()
    # Handler name; resolved to a class by `handler_class`
    handler = nosql.StringField()
    # Handler parameters: param -> value
    params = nosql.RawDictField()
    # Connections to other nodes
    # (presumably next_true_node/next_false_node are the two branches
    # of a conditional handler -- TODO confirm against the handlers)
    next_node = nosql.PlainReferenceField("self")
    next_true_node = nosql.PlainReferenceField("self")
    next_false_node = nosql.PlainReferenceField("self")
    # Graph position
    x = nosql.IntField()
    y = nosql.IntField()

    def __unicode__(self):
        """
        Return "<workflow> <name>".
        """
        parts = (self.workflow, self.name)
        return "%s %s" % parts

    @property
    def handler_class(self):
        """
        Handler class for this node.

        Looks up the attribute ``<handler>Handler`` on the object returned
        by importing ``noc.wf.handlers``.
        """
        handler_name = self.handler
        package = __import__("noc.wf.handlers", {}, {}, str(handler_name))
        class_name = "%sHandler" % handler_name
        return getattr(package, class_name)
示例#2
0
class Workflow(nosql.Document):
    """
    Workflow definition, stored in the ``noc.wf.workflows`` collection.

    `run` creates and schedules a new process starting at the
    configured start node.
    """
    meta = {"collection": "noc.wf.workflows", "allow_inheritance": False}

    # Unique identifier
    name = nosql.StringField()
    # Long name
    display_name = nosql.StringField()
    solution = nosql.PlainReferenceField(Solution)
    version = nosql.IntField()
    is_active = nosql.BooleanField()
    description = nosql.StringField()
    # Id of the node used as entry point (see get_start_node)
    start_node = nosql.StringField()
    # Permissions
    # stat_permission = nosql.StringField()
    # trace_permission = nosql.StringField()
    # kill_permission = nosql.StringField()
    # Tracing flag used by run() when the caller does not pass _trace
    trace = nosql.BooleanField(default=False)

    def __unicode__(self):
        """
        Return "<solution name>.<name> v<version>".
        """
        parts = (self.solution.name, self.name, self.version)
        return "%s.%s v%s" % parts

    def get_node(self, name):
        """
        Find a node of this workflow by name.

        :param name: Node name
        :return: First matching Node or None
        """
        matches = Node.objects.filter(workflow=self.id, name=name)
        return matches.first()

    def get_start_node(self):
        """
        Find the node referenced by `start_node` within this workflow.

        :return: First matching Node or None
        """
        matches = Node.objects.filter(workflow=self.id, id=self.start_node)
        return matches.first()

    def run(self, _trace=None, **kwargs):
        """
        Start a new process of this workflow.

        :param _trace: Tracing flag; the workflow's own `trace` value
            is used when None
        :param kwargs: Initial values for the workflow variables
        :return: Saved and scheduled Process instance
        :raises InvalidStartNode: when the start node cannot be found
        """
        # Locate the entry point
        entry = self.get_start_node()
        if not entry:
            raise InvalidStartNode(self.start_node)
        # An explicit _trace overrides the workflow-level flag
        if _trace is None:
            trace = self.trace
        else:
            trace = _trace
        # Build the initial context: caller-supplied value first,
        # then the variable's default, otherwise None
        context = {}
        for var in Variable.objects.filter(workflow=self.id):
            if var.name in kwargs:
                raw = kwargs[var.name]
            elif var.default:
                raw = var.default
            else:
                context[var.name] = None
                continue
            context[var.name] = var.clean(raw)

        process = Process(workflow=self,
                          context=context,
                          start_time=datetime.datetime.now(),
                          node=entry,
                          trace=trace)
        process.save()
        # Schedule job
        process.schedule()
        return process
示例#3
0
文件: variable.py 项目: fossabot/noc
class Variable(nosql.Document):
    """
    Typed workflow variable, stored in the ``noc.wf.variables`` collection.

    `clean` converts a raw value to the Python type named by `type`
    by dispatching to the matching ``clean_<type>`` method.
    """
    meta = {"collection": "noc.wf.variables", "allow_inheritance": False}

    workflow = nosql.PlainReferenceField(Workflow)
    name = nosql.StringField()
    # Value type; selects the clean_<type> converter
    type = nosql.StringField(
        choices=[
            ("str", "String"),
            ("int", "Integer"),
            ("bool", "Boolean"),
            ("float", "Float"),
        ])
    # Default value, kept in its string form
    default = nosql.StringField()
    # Required to start the process
    required = nosql.BooleanField()
    description = nosql.StringField()

    def __unicode__(self):
        """
        Return "<workflow> <name> (<type>)".
        """
        return "%s %s (%s)" % (self.workflow, self.name, self.type)

    def clean(self, value):
        """
        Convert `value` according to the variable's type.

        :param value: Raw value
        :return: Converted value
        :raises AttributeError: when no ``clean_<type>`` method exists
        """
        return getattr(self, "clean_%s" % self.type)(value)

    def clean_str(self, value):
        """
        Return the value unchanged.
        """
        return value

    def clean_int(self, value):
        """
        Convert the value with int().
        """
        return int(value)

    def clean_bool(self, value):
        """
        Convert the value to a boolean.

        Real booleans are returned as is; strings are true when they
        equal "on", "true" or "yes" (case-insensitive).
        """
        # Native booleans have no .lower(); accept them like
        # clean_int/clean_float accept native numbers
        if isinstance(value, bool):
            return value
        return value.lower() in ("on", "true", "yes")

    def clean_float(self, value):
        """
        Convert the value with float().
        """
        return float(value)
示例#4
0
class AlarmRootCauseCondition(nosql.EmbeddedDocument):
    """
    Embedded root-cause condition.

    Holds a reference to the root alarm class together with a window
    and the conditions used for matching.
    """
    meta = {
        "strict": False,
        "auto_create_index": False
    }

    name = nosql.StringField(required=True)
    # Alarm class considered as the root cause
    root = nosql.PlainReferenceField("fm.AlarmClass")
    # NOTE(review): presumably a time window in seconds -- confirm
    window = nosql.IntField(required=True)
    condition = nosql.StringField(default="True")
    match_condition = nosql.DictField(required=True)

    def __unicode__(self):
        """
        Return the condition name.
        """
        return self.name

    def __eq__(self, other):
        """
        Compare name, root (by id), window, condition and match_condition.

        The result is truthy only when all of them match; as with the
        chained ``and`` expression, the first falsy operand is returned.
        """
        result = self.name == other.name
        if not result:
            return result
        # Roots match when both are unset or both point to the same id
        own_root, other_root = self.root, other.root
        if own_root is None and other_root is None:
            result = True
        else:
            result = own_root and other_root and own_root.id == other_root.id
        if not result:
            return result
        return (
            self.window == other.window and
            self.condition == other.condition and
            self.match_condition == other.match_condition
        )
示例#5
0
class Lane(nosql.Document):
    """
    Workflow lane, stored in the ``noc.wf.lanes`` collection.
    """
    meta = {"collection": "noc.wf.lanes", "allow_inheritance": False}

    # Owning workflow
    workflow = nosql.PlainReferenceField(Workflow)
    name = nosql.StringField()
    is_active = nosql.BooleanField()

    def __unicode__(self):
        """
        Return "<workflow> <name>".
        """
        parts = (self.workflow, self.name)
        return "%s %s" % parts
示例#6
0
class ArchivedEvent(document.Document):
    """
    Archived event, stored in the ``noc.events.archive`` collection.
    """

    meta = {
        "collection": "noc.events.archive",
        "strict": False,
        "auto_create_index": False,
        "indexes": ["timestamp", "alarms"],
    }
    # Status code of archived events
    status = "S"

    timestamp = fields.DateTimeField(required=True)
    managed_object = nosql.ForeignKeyField(ManagedObject, required=True)
    event_class = nosql.PlainReferenceField(EventClass, required=True)
    start_timestamp = fields.DateTimeField(required=True)
    repeats = fields.IntField(required=True)
    raw_vars = nosql.RawDictField()
    resolved_vars = nosql.RawDictField()
    vars = fields.DictField()
    log = fields.ListField(nosql.EmbeddedDocumentField(EventLog))
    # Ids of related alarms
    alarms = fields.ListField(nosql.ObjectIdField())

    def __str__(self):
        """
        Return the event id as a string.
        """
        return "%s" % self.id

    @property
    def duration(self):
        """
        Seconds between start_timestamp and timestamp
        """
        elapsed = self.timestamp - self.start_timestamp
        return elapsed.total_seconds()

    def get_template_vars(self):
        """
        Build the template context: a copy of the event vars
        plus the event itself under the "event" key
        """
        tpl_vars = self.vars.copy()
        tpl_vars["event"] = self
        return tpl_vars

    @property
    def subject(self):
        """
        Rendered subject template of the event class.

        Results of 255 characters or more are shortened to the first
        and last 125 characters joined by " ... ".
        """
        context = Context(self.get_template_vars())
        rendered = Template(self.event_class.subject_template).render(context)
        if len(rendered) < 255:
            return rendered
        return rendered[:125] + " ... " + rendered[-125:]

    @property
    def body(self):
        """
        Rendered body template of the event class
        """
        context = Context(self.get_template_vars())
        return Template(self.event_class.body_template).render(context)
示例#7
0
文件: mibdata.py 项目: fossabot/noc
class MIBData(nosql.Document):
    """
    Single MIB entry (oid/name pair), stored in the ``noc.mibdata``
    collection.
    """
    meta = {
        "collection": "noc.mibdata",
        "allow_inheritance": False,
        "indexes": ["oid", "name", "mib", "aliases"]
    }
    # MIB the entry belongs to
    mib = nosql.PlainReferenceField(MIB)
    oid = nosql.StringField(required=True, unique=True)
    name = nosql.StringField(required=True)
    description = nosql.StringField(required=False)
    syntax = nosql.DictField(required=False)
    # A callable default gives every document its own list instead of
    # sharing one mutable list object between all instances
    aliases = nosql.ListField(nosql.StringField(), default=list)

    def __unicode__(self):
        """
        Return the entry name.
        """
        return self.name
示例#8
0
class EventSuppressionRule(EmbeddedDocument):
    """
    Embedded event suppression rule.
    """
    meta = {"strict": False}
    name = fields.StringField()
    condition = fields.StringField(required=True, default="True")
    event_class = nosql.PlainReferenceField("fm.EventClass", required=True)
    # A callable default gives every rule its own dict instead of
    # sharing one mutable dict object between all instances
    match_condition = fields.DictField(required=True, default=dict)
    # NOTE(review): presumably a time window in seconds -- confirm
    window = fields.IntField(required=True, default=3600)
    suppress = fields.BooleanField(required=True, default=True)

    def __unicode__(self):
        """
        Return the rule name.
        """
        return self.name

    def __eq__(self, other):
        """
        Rules are equal when name, condition, event class (by id),
        match_condition, window and suppress all match.
        """
        return (self.name == other.name and self.condition == other.condition
                and self.event_class.id == other.event_class.id
                and self.match_condition == other.match_condition
                and self.window == other.window
                and self.suppress == other.suppress)
示例#9
0
class EventDispositionRule(EmbeddedDocument):
    """
    Embedded event disposition rule: what to do with an event
    (drop, ignore, raise or clear an alarm) and under which conditions.
    """
    meta = {"strict": False}
    # Name, unique within event class
    name = fields.StringField(required=True, default="dispose")
    # Python logical expression to check do the rules
    # applicable or not.
    condition = fields.StringField(required=True, default="True")
    # Python logical expression to evaluate managed object
    managed_object = fields.StringField(required=False)
    # What to do with disposed event:
    #    drop - delete and stop disposition
    #    ignore - stop disposition
    #    raise - raise an alarm
    #    clear - clear alarm
    #
    action = fields.StringField(required=True,
                                choices=[(x, x) for x in ("drop", "ignore",
                                                          "raise", "clear")])
    # Applicable for actions: raise and clear
    alarm_class = nosql.PlainReferenceField(AlarmClass, required=False)
    # Additional condition. Raise or clear action
    # will be performed only if additional events occurred during time window
    combo_condition = fields.StringField(
        required=False,
        default="none",
        choices=[
            (x, x) for x in (
                # Apply action immediately
                "none",
                # Apply when event firing rate
                # exceeds combo_count times during combo_window
                "frequency",
                # Apply action if event followed by all combo events
                # in strict order
                "sequence",
                # Apply action if event followed by all combo events
                # in no specific order
                "all",
                # Apply action if event followed by any of combo events
                "any")
        ])
    # Time window for combo events in seconds
    combo_window = fields.IntField(required=False, default=0)
    # Applicable for frequency.
    combo_count = fields.IntField(required=False, default=0)
    # Applicable for sequence, all and any combo_condition.
    # A callable default gives every rule its own list instead of
    # sharing one mutable list object between all instances
    combo_event_classes = fields.ListField(
        nosql.PlainReferenceField("fm.EventClass"), required=False,
        default=list)
    # event var name -> alarm var name mappings
    # try to use direct mapping if not set explicitly
    var_mapping = fields.DictField(required=False)
    # Stop event disposition if True or continue with next rule
    stop_disposition = fields.BooleanField(required=False, default=True)

    def __unicode__(self):
        """
        Return "<action>: <alarm class name>", or just the action
        when the rule has no alarm class.
        """
        # alarm_class is optional (drop/ignore rules do not need it)
        if self.alarm_class is None:
            return "%s" % self.action
        return "%s: %s" % (self.action, self.alarm_class.name)

    def __eq__(self, other):
        """
        Compare the listed attributes and the alarm class (by name).

        An attribute missing on both sides is skipped; one missing on
        only one side makes the rules unequal.
        NOTE(review): the combo_* fields are not part of the comparison,
        and "pyrule"/"window" are not declared on this class -- confirm
        whether that is intended.
        """
        for a in [
                "name", "condition", "action", "pyrule", "window",
                "var_mapping", "stop_disposition", "managed_object"
        ]:
            if hasattr(self, a) != hasattr(other, a):
                return False
            if hasattr(self, a) and getattr(self, a) != getattr(other, a):
                return False
        if self.alarm_class is None and other.alarm_class is None:
            return True
        if (self.alarm_class is None or other.alarm_class is None
                or self.alarm_class.name != other.alarm_class.name):
            return False
        return True
示例#10
0
class ActiveAlarm(nosql.Document):
    """
    Alarm in the Active state, stored in the ``noc.alarms.active``
    collection.
    """
    meta = {
        "collection": "noc.alarms.active",
        "allow_inheritance": False,
        "indexes": [
            "timestamp", "discriminator", "root", "-severity", "alarm_class",
            ("timestamp", "managed_object")
        ]
    }
    # Status code written into log entries by log_message()
    status = "A"

    timestamp = nosql.DateTimeField(required=True)
    last_update = nosql.DateTimeField(required=True)
    managed_object = nosql.ForeignKeyField(ManagedObject)
    alarm_class = nosql.PlainReferenceField(AlarmClass)
    severity = nosql.IntField(required=True)
    vars = nosql.DictField()
    # Calculated alarm discriminator
    # Has meaning only for alarms with is_unique flag set
    # Calculated as sha1("value1\x00....\x00valueN").hexdigest()
    discriminator = nosql.StringField(required=False)
    log = nosql.ListField(nosql.EmbeddedDocumentField(AlarmLog))
    # Responsible person
    owner = nosql.ForeignKeyField(User, required=False)
    # Ids of the events which opened and closed the alarm
    opening_event = nosql.ObjectIdField(required=False)
    closing_event = nosql.ObjectIdField(required=False)
    # List of subscribers
    subscribers = nosql.ListField(nosql.ForeignKeyField(User))
    #
    custom_subject = nosql.StringField(required=False)
    custom_style = nosql.ForeignKeyField(Style, required=False)
    #
    reopens = nosql.IntField(required=False)
    # RCA
    # Reference to root cause (Active Alarm or Archived Alarm instance)
    root = nosql.ObjectIdField(required=False)

    def __unicode__(self):
        """
        Return the alarm id as unicode string.
        """
        return u"%s" % self.id

    def save(self, *args, **kwargs):
        """
        Save the alarm, defaulting last_update to timestamp when unset.
        """
        if not self.last_update:
            self.last_update = self.timestamp
        return super(ActiveAlarm, self).save(*args, **kwargs)

    def _change_root_severity(self):
        """
        Raise the severity of the root cause alarm to this alarm's
        severity when the root's severity is lower
        """
        if not self.root:
            return
        root = get_alarm(self.root)
        if root and root.severity < self.severity:
            # Must be passed by keyword: the first positional parameter
            # of change_severity() is `user`, so a positional severity
            # would be silently ignored
            root.change_severity(severity=self.severity)
            root.log_message("Severity has been increased by child alarm %s" %
                             self.id)

    def change_severity(self, user="", delta=None, severity=None):
        """
        Change alarm severity and save the alarm.

        :param user: User instance or user name used in the log message
        :param delta: Relative change; the result is not allowed
            to drop below 0
        :param severity: New severity, used when delta is not set.
            Either an object with `severity` and `name` attributes
            or a plain number
        """
        if isinstance(user, User):
            user = user.username
        if delta:
            self.severity = max(0, self.severity + delta)
            if delta > 0:
                self.log_message("%s has increased alarm severity by %s" %
                                 (user, delta))
            else:
                self.log_message("%s has decreased alarm severity by %s" %
                                 (user, delta))
        elif severity:
            if hasattr(severity, "severity"):
                # Severity object: take its numeric value and name
                self.severity = severity.severity
                label = severity.name
            else:
                # Plain numeric severity
                self.severity = int(severity)
                label = severity
            self.log_message("%s has changed severity to %s" %
                             (user, label))
        self._change_root_severity()
        self.save()

    def log_message(self, message, to_save=True):
        """
        Append a message to the alarm log.

        :param message: Log message
        :param to_save: Save the alarm after appending
        """
        self.log += [
            AlarmLog(timestamp=datetime.datetime.now(),
                     from_status=self.status,
                     to_status=self.status,
                     message=message)
        ]
        if to_save:
            self.save()

    def contribute_event(self, e, open=False, close=False):
        """
        Link an event to the alarm.

        :param e: Event
        :param open: Mark the event as the opening one
        :param close: Mark the event as the closing one
        """
        # Set opening event when necessary
        if open:
            self.opening_event = e.id
        # Set closing event when necessary
        if close:
            self.closing_event = e.id
        # Update timestamp: earlier events move the alarm start back,
        # later ones move last_update forward
        if e.timestamp < self.timestamp:
            self.timestamp = e.timestamp
        else:
            self.last_update = max(self.last_update, e.timestamp)
        self.save()
        # Update event's list of alarms
        if self.id not in e.alarms:
            e.alarms.append(self.id)
            e.save()

    def clear_alarm(self, message):
        """
        Clear the alarm: copy it into an ArchivedAlarm with the same id,
        delete the active one and notify.

        :param message: Message for the closing log entry
        :return: ArchivedAlarm instance
        """
        ts = datetime.datetime.now()
        log = self.log + [
            AlarmLog(
                timestamp=ts, from_status="A", to_status="C", message=message)
        ]
        a = ArchivedAlarm(id=self.id,
                          timestamp=self.timestamp,
                          clear_timestamp=ts,
                          managed_object=self.managed_object,
                          alarm_class=self.alarm_class,
                          severity=self.severity,
                          vars=self.vars,
                          log=log,
                          root=self.root,
                          opening_event=self.opening_event,
                          closing_event=self.closing_event,
                          discriminator=self.discriminator,
                          reopens=self.reopens)
        ct = self.alarm_class.get_control_time(self.reopens)
        if ct:
            a.control_time = datetime.datetime.now() + datetime.timedelta(
                seconds=ct)
        a.save()
        # @todo: Clear related correlator jobs
        self.delete()
        # Send notifications
        if not a.root and not self.reopens:
            a.managed_object.event(
                a.managed_object.EV_ALARM_CLEARED, {
                    "alarm": a,
                    "subject": a.subject,
                    "body": a.body,
                    "symptoms": a.alarm_class.symptoms,
                    "recommended_actions": a.alarm_class.recommended_actions,
                    "probable_causes": a.alarm_class.probable_causes
                })
        elif ct:
            # Schedule delayed job
            submit_job("fm.correlator",
                       "control_notify",
                       key=a.id,
                       ts=a.control_time)
        return a

    def get_template_vars(self):
        """
        Prepare template variables: a copy of the alarm vars
        plus the alarm itself under the "alarm" key
        """
        vars = self.vars.copy()
        vars.update({"alarm": self})
        return vars

    @property
    def subject(self):
        """
        Rendered subject template of the alarm class.

        Results of 255 characters or more are shortened to the first
        and last 125 characters joined by " ... ".
        """
        ctx = Context(self.get_template_vars())
        s = Template(self.alarm_class.subject_template).render(ctx)
        if len(s) >= 255:
            s = s[:125] + " ... " + s[-125:]
        return s

    @property
    def body(self):
        """
        Rendered body template of the alarm class
        """
        ctx = Context(self.get_template_vars())
        s = Template(self.alarm_class.body_template).render(ctx)
        return s

    def change_owner(self, user):
        """
        Change alarm's owner
        """
        self.owner = user
        self.save()

    def subscribe(self, user):
        """
        Add the user to the alarm's subscribers, if not subscribed yet
        """
        if user.id not in self.subscribers:
            self.subscribers += [user.id]
            self.log_message(
                "%s(%s) has been subscribed" %
                ((" ".join([user.first_name, user.last_name]), user.username)),
                to_save=False)
            self.save()

    def unsubscribe(self, user):
        """
        Remove the user from the alarm's subscribers, if subscribed
        """
        if self.is_subscribed(user):
            # subscribe() and is_subscribed() treat entries as user ids;
            # getattr() additionally tolerates entries exposing `.id`
            subscriber_ids = [getattr(u, "id", u) for u in self.subscribers]
            self.subscribers = [
                uid for uid in subscriber_ids if uid != user.id
            ]
            self.log_message(
                "%s(%s) has been unsubscribed" %
                ((" ".join([user.first_name, user.last_name]), user.username)),
                to_save=False)
            self.save()

    def is_owner(self, user):
        """
        Check the user is the alarm's owner
        """
        return self.owner == user

    def is_subscribed(self, user):
        """
        Check the user's id is in the subscribers list
        """
        return user.id in self.subscribers

    @property
    def is_unassigned(self):
        """
        True when the alarm has no owner
        """
        return self.owner is None

    @property
    def duration(self):
        """
        Whole seconds passed since the alarm's timestamp
        """
        dt = datetime.datetime.now() - self.timestamp
        return dt.days * 86400 + dt.seconds

    @property
    def display_duration(self):
        """
        Time passed since the alarm's timestamp as "HH:MM:SS",
        prefixed with "<days>d " when at least one day has passed
        """
        duration = datetime.datetime.now() - self.timestamp
        secs = duration.seconds % 60
        # Floor division keeps the parts integral regardless of
        # the interpreter's `/` semantics
        mins = (duration.seconds // 60) % 60
        hours = (duration.seconds // 3600) % 24
        days = duration.days
        r = "%02d:%02d:%02d" % (hours, mins, secs)
        if days:
            r = "%dd %s" % (days, r)
        return r

    @property
    def effective_style(self):
        """
        Custom style when set, otherwise the style of the severity
        """
        if self.custom_style:
            return self.custom_style
        else:
            return AlarmSeverity.get_severity(self.severity).style

    def set_root(self, root_alarm):
        """
        Set root cause.

        Does nothing when the alarm already has a root or when the
        new root would form a loop.

        :param root_alarm: Alarm to be set as root cause
        :raises Exception: when root_alarm is the alarm itself
        """
        if self.root:
            return
        if self.id == root_alarm.id:
            raise Exception("Cannot set self as root cause")
        # Detect loop: walk up the chain of roots and stop
        # when it leads back to this alarm
        root = root_alarm
        while root and root.root:
            root = root.root
            if root == self.id:
                return
            root = get_alarm(root)
        # Set root
        self.root = root_alarm.id
        self.log_message("Alarm %s has been marked as root cause" %
                         root_alarm.id)
        # self.save()  Saved by log_message
        root_alarm.log_message("Alarm %s has been marked as child" % self.id)
        self._change_root_severity()
        # Clear pending notifications
        Notification.purge_delayed("alarm:%s" % self.id)

    @classmethod
    def enable_caching(cls, ttl=600):
        """
        Enable caching on the alarm_class field.

        :param ttl: Value passed to the field's set_cache()
        """
        cls._fields["alarm_class"].set_cache(ttl)
示例#11
0
class ActiveEvent(document.Document):
    """
    Event in the Active state, stored in the ``noc.events.active``
    collection
    """
    meta = {
        "collection": "noc.events.active",
        "allow_inheritance": False,
        "indexes": [
            "timestamp", "discriminator", "alarms",
            ("timestamp", "event_class", "managed_object")
        ]
    }
    # Status code written into log entries by log_message()
    status = "A"
    # Fields
    timestamp = fields.DateTimeField(required=True)
    managed_object = nosql.ForeignKeyField(ManagedObject, required=True)
    event_class = nosql.PlainReferenceField(EventClass, required=True)
    start_timestamp = fields.DateTimeField(required=True)
    repeats = fields.IntField(required=True)
    raw_vars = nosql.RawDictField()
    resolved_vars = nosql.RawDictField()
    vars = fields.DictField()
    log = fields.ListField(fields.EmbeddedDocumentField(EventLog))
    discriminator = fields.StringField(required=False)
    # Ids of related alarms
    alarms = fields.ListField(nosql.ObjectIdField())

    def __unicode__(self):
        """
        Return the event id as unicode string.
        """
        return u"%s" % self.id

    def mark_as_new(self, message=None):
        """
        Move to new queue: save a NewEvent with the same id
        and delete the active event.

        :param message: Log message, "Reclassification requested"
            when omitted
        :return: NewEvent instance
        """
        if message is None:
            message = "Reclassification requested"
        log = self.log + [
            EventLog(timestamp=datetime.datetime.now(),
                     from_status="A",
                     to_status="N",
                     message=message)
        ]
        e = NewEvent(id=self.id,
                     timestamp=self.timestamp,
                     managed_object=self.managed_object,
                     raw_vars=self.raw_vars,
                     log=log)
        e.save()
        self.delete()
        return e

    def mark_as_failed(self, version, traceback):
        """
        Move event into noc.events.failed: save a FailedEvent with
        the same id and delete the active event.

        :param version: NOC version, stored and used in the log message
        :param traceback: Traceback to store with the failed event
        :return: FailedEvent instance
        """
        message = "Failed to classify on NOC version %s" % version
        log = self.log + [
            # The event leaves the Active state, as in mark_as_new()
            # and mark_as_archived()
            EventLog(timestamp=datetime.datetime.now(),
                     from_status="A",
                     to_status="F",
                     message=message)
        ]
        e = FailedEvent(id=self.id,
                        timestamp=self.timestamp,
                        managed_object=self.managed_object,
                        raw_vars=self.raw_vars,
                        version=version,
                        traceback=traceback,
                        log=log)
        e.save()
        self.delete()
        return e

    def mark_as_archived(self, message):
        """
        Move event into the archive: save an ArchivedEvent with
        the same id and delete the active event.

        :param message: Log message
        :return: ArchivedEvent instance
        """
        log = self.log + [
            EventLog(timestamp=datetime.datetime.now(),
                     from_status="A",
                     to_status="S",
                     message=message)
        ]
        e = ArchivedEvent(id=self.id,
                          timestamp=self.timestamp,
                          managed_object=self.managed_object,
                          event_class=self.event_class,
                          start_timestamp=self.start_timestamp,
                          repeats=self.repeats,
                          raw_vars=self.raw_vars,
                          resolved_vars=self.resolved_vars,
                          vars=self.vars,
                          log=log,
                          alarms=self.alarms)
        e.save()
        self.delete()
        return e

    def drop(self):
        """
        Mark event to be dropped. Only for use from event trigger pyrule.
        All further operations on event may lead to unpredictable results.
        Event actually deleted by noc-classifier
        """
        # A missing id is the drop marker checked by `to_drop`
        self.id = None

    @property
    def to_drop(self):
        """
        Check event marked to be dropped
        """
        return self.id is None

    def log_message(self, message):
        """
        Append a message to the event log and save the event.

        :param message: Log message
        """
        self.log += [
            EventLog(timestamp=datetime.datetime.now(),
                     from_status=self.status,
                     to_status=self.status,
                     message=message)
        ]
        self.save()

    def log_suppression(self, timestamp):
        """
        Increase repeat count and update timestamp, if required

        :param timestamp: Timestamp of the suppressed event
        """
        self.repeats += 1
        if timestamp > self.timestamp:
            self.timestamp = timestamp
        self.save()

    @property
    def duration(self):
        """
        Logged event duration in seconds
        """
        return total_seconds(self.timestamp - self.start_timestamp)

    def get_template_vars(self):
        """
        Prepare template variables: a copy of the event vars
        plus the event itself under the "event" key
        """
        vars = self.vars.copy()
        vars.update({"event": self})
        return vars

    @property
    def subject(self):
        """
        Rendered subject template of the event class.

        Results of 255 characters or more are shortened to the first
        and last 125 characters joined by " ... ".
        """
        ctx = Context(self.get_template_vars())
        s = Template(self.event_class.subject_template).render(ctx)
        if len(s) >= 255:
            s = s[:125] + " ... " + s[-125:]
        return s

    @property
    def body(self):
        """
        Rendered body template of the event class
        """
        ctx = Context(self.get_template_vars())
        s = Template(self.event_class.body_template).render(ctx)
        return s

    @property
    def managed_object_id(self):
        """
        Hack to return managed_object.id without SQL lookup
        """
        # Read the raw stored value: it is either the integer id
        # or an object carrying `.id`
        o = self._data["managed_object"]
        if isinstance(o, (int, long)):
            return o
        return o.id
示例#12
0
class AlarmClass(nosql.Document):
    """
    Alarm class
    """
    meta = {
        "collection": "noc.alarmclasses",
        "allow_inheritance": False,
        "json_collection": "fm.alarmclasses",
        "json_depends_on": ["fm.alarmseverities"]
    }

    name = fields.StringField(required=True, unique=True)
    uuid = fields.UUIDField(binary=True)
    description = fields.StringField(required=False)
    # Create or not create separate Alarm
    # if is_unique is True and there is active alarm
    # Do not create separate alarm if is_unique set
    is_unique = fields.BooleanField(default=False)
    # List of var names to be used as discriminator key
    discriminator = fields.ListField(nosql.StringField())
    # Can alarm status be cleared by user
    user_clearable = fields.BooleanField(default=True)
    # Default alarm severity
    default_severity = nosql.PlainReferenceField(AlarmSeverity)
    #
    datasources = fields.ListField(fields.EmbeddedDocumentField(DataSource))
    vars = fields.ListField(fields.EmbeddedDocumentField(AlarmClassVar))
    # Text messages
    subject_template = fields.StringField()
    body_template = fields.StringField()
    symptoms = fields.StringField()
    probable_causes = fields.StringField()
    recommended_actions = fields.StringField()

    # Flap detection
    flap_condition = fields.StringField(required=False,
                                        choices=[("none", "none"),
                                                 ("count", "count")],
                                        default=None)
    flap_window = fields.IntField(required=False, default=0)
    flap_threshold = fields.FloatField(required=False, default=0)
    # RCA
    root_cause = fields.ListField(
        fields.EmbeddedDocumentField(AlarmRootCauseCondition))
    # Job descriptions
    jobs = fields.ListField(fields.EmbeddedDocumentField(AlarmClassJob))
    #
    handlers = fields.ListField(fields.StringField())
    # Plugin settings
    plugins = fields.ListField(fields.EmbeddedDocumentField(AlarmPlugin))
    # Time in seconds to delay alarm risen notification
    notification_delay = fields.IntField(required=False)
    # Control time to reopen alarm instead of creating new
    control_time0 = fields.IntField(required=False)
    # Control time to reopen alarm after 1 reopen
    control_time1 = fields.IntField(required=False)
    # Control time to reopen alarm after >1 reopen
    control_timeN = fields.IntField(required=False)
    #
    category = nosql.ObjectIdField()

    def __unicode__(self):
        """
        Return the alarm class name.
        """
        return self.name

    def save(self, *args, **kwargs):
        """
        Save the alarm class, linking it to its category first.

        The category name is the class name without its last
        " | "-separated part; the category is created when missing.

        :return: Result of the parent class save()
        """
        # "A | B | C" -> "A | B"; a name without separator gives ""
        c_name = " | ".join(self.name.split(" | ")[:-1])
        c = AlarmClassCategory.objects.filter(name=c_name).first()
        if not c:
            c = AlarmClassCategory(name=c_name)
            c.save()
        self.category = c.id
        # Pass the parent's result through, as ActiveAlarm.save() does
        return super(AlarmClass, self).save(*args, **kwargs)

    def get_discriminator(self, vars):
        """
        Calculate discriminator hash

        The values of the discriminator variables are converted to
        strings, sorted, joined with "\\x00" and hashed with SHA1.
        Empty vars give the SHA1 of an empty string.

        :param vars: Dict of vars
        :returns: Hex digest of the discriminator hash
        """
        if not vars:
            return hashlib.sha1("").hexdigest()
        values = [str(vars[key]) for key in self.discriminator]
        values.sort()
        return hashlib.sha1("\x00".join(values)).hexdigest()

    def to_json(self):
        c = self
        r = ["{"]
        r += ["    \"name\": \"%s\"," % q(c.name)]
        r += ["    \"$collection\": \"%s\"," % self._meta["json_collection"]]
        r += ["    \"uuid\": \"%s\"," % c.uuid]
        if c.description:
            r += ["    \"desciption\": \"%s\"," % q(c.description)]
        r += ["    \"is_unique\": %s," % q(c.is_unique)]
        if c.is_unique and c.discriminator:
            r += [
                "    \"discriminator\": [%s]," %
                ", ".join(["\"%s\"" % q(d) for d in c.discriminator])
            ]
        r += ["    \"user_clearable\": %s," % q(c.user_clearable)]
        r += [
            "    \"default_severity__name\": \"%s\"," %
            q(c.default_severity.name)
        ]
        # datasources
        if c.datasources:
            r += ["    \"datasources\": ["]
            jds = []
            for ds in c.datasources:
                x = []
                x += ["            \"name\": \"%s\"" % q(ds.name)]
                x += ["            \"datasource\": \"%s\"" % q(ds.datasource)]
                ss = []
                for k in sorted(ds.search):
                    ss += [
                        "                \"%s\": \"%s\"" %
                        (q(k), q(ds.search[k]))
                    ]
                x += [
                    "            \"search\": {\n%s\n            }" %
                    (",\n".join(ss))
                ]
                jds += ["        {\n%s\n        }" % ",\n".join(x)]
            r += [",\n\n".join(jds)]
            r += ["    ],"]
        # vars
        vars = []
        for v in c.vars:
            vd = ["        {"]
            vd += ["            \"name\": \"%s\"," % q(v.name)]
            vd += ["            \"description\": \"%s\"" % q(v.description)]
            if v.default:
                vd[-1] += ","
                vd += ["            \"default\": \"%s\"" % q(v.default)]
            vd += ["        }"]
            vars += ["\n".join(vd)]
        r += ["    \"vars\": ["]
        r += [",\n".join(vars)]
        r += ["    ],"]
        # Handlers
        if self.handlers:
            hh = ["        \"%s\"" % h for h in self.handlers]
            r += ["    \"handlers\": ["]
            r += [",\n\n".join(hh)]
            r += ["    ],"]
        # Text
        r += ["    \"subject_template\": \"%s\"," % q(c.subject_template)]
        r += ["    \"body_template\": \"%s\"," % q(c.body_template)]
        r += ["    \"symptoms\": \"%s\"," % q(c.symptoms)]
        r += ["    \"probable_causes\": \"%s\"," % q(c.probable_causes)]
        r += [
            "    \"recommended_actions\": \"%s\"," % q(c.recommended_actions)
        ]
        # Root cause
        if self.root_cause:
            rc = []
            for rr in self.root_cause:
                rcd = ["        {"]
                rcd += ["            \"name\": \"%s\"," % rr.name]
                rcd += ["            \"root__name\": \"%s\"," % rr.root.name]
                rcd += ["            \"window\": %d," % rr.window]
                if rr.condition:
                    rcd += [
                        "            \"condition\": \"%s\"," % rr.condition
                    ]
                rcd += ["            \"match_condition\": {"]
                mcv = []
                for v in rr.match_condition:
                    mcv += [
                        "                \"%s\": \"%s\"" %
                        (v, rr.match_condition[v])
                    ]
                rcd += [",\n".join(mcv)]
                rcd += ["            }"]
                rcd += ["        }"]
                rc += ["\n".join(rcd)]
            if r[-1][-1] != ",":
                r[-1] += ","
            r += ["    \"root_cause\": ["]
            r += [",\n".join(rc)]
            r += ["    ]"]
        # Jobs
        if self.jobs:
            jobs = []
            for job in self.jobs:
                jd = ["        {"]
                jd += ["            \"job\": \"%s\"," % job.job]
                jd += ["            \"interval\": %d," % job.interval]
                jd += ["            \"vars\": {"]
                jv = []
                for v in job.vars:
                    jv += ["                \"%s\": \"%s\"" % (v, job.vars[v])]
                jd += [",\n".join(jv)]
                jd += ["            }"]
                jd += ["        }"]
                jobs += ["\n".join(jd)]
            if r[-1][-1] != ",":
                r[-1] += ","
            r += ["    \"jobs\": ["]
            r += [",\n".join(jobs)]
            r += ["    ]"]
        # Plugins
        if self.plugins:
            if r[-1][-1] != ",":
                r[-1] += ","
            plugins = []
            for p in self.plugins:
                pd = ["        {"]
                pd += ["            \"name\": \"%s\"" % p.name]
                if p.config:
                    pd[-1] += ","
                    pc = []
                    for v in p.config:
                        pc += [
                            "                \"%s\": \"%s\"" %
                            (v, p.config.vars[v])
                        ]
                    pd += ["            \"config\": {"]
                    pd += [",\n".join(pc)]
                    pd += ["            }"]
                pd += ["        }"]
                plugins += ["\n".join(pd)]
            r += ["    \"plugins\": ["]
            r += [",\n".join(plugins)]
            r += ["    ]"]
        if self.notification_delay:
            if r[-1][-1] != ",":
                r[-1] += ","
            r += ["    \"notification_delay\": %d" % self.notification_delay]
        if self.control_time0:
            if r[-1][-1] != ",":
                r[-1] += ","
            r += ["    \"control_time0\": %d" % self.control_time0]
            if self.control_time1:
                r[-1] += ","
                r += ["    \"control_time1\": %d" % self.control_time1]
                if self.control_timeN:
                    r[-1] += ","
                    r += ["    \"control_timeN\": %d" % self.control_timeN]
        # Close
        if r[-1].endswith(","):
            r[-1] = r[-1][:-1]
        r += ["}", ""]
        return "\n".join(r)

    def get_json_path(self):
        """
        Build the relative path of the JSON file for this object.

        The name is split on "|"; every part is stripped of surrounding
        whitespace and passed through quote_safe_path, the parts are joined
        as path components and ".json" is appended.

        :returns: Relative path string
        """
        segments = []
        for part in self.name.split("|"):
            segments.append(quote_safe_path(part.strip()))
        return os.path.join(*segments) + ".json"

    @property
    def config(self):
        """
        AlarmClassConfig bound to this alarm class, or None when absent.

        The lookup result (including None) is memoized on the instance
        in the ``_config`` attribute.
        """
        try:
            return self._config
        except AttributeError:
            pass
        # First access: query once and remember the outcome
        self._config = AlarmClassConfig.objects.filter(
            alarm_class=self.id).first()
        return self._config

    def get_notification_delay(self):
        """
        Effective notification delay.

        The value is read from the attached config when there is one,
        otherwise from the alarm class itself.

        :returns: Delay value, or None when it is unset or zero
        """
        source = self.config or self
        return source.notification_delay or None

    def get_control_time(self, reopens):
        """
        Effective control time for the given number of reopens.

        ``control_time0`` is used for 0 reopens, ``control_time1`` for 1
        and ``control_timeN`` for anything else. The value is read from
        the attached config when there is one, otherwise from the alarm
        class itself.

        :param reopens: Number of times the alarm has been reopened
        :returns: Control time value, or None when it is unset or zero
        """
        if reopens == 0:
            field = "control_time0"
        elif reopens == 1:
            field = "control_time1"
        else:
            field = "control_timeN"
        source = self.config if self.config else self
        return getattr(source, field) or None
示例#13
0
class ArchivedAlarm(Document):
    """
    Archived alarm document stored in the "noc.alarms.archived" collection.

    Carries the alarm data together with its log, escalation state and
    service/subscriber summaries, and can be turned back into
    an ActiveAlarm via reopen().
    """

    meta = {
        "collection": "noc.alarms.archived",
        "strict": False,
        "auto_create_index": False,
        "indexes": [
            "root",
            "timestamp",
            "managed_object",
            ("managed_object", "discriminator", "control_time"),
            "escalation_tt",
            "escalation_ts",
        ],
    }
    # Status code written to log entries of archived alarms
    status = "C"

    timestamp = DateTimeField(required=True)
    clear_timestamp = DateTimeField(required=True)
    managed_object = nosql.ForeignKeyField(ManagedObject)
    alarm_class = nosql.PlainReferenceField(AlarmClass)
    severity = IntField(required=True)
    vars = DictField()
    log = ListField(EmbeddedDocumentField(AlarmLog))
    #
    opening_event = ObjectIdField(required=False)
    closing_event = ObjectIdField(required=False)
    # Number of reopens
    reopens = IntField(required=False)
    # Copied discriminator
    discriminator = StringField(required=False)
    # Manual acknowledgement timestamp
    ack_ts = DateTimeField(required=False)
    # Manual acknowledgement user name
    ack_user = StringField(required=False)
    # Control time within which the alarm will be reopened
    # instead of creating a new alarm
    control_time = DateTimeField(required=False)
    # RCA
    # Reference to root cause (Active Alarm or Archived Alarm instance)
    root = ObjectIdField(required=False)
    # Escalated TT ID in form
    # <external system name>:<external tt id>
    escalation_ts = DateTimeField(required=False)
    escalation_tt = StringField(required=False)
    escalation_error = StringField(required=False)
    escalation_ctx = LongField(required=False)
    escalation_close_ts = DateTimeField(required=False)
    escalation_close_error = StringField(required=False)
    escalation_close_ctx = LongField(required=False)
    # Directly affected services summary, grouped by profiles
    # (connected to the same managed object)
    direct_services = ListField(EmbeddedDocumentField(SummaryItem))
    direct_subscribers = ListField(EmbeddedDocumentField(SummaryItem))
    # Indirectly affected services summary, grouped by profiles
    # (covered by this and all inferred alarms)
    total_objects = ListField(EmbeddedDocumentField(ObjectSummaryItem))
    total_services = ListField(EmbeddedDocumentField(SummaryItem))
    total_subscribers = ListField(EmbeddedDocumentField(SummaryItem))
    # Paths
    adm_path = ListField(IntField())
    segment_path = ListField(ObjectIdField())
    container_path = ListField(ObjectIdField())
    # Uplinks, for topology_rca only
    uplinks = ListField(IntField())
    # RCA neighbor cache, for topology_rca only
    rca_neighbors = ListField(IntField())

    def __str__(self):
        return "%s" % self.id

    def iter_changed_datastream(self, changed_fields=None):
        """
        Yield ("alarm", id) when config.datastream.enable_alarm is set.

        :param changed_fields: Not used by this implementation
        """
        if config.datastream.enable_alarm:
            yield "alarm", self.id

    def log_message(self, message):
        """
        Append a log entry that keeps the current status and save the alarm.

        :param message: Log message text
        """
        self.log += [
            AlarmLog(
                timestamp=datetime.datetime.now(),
                from_status=self.status,
                to_status=self.status,
                message=message,
            )
        ]
        self.save()

    def get_template_vars(self):
        """
        Prepare template variables

        :returns: Copy of the alarm vars extended with the "alarm" key
            pointing to this instance
        """
        vars = self.vars.copy()
        vars.update({"alarm": self})
        return vars

    @property
    def subject(self):
        """
        Subject rendered from the alarm class subject template.

        Results of 255 characters or more are shortened to the first and
        last 125 characters joined by " ... ".
        """
        ctx = Context(self.get_template_vars())
        s = Template(self.alarm_class.subject_template).render(ctx)
        if len(s) >= 255:
            s = s[:125] + " ... " + s[-125:]
        return s

    @property
    def body(self):
        """
        Body rendered from the alarm class body template.
        """
        ctx = Context(self.get_template_vars())
        s = Template(self.alarm_class.body_template).render(ctx)
        return s

    @property
    def duration(self):
        """
        Time between timestamp and clear_timestamp in whole seconds.
        """
        dt = self.clear_timestamp - self.timestamp
        return dt.days * 86400 + dt.seconds

    @property
    def display_duration(self):
        """
        Duration formatted as "[<days>d ]HH:MM:SS".
        """
        duration = self.clear_timestamp - self.timestamp
        # Integer division keeps the components integral on Python 3
        # instead of relying on "%d" to truncate floats
        secs = duration.seconds % 60
        mins = (duration.seconds // 60) % 60
        hours = (duration.seconds // 3600) % 24
        days = duration.days
        if days:
            return "%dd %02d:%02d:%02d" % (days, hours, mins, secs)
        else:
            return "%02d:%02d:%02d" % (hours, mins, secs)

    @property
    def effective_style(self):
        """
        Style of the AlarmSeverity matching this alarm's severity.
        """
        return AlarmSeverity.get_severity(self.severity).style

    def set_root(self, root_alarm):
        """
        No-op for archived alarms.
        """
        pass

    def reopen(self, message):
        """
        Reopen alarm back

        Creates an ActiveAlarm with the same id and copied fields, appends
        a "C" -> "A" log entry, increments the reopen counter and deletes
        the archived document. A reopen event is sent to the managed object
        only for alarms without root that were not reopened before.

        :param message: Text of the log entry describing the reopen
        :returns: Newly created ActiveAlarm
        """
        reopens = self.reopens or 0
        ts = datetime.datetime.now()
        log = self.log + [AlarmLog(timestamp=ts, from_status="C", to_status="A", message=message)]
        a = ActiveAlarm(
            id=self.id,
            timestamp=self.timestamp,
            last_update=ts,
            managed_object=self.managed_object,
            alarm_class=self.alarm_class,
            severity=self.severity,
            vars=self.vars,
            log=log,
            root=self.root,
            escalation_ts=self.escalation_ts,
            escalation_tt=self.escalation_tt,
            escalation_error=self.escalation_error,
            escalation_ctx=self.escalation_ctx,
            opening_event=self.opening_event,
            discriminator=self.discriminator,
            reopens=reopens + 1,
            direct_services=self.direct_services,
            direct_subscribers=self.direct_subscribers,
            total_objects=self.total_objects,
            total_services=self.total_services,
            total_subscribers=self.total_subscribers,
            adm_path=self.adm_path,
            segment_path=self.segment_path,
            container_path=self.container_path,
            uplinks=self.uplinks,
        )
        a.save()
        # @todo: Clear related correlator jobs
        self.delete()
        # Send notifications
        # Do not set notifications for child and for previously reopened
        # alarms
        if not a.root and not reopens:
            a.managed_object.event(
                a.managed_object.EV_ALARM_REOPENED,
                {
                    "alarm": a,
                    "subject": a.subject,
                    "body": a.body,
                    "symptoms": a.alarm_class.symptoms,
                    "recommended_actions": a.alarm_class.recommended_actions,
                    "probable_causes": a.alarm_class.probable_causes,
                },
            )
        return a

    def iter_consequences(self):
        """
        Generator yielding all consequences alarm

        Walks archived alarms whose root is this alarm, recursively.
        """
        for a in ArchivedAlarm.objects.filter(root=self.id):
            yield a
            for ca in a.iter_consequences():
                yield ca

    def iter_affected(self):
        """
        Generator yielding all affected managed objects

        Yields this alarm's managed object first, then the managed objects
        of all consequences, each object only once.
        """
        seen = {self.managed_object}
        yield self.managed_object
        for a in self.iter_consequences():
            if a.managed_object not in seen:
                seen.add(a.managed_object)
                yield a.managed_object

    def set_escalation_close_error(self, error):
        """
        Store the escalation close error on the instance and in the database.

        :param error: Error text
        """
        # Keep the in-memory attribute in sync with the field written below;
        # escalation_error belongs to the opening escalation and is untouched
        self.escalation_close_error = error
        self._get_collection().update({"_id": self.id}, {"$set": {"escalation_close_error": error}})

    def close_escalation(self):
        """
        Set escalation_close_ts to the current time, on the instance
        and in the database.
        """
        now = datetime.datetime.now()
        self.escalation_close_ts = now
        self._get_collection().update({"_id": self.id}, {"$set": {"escalation_close_ts": now}})

    def set_escalation_close_ctx(self):
        """
        Store the current span context as escalation_close_ctx.

        The update is performed when there is a current context or when
        a context has been stored before.
        """
        current_context, current_span = get_current_span()
        if current_context or self.escalation_close_ctx:
            self.escalation_close_ctx = current_context
            self._get_collection().update(
                {"_id": self.id}, {"$set": {"escalation_close_ctx": current_context}}
            )
示例#14
0
class AlarmClass(nosql.Document):
    """
    Alarm class

    Stored in the "noc.alarmclasses" collection and exported to
    the "fm.alarmclasses" JSON collection by to_json().
    """
    meta = {
        "collection": "noc.alarmclasses",
        "strict": False,
        "auto_create_index": False,
        "json_collection": "fm.alarmclasses",
        "json_depends_on": [
            "fm.alarmseverities"
        ],
    }

    name = fields.StringField(required=True, unique=True)
    uuid = fields.UUIDField(binary=True)
    description = fields.StringField(required=False)
    # Create or not create separate Alarm
    # if is_unique is True and there is active alarm
    # Do not create separate alarm if is_unique set
    is_unique = fields.BooleanField(default=False)
    # List of var names to be used as discriminator key
    discriminator = fields.ListField(nosql.StringField())
    # Can alarm status be cleared by user
    user_clearable = fields.BooleanField(default=True)
    # Default alarm severity
    default_severity = nosql.PlainReferenceField(AlarmSeverity)
    #
    datasources = fields.ListField(fields.EmbeddedDocumentField(DataSource))
    vars = fields.ListField(fields.EmbeddedDocumentField(AlarmClassVar))
    # Text messages
    subject_template = fields.StringField()
    body_template = fields.StringField()
    symptoms = fields.StringField()
    probable_causes = fields.StringField()
    recommended_actions = fields.StringField()

    # Flap detection
    flap_condition = fields.StringField(
        required=False,
        choices=[("none", "none"), ("count", "count")],
        default="none")
    flap_window = fields.IntField(required=False, default=0)
    flap_threshold = fields.FloatField(required=False, default=0)
    # RCA
    root_cause = fields.ListField(
        fields.EmbeddedDocumentField(AlarmRootCauseCondition))
    topology_rca = fields.BooleanField(default=False)
    # List of handlers to be called on alarm raising
    handlers = fields.ListField(fields.StringField())
    # List of handlers to be called on alarm clear
    clear_handlers = fields.ListField(fields.StringField())
    # Plugin settings
    plugins = fields.ListField(fields.EmbeddedDocumentField(AlarmPlugin))
    # Time in seconds to delay alarm risen notification
    notification_delay = fields.IntField(required=False)
    # Control time to reopen alarm instead of creating new
    control_time0 = fields.IntField(required=False)
    # Control time to reopen alarm after 1 reopen
    control_time1 = fields.IntField(required=False)
    # Control time to reopen alarm after >1 reopen
    control_timeN = fields.IntField(required=False)
    # Consequence recover time
    # Root cause will be detached if consequence alarm
    # will not clear itself in *recover_time*
    recover_time = fields.IntField(required=False, default=300)
    #
    bi_id = fields.LongField(unique=True)
    #
    category = nosql.ObjectIdField()

    # Class-level lookup caches: up to 1000 entries, 60 seconds TTL each
    _id_cache = cachetools.TTLCache(maxsize=1000, ttl=60)
    _bi_id_cache = cachetools.TTLCache(maxsize=1000, ttl=60)
    _name_cache = cachetools.TTLCache(maxsize=1000, ttl=60)

    # Resolved handler lists, keyed by alarm class id (never expired)
    _handlers_cache = {}
    _clear_handlers_cache = {}

    def __unicode__(self):
        return self.name

    @classmethod
    @cachetools.cachedmethod(operator.attrgetter("_id_cache"), lock=lambda _: id_lock)
    def get_by_id(cls, id):
        """
        Get alarm class by id, using the class-level TTL cache.

        :param id: Alarm class id
        :returns: AlarmClass instance or None
        """
        return AlarmClass.objects.filter(id=id).first()

    @classmethod
    @cachetools.cachedmethod(operator.attrgetter("_bi_id_cache"), lock=lambda _: id_lock)
    def get_by_bi_id(cls, id):
        """
        Get alarm class by bi_id, using the class-level TTL cache.

        :param id: bi_id value
        :returns: AlarmClass instance or None
        """
        return AlarmClass.objects.filter(bi_id=id).first()

    @classmethod
    @cachetools.cachedmethod(operator.attrgetter("_name_cache"), lock=lambda _: id_lock)
    def get_by_name(cls, name):
        """
        Get alarm class by name, using the class-level TTL cache.

        :param name: Alarm class name
        :returns: AlarmClass instance or None
        """
        return AlarmClass.objects.filter(name=name).first()

    def get_handlers(self):
        """
        Resolve the names in *handlers* via get_handler.

        Names failing with ImportError or resolving to a false value
        are skipped. The result is cached by alarm class id.

        :returns: List of resolved handlers
        """
        @cachetools.cached(self._handlers_cache, key=lambda x: x.id, lock=handlers_lock)
        def _get_handlers(alarm_class):
            handlers = []
            for hh in alarm_class.handlers:
                try:
                    h = get_handler(hh)
                except ImportError:
                    h = None
                if h:
                    handlers += [h]
            return handlers

        return _get_handlers(self)

    def get_clear_handlers(self):
        """
        Resolve the names in *clear_handlers* via get_handler.

        Names failing with ImportError or resolving to a false value
        are skipped. The result is cached by alarm class id.

        :returns: List of resolved handlers
        """
        @cachetools.cached(self._clear_handlers_cache, key=lambda x: x.id, lock=handlers_lock)
        def _get_handlers(alarm_class):
            handlers = []
            for hh in alarm_class.clear_handlers:
                try:
                    h = get_handler(hh)
                except ImportError:
                    h = None
                if h:
                    handlers += [h]
            return handlers

        return _get_handlers(self)

    def save(self, *args, **kwargs):
        """
        Bind the alarm class to its category and save.

        The category name is the alarm class name without its last
        " | "-separated part; a missing category is created.
        """
        c_name = " | ".join(self.name.split(" | ")[:-1])
        c = AlarmClassCategory.objects.filter(name=c_name).first()
        if not c:
            c = AlarmClassCategory(name=c_name)
            c.save()
        self.category = c.id
        super(AlarmClass, self).save(*args, **kwargs)

    def get_discriminator(self, vars):
        """
        Calculate discriminator hash

        :param vars: Dict of vars
        :returns: Discriminator hash
        """
        if vars:
            ds = sorted(str(vars[n]) for n in self.discriminator)
            data = "\x00".join(ds)
        else:
            data = ""
        # hashlib requires bytes on Python 3; on Python 2 str is already
        # bytes, so the value (and the resulting hash) is left untouched
        if not isinstance(data, bytes):
            data = data.encode("utf-8")
        return hashlib.sha1(data).hexdigest()

    def to_json(self):
        """
        Serialize the alarm class to JSON text.

        The document is assembled line by line with fixed indentation.
        Optional sections are emitted only when the matching field is set;
        the trailing comma of the last emitted line is removed before
        the closing brace.

        :returns: JSON text ending with a newline
        """
        c = self
        r = ["{"]
        r += ["    \"name\": \"%s\"," % q(c.name)]
        r += ["    \"$collection\": \"%s\"," % self._meta["json_collection"]]
        r += ["    \"uuid\": \"%s\"," % c.uuid]
        if c.description:
            # Key must match the *description* field name to round-trip
            r += ["    \"description\": \"%s\"," % q(c.description)]
        r += ["    \"is_unique\": %s," % q(c.is_unique)]
        if c.is_unique and c.discriminator:
            r += ["    \"discriminator\": [%s]," % ", ".join(["\"%s\"" % q(d) for d in c.discriminator])]
        r += ["    \"user_clearable\": %s," % q(c.user_clearable)]
        r += ["    \"default_severity__name\": \"%s\"," % q(c.default_severity.name)]
        # datasources
        if c.datasources:
            r += ["    \"datasources\": ["]
            jds = []
            for ds in c.datasources:
                x = []
                x += ["            \"name\": \"%s\"" % q(ds.name)]
                x += ["            \"datasource\": \"%s\"" % q(ds.datasource)]
                ss = []
                for k in sorted(ds.search):
                    ss += ["                \"%s\": \"%s\"" % (q(k), q(ds.search[k]))]
                x += ["            \"search\": {\n%s\n            }" % (",\n".join(ss))]
                jds += ["        {\n%s\n        }" % ",\n".join(x)]
            r += [",\n\n".join(jds)]
            r += ["    ],"]
        # vars
        jvars = []
        for v in c.vars:
            vd = ["        {"]
            vd += ["            \"name\": \"%s\"," % q(v.name)]
            vd += ["            \"description\": \"%s\"" % q(v.description)]
            if v.default:
                vd[-1] += ","
                vd += ["            \"default\": \"%s\"" % q(v.default)]
            vd += ["        }"]
            jvars += ["\n".join(vd)]
        r += ["    \"vars\": ["]
        r += [",\n".join(jvars)]
        r += ["    ],"]
        # Handlers
        if self.handlers:
            hh = ["        \"%s\"" % h for h in self.handlers]
            r += ["    \"handlers\": ["]
            r += [",\n\n".join(hh)]
            r += ["    ],"]
        if self.clear_handlers:
            hh = ["        \"%s\"" % h for h in self.clear_handlers]
            r += ["    \"clear_handlers\": ["]
            r += [",\n\n".join(hh)]
            r += ["    ],"]
        # Text
        r += ["    \"subject_template\": \"%s\"," % q(c.subject_template)]
        r += ["    \"body_template\": \"%s\"," % q(c.body_template)]
        r += ["    \"symptoms\": \"%s\"," % q(c.symptoms if c.symptoms else "")]
        r += ["    \"probable_causes\": \"%s\"," % q(c.probable_causes if c.probable_causes else "")]
        r += ["    \"recommended_actions\": \"%s\"," % q(c.recommended_actions if c.recommended_actions else "")]
        # Root cause
        if self.root_cause:
            rc = []
            for rr in self.root_cause:
                rcd = ["        {"]
                rcd += ["            \"name\": \"%s\"," % rr.name]
                rcd += ["            \"root__name\": \"%s\"," % rr.root.name]
                rcd += ["            \"window\": %d," % rr.window]
                if rr.condition:
                    rcd += ["            \"condition\": \"%s\"," % rr.condition]
                rcd += ["            \"match_condition\": {"]
                mcv = []
                for v in rr.match_condition:
                    mcv += ["                \"%s\": \"%s\"" % (v, rr.match_condition[v])]
                rcd += [",\n".join(mcv)]
                rcd += ["            }"]
                rcd += ["        }"]
                rc += ["\n".join(rcd)]
            if r[-1][-1] != ",":
                r[-1] += ","
            r += ["    \"root_cause\": ["]
            r += [",\n".join(rc)]
            r += ["    ]"]
        if self.topology_rca:
            if r[-1][-1] != ",":
                r[-1] += ","
            r += ["    \"topology_rca\": true"]
        # Plugins
        if self.plugins:
            if r[-1][-1] != ",":
                r[-1] += ","
            plugins = []
            for p in self.plugins:
                pd = ["        {"]
                pd += ["            \"name\": \"%s\"" % p.name]
                if p.config:
                    pd[-1] += ","
                    pc = []
                    # Keys come from p.config, so values are read from
                    # the same mapping
                    for v in p.config:
                        pc += ["                \"%s\": \"%s\"" % (v, p.config[v])]
                    pd += ["            \"config\": {"]
                    pd += [",\n".join(pc)]
                    pd += ["            }"]
                pd += ["        }"]
                plugins += ["\n".join(pd)]
            r += ["    \"plugins\": ["]
            r += [",\n".join(plugins)]
            r += ["    ]"]
        if self.notification_delay:
            if r[-1][-1] != ",":
                r[-1] += ","
            r += ["    \"notification_delay\": %d" % self.notification_delay]
        # control_time1 is exported only along with control_time0,
        # control_timeN only along with control_time1
        if self.control_time0:
            if r[-1][-1] != ",":
                r[-1] += ","
            r += ["    \"control_time0\": %d" % self.control_time0]
            if self.control_time1:
                r[-1] += ","
                r += ["    \"control_time1\": %d" % self.control_time1]
                if self.control_timeN:
                    r[-1] += ","
                    r += ["    \"control_timeN\": %d" % self.control_timeN]
        if self.recover_time:
            if r[-1][-1] != ",":
                r[-1] += ","
            r += ["    \"recover_time\": %d" % self.recover_time]
        # Close
        if r[-1].endswith(","):
            r[-1] = r[-1][:-1]
        r += ["}", ""]
        return "\n".join(r)

    def get_json_path(self):
        """
        Build the relative path of the JSON file for this alarm class.

        Each "|"-separated part of the name becomes a path component
        after stripping and quote_safe_path; ".json" is appended.

        :returns: Relative path string
        """
        p = [quote_safe_path(n.strip()) for n in self.name.split("|")]
        return os.path.join(*p) + ".json"

    @property
    def config(self):
        """
        AlarmClassConfig bound to this alarm class, or None when absent.

        The lookup result is memoized on the instance.
        """
        if not hasattr(self, "_config"):
            self._config = AlarmClassConfig.objects.filter(alarm_class=self.id).first()
        return self._config

    def get_notification_delay(self):
        """
        Effective notification delay: taken from the config when there is
        one, otherwise from the alarm class.

        :returns: Delay value, or None when it is unset or zero
        """
        if self.config:
            return self.config.notification_delay or None
        else:
            return self.notification_delay or None

    def get_control_time(self, reopens):
        """
        Effective control time for the given number of reopens: taken from
        the config when there is one, otherwise from the alarm class.

        :param reopens: Number of reopens (0, 1 or more)
        :returns: Control time value, or None when it is unset or zero
        """
        if reopens == 0:
            if self.config:
                return self.config.control_time0 or None
            else:
                return self.control_time0 or None
        elif reopens == 1:
            if self.config:
                return self.config.control_time1 or None
            else:
                return self.control_time1 or None
        else:
            if self.config:
                return self.config.control_timeN or None
            else:
                return self.control_timeN or None
示例#15
0
class ArchivedAlarm(nosql.Document):
    """
    Archived alarm document stored in the "noc.alarms.archived" collection.

    Can be turned back into an ActiveAlarm via reopen().
    """
    meta = {
        "collection": "noc.alarms.archived",
        "allow_inheritance": False,
        "indexes": ["root", "control_time", "timestamp", "managed_object"],
    }
    # Status code written to log entries of archived alarms
    status = "C"

    timestamp = nosql.DateTimeField(required=True)
    clear_timestamp = nosql.DateTimeField(required=True)
    managed_object = nosql.ForeignKeyField(ManagedObject)
    alarm_class = nosql.PlainReferenceField(AlarmClass)
    severity = nosql.IntField(required=True)
    vars = nosql.DictField()
    log = nosql.ListField(nosql.EmbeddedDocumentField(AlarmLog))
    # Events which opened and closed the alarm
    opening_event = nosql.ObjectIdField(required=False)
    closing_event = nosql.ObjectIdField(required=False)
    # Number of reopens
    reopens = nosql.IntField(required=False)
    # Copied discriminator
    discriminator = nosql.StringField(required=False)
    # Control time within which the alarm will be reopened
    # instead of creating a new alarm
    control_time = nosql.DateTimeField(required=False)
    # RCA
    # Reference to root cause (Active Alarm or Archived Alarm instance)
    root = nosql.ObjectIdField(required=False)

    def __unicode__(self):
        return u"%s" % self.id

    def log_message(self, message):
        """
        Append a log entry that keeps the current status and save the alarm.

        :param message: Log message text
        """
        entry = AlarmLog(
            timestamp=datetime.datetime.now(),
            from_status=self.status,
            to_status=self.status,
            message=message,
        )
        self.log += [entry]
        self.save()

    def get_template_vars(self):
        """
        Build the variables passed to subject and body templates.

        :returns: Copy of the alarm vars with this instance under "event"
        """
        tpl_vars = self.vars.copy()
        tpl_vars["event"] = self
        return tpl_vars

    @property
    def subject(self):
        """
        Subject rendered from the alarm class subject template.

        Results of 255 characters or more are shortened to the first and
        last 125 characters joined by " ... ".
        """
        ctx = Context(self.get_template_vars())
        rendered = Template(self.alarm_class.subject_template).render(ctx)
        if len(rendered) < 255:
            return rendered
        return rendered[:125] + " ... " + rendered[-125:]

    @property
    def body(self):
        """
        Body rendered from the alarm class body template.
        """
        ctx = Context(self.get_template_vars())
        return Template(self.alarm_class.body_template).render(ctx)

    @property
    def duration(self):
        """
        Time between timestamp and clear_timestamp in whole seconds.
        """
        delta = self.clear_timestamp - self.timestamp
        return delta.days * 86400 + delta.seconds

    @property
    def display_duration(self):
        """
        Duration formatted as "[<days>d ]HH:MM:SS".
        """
        delta = self.clear_timestamp - self.timestamp
        total_minutes, secs = divmod(delta.seconds, 60)
        hours, mins = divmod(total_minutes, 60)
        hours %= 24
        if delta.days:
            return "%dd %02d:%02d:%02d" % (delta.days, hours, mins, secs)
        return "%02d:%02d:%02d" % (hours, mins, secs)

    @property
    def effective_style(self):
        """
        Style of the AlarmSeverity matching this alarm's severity.
        """
        severity = AlarmSeverity.get_severity(self.severity)
        return severity.style

    def set_root(self, root_alarm):
        """
        No-op for archived alarms.
        """
        pass

    def reopen(self, message):
        """
        Move the alarm back to the active state.

        Creates an ActiveAlarm with the same id and copied fields, appends
        a "C" -> "A" log entry, increments the reopen counter, deletes the
        archived document and removes the pending control_notify job.
        A reopen event is sent to the managed object only for alarms
        without root that were not reopened before.

        :param message: Text of the log entry describing the reopen
        :returns: Newly created ActiveAlarm
        """
        previous_reopens = self.reopens or 0
        now = datetime.datetime.now()
        reopen_entry = AlarmLog(
            timestamp=now, from_status="C", to_status="A", message=message
        )
        active = ActiveAlarm(
            id=self.id,
            timestamp=self.timestamp,
            last_update=now,
            managed_object=self.managed_object,
            alarm_class=self.alarm_class,
            severity=self.severity,
            vars=self.vars,
            log=self.log + [reopen_entry],
            root=self.root,
            opening_event=self.opening_event,
            discriminator=self.discriminator,
            reopens=previous_reopens + 1,
        )
        active.save()
        # @todo: Clear related correlator jobs
        self.delete()
        # Remove pending control_notify job
        remove_job("fm.correlator", "control_notify", key=active.id)
        # No notifications for child alarms and for alarms
        # which have been reopened before
        if active.root or previous_reopens:
            return active
        payload = {
            "alarm": active,
            "subject": active.subject,
            "body": active.body,
            "symptoms": active.alarm_class.symptoms,
            "recommended_actions": active.alarm_class.recommended_actions,
            "probable_causes": active.alarm_class.probable_causes,
        }
        active.managed_object.event(
            active.managed_object.EV_ALARM_REOPENED, payload
        )
        return active
示例#16
0
class Process(nosql.Document):
    """
    Running workflow instance stored in the "noc.wf.processes" collection.

    Holds the current node and the process context; step() executes node
    handlers one after another until the process stops or is suspended.
    """
    meta = {
        "collection": "noc.wf.processes",
        "allow_inheritance": False
    }

    workflow = nosql.PlainReferenceField(Workflow)
    # Current node
    node = nosql.PlainReferenceField(Node)
    # Process variables: param -> value
    context = nosql.RawDictField()
    start_time = nosql.DateTimeField()
    # Log entering/leaving of every node when set
    trace = nosql.BooleanField(default=False)
    # Value passed to the last sleep() call
    sleep_time = nosql.IntField()

    class SleepException(Exception):
        """Raised by sleep() to suspend step()."""
        pass

    class CannotSleepError(Exception):
        """Raised by sleep() when the current handler is conditional."""
        pass

    def __unicode__(self):
        return "%s at %s (%s)" % (self.workflow, self.node, self.id)

    def info(self, msg):
        """
        Log *msg* at INFO level, prefixed with the workflow and process id.

        :param msg: Message text
        """
        logging.info("[%s (PID: %s)] %s" % (
            self.workflow, self.id, msg))

    def update_context(self, param, value):
        """
        Set a context variable (the process is not saved).

        :param param: Variable name
        :param value: Variable value
        """
        self.context[param] = value

    def step(self):
        """
        Run node handlers starting from the current node.

        After each handler the process moves to the next node and is
        saved. Conditional handlers select next_true_node or
        next_false_node by their result, other handlers use next_node.

        :returns: True when the process stopped because there is no next
            node, False when it was suspended by SleepException
        """
        to_sleep = False
        while not to_sleep:
            if self.trace:
                self.info("Entering node '%s' (%s %s) with context %s" % (
                    self.node.name, self.node.handler,
                    self.node.params, self.context
                ))
            handler = self.node.handler_class()
            # Reset the result for every node: when the handler raises
            # SleepException no value is assigned, and the name must not
            # stay unbound or keep the result of the previous node
            r = None
            try:
                r = handler.run(self, self.node)
            except self.SleepException:
                to_sleep = True
            if self.trace:
                self.info("Leaving node '%s' with context %s" % (
                    self.node.name, self.context
                ))
            # Detect next node
            if handler.conditional:
                if r:
                    next_node = self.node.next_true_node
                else:
                    next_node = self.node.next_false_node
            else:
                next_node = self.node.next_node
            # Move to next node
            if next_node:
                if self.trace:
                    self.info("Moving to node '%s'" % next_node.name)
                self.node = next_node
                self.save()
            else:
                if self.trace:
                    self.info("Stopping at node '%s' with context %s" % (self.node.name, self.context))
                return True
        return False  # Suspended

    def schedule(self):
        """
        Submit a "wf.wfstep" job for this process to the "wf.jobs" scheduler.
        """
        submit_job("wf.jobs", "wf.wfstep", key=self.id)

    def sleep(self, t):
        """
        Suspend the process: store the sleep time, save and interrupt step().

        :param t: Value stored in sleep_time
        :raises CannotSleepError: When the current node handler is
            conditional
        :raises SleepException: Always, after the process is saved
        """
        if self.node.handler_class().conditional:
            raise self.CannotSleepError(
                "Cannot sleep in conditional handler")
        self.sleep_time = t
        self.save()
        raise self.SleepException
示例#17
0
class ActiveAlarm(nosql.Document):
    """
    Active alarm document, stored in the "noc.alarms.active" collection.
    """
    meta = {
        "collection": "noc.alarms.active",
        "strict": False,
        "auto_create_index": False,
        "indexes": [
            "timestamp", "root", "-severity",
            ("alarm_class", "managed_object"),
            ("discriminator", "managed_object"),
            ("timestamp", "managed_object"),
            "escalation_tt",
            "escalation_ts",
            "adm_path",
            "segment_path",
            "container_path",
            "uplinks"
        ]
    }
    # Status code, written as from_status/to_status of log entries
    status = "A"

    timestamp = nosql.DateTimeField(required=True)
    last_update = nosql.DateTimeField(required=True)
    managed_object = nosql.ForeignKeyField(ManagedObject)
    alarm_class = nosql.PlainReferenceField(AlarmClass)
    severity = nosql.IntField(required=True)
    vars = nosql.DictField()
    # Calculated alarm discriminator
    # Has meaning only for alarms with is_unique flag set
    # Calculated as sha1("value1\x00....\x00valueN").hexdigest()
    discriminator = nosql.StringField(required=False)
    log = nosql.ListField(nosql.EmbeddedDocumentField(AlarmLog))
    # Responsible person
    owner = nosql.ForeignKeyField(User, required=False)
    #
    opening_event = nosql.ObjectIdField(required=False)
    closing_event = nosql.ObjectIdField(required=False)
    # List of subscribers
    subscribers = nosql.ListField(nosql.ForeignKeyField(User))
    #
    custom_subject = nosql.StringField(required=False)
    custom_style = nosql.ForeignKeyField(Style, required=False)
    #
    reopens = nosql.IntField(required=False)
    # RCA
    # Reference to root cause (Active Alarm or Archived Alarm instance)
    root = nosql.ObjectIdField(required=False)
    # Escalated TT ID in form
    # <external system name>:<external tt id>
    escalation_ts = nosql.DateTimeField(required=False)
    escalation_tt = nosql.StringField(required=False)
    escalation_error = nosql.StringField(required=False)
    # span context
    escalation_ctx = nosql.LongField(required=False)
    # Close tt when alarm cleared
    close_tt = nosql.BooleanField(default=False)
    # Do not clear alarm until *wait_tt* is closed
    wait_tt = nosql.StringField()
    wait_ts = nosql.DateTimeField()
    # Directly affected services summary, grouped by profiles
    # (connected to the same managed object)
    direct_services = nosql.ListField(nosql.EmbeddedDocumentField(SummaryItem))
    direct_subscribers = nosql.ListField(nosql.EmbeddedDocumentField(SummaryItem))
    # Indirectly affected services summary, grouped by profiles
    # (covered by this and all inferred alarms)
    total_objects = nosql.ListField(nosql.EmbeddedDocumentField(ObjectSummaryItem))
    total_services = nosql.ListField(nosql.EmbeddedDocumentField(SummaryItem))
    total_subscribers = nosql.ListField(nosql.EmbeddedDocumentField(SummaryItem))
    # Template and notification group to send close notification
    clear_template = nosql.ForeignKeyField(Template, required=False)
    clear_notification_group = nosql.ForeignKeyField(NotificationGroup, required=False)
    # Paths, copied from the managed object's data in clean()
    adm_path = nosql.ListField(nosql.IntField())
    segment_path = nosql.ListField(nosql.ObjectIdField())
    container_path = nosql.ListField(nosql.ObjectIdField())
    # Uplinks, for topology_rca only
    uplinks = nosql.ListField(nosql.IntField())

    def __unicode__(self):
        """Return the alarm id rendered as a unicode string."""
        alarm_id = self.id
        return u"%s" % alarm_id

    def iter_changed_datastream(self):
        """
        Yield ("alarm", <alarm id>) when ``config.datastream.enable_alarm``
        is set; yield nothing otherwise.
        """
        if not config.datastream.enable_alarm:
            return
        yield "alarm", self.id

    def clean(self):
        """
        Run the parent ``clean()``, default ``last_update`` to ``timestamp``
        and copy the path and uplink fields from the managed object's data.
        """
        super(ActiveAlarm, self).clean()
        if not self.last_update:
            self.last_update = self.timestamp
        mo_data = self.managed_object.data
        # Same attribute names on both sides, copied in declaration order
        for field in ("adm_path", "segment_path", "container_path", "uplinks"):
            setattr(self, field, getattr(mo_data, field))

    def safe_save(self, **kwargs):
        """
        Create new alarm or update existing one if it still exists.

        For an alarm which already has an id, a ``save_condition`` matching
        that id is added unless the caller supplied one, and a failed
        condition is silently ignored. For a new alarm the keyword
        arguments are forwarded to ``save()`` unchanged (previously they
        were dropped).

        :param kwargs: Keyword arguments passed to ``save()``
        :return: None
        """
        if not self.id:
            # New alarm, nothing to race with
            self.save(**kwargs)
            return
        # Update existing only if exists
        kwargs.setdefault("save_condition", {"id": self.id})
        try:
            self.save(**kwargs)
        except SaveConditionError:
            pass  # Race condition, closed during update

    def change_severity(self, user="", delta=None, severity=None, to_save=True):
        """
        Change alarm severity.

        Either shifts the current severity by *delta* (never below 0) or
        sets it from *severity*. *delta* takes precedence when both are
        given. A falsy *delta* and a falsy *severity* leave the severity
        untouched.

        :param user: User instance or user name put into the log message
        :param delta: Relative severity change
        :param severity: Number, or an object providing ``severity`` and
            ``name`` attributes
        :param to_save: Save the alarm afterwards. When False nothing is
            saved, including the log entry
        """
        import numbers

        if isinstance(user, User):
            user = user.username
        message = None
        if delta:
            self.severity = max(0, self.severity + delta)
            if delta > 0:
                message = "%s has increased alarm severity by %s" % (
                    user, delta)
            else:
                # Report the magnitude, "decreased by -3" reads as a double negative
                message = "%s has decreased alarm severity by %s" % (
                    user, abs(delta))
        elif severity:
            if isinstance(severity, numbers.Real):
                self.severity = int(severity)
                message = "%s has changed severity to %s" % (user, severity)
            else:
                self.severity = severity.severity
                message = "%s has changed severity to %s" % (
                    user, severity.name)
        if message is not None:
            # Saving is decided once below, so to_save=False is really honoured
            self.log_message(message, to_save=False)
        if to_save:
            self.safe_save()

    def log_message(self, message, to_save=True):
        """
        Append a log entry which keeps the alarm in its current status.

        :param message: Text of the log entry
        :param to_save: Call ``safe_save()`` after appending
        """
        entry = AlarmLog(
            timestamp=datetime.datetime.now(),
            from_status=self.status,
            to_status=self.status,
            message=message
        )
        self.log += [entry]
        if to_save:
            self.safe_save()

    def clear_alarm(self, message, ts=None, force=False):
        """
        Clear alarm: copy it to an ArchivedAlarm and delete the active one.

        :param message: Log clearing message
        :param ts: Clearing timestamp, current time when not set
        :param force: Clear even if wait_tt is set
        :return: ArchivedAlarm instance, or None when clearing has been
            postponed until the escalated TT is closed
        """
        ts = ts or datetime.datetime.now()
        if self.wait_tt and not force:
            # Wait for escalated tt to close
            if not self.wait_ts:
                # First clear attempt: remember the time and schedule the wait job
                self.wait_ts = ts
                self.log_message("Waiting for TT to close")
                call_later(
                    "noc.services.escalator.wait_tt.wait_tt",
                    scheduler="escalator",
                    pool=self.managed_object.escalator_shard,
                    alarm_id=self.id
                )
            return
        if self.alarm_class.clear_handlers:
            # Process clear handlers
            for h in self.alarm_class.get_clear_handlers():
                try:
                    h(self)
                except Exception:
                    # A failing handler is reported and must not stop clearing
                    error_report()
        # Archived log is the current log plus the A -> C transition entry
        log = self.log + [AlarmLog(timestamp=ts, from_status="A",
                                   to_status="C", message=message)]
        a = ArchivedAlarm(
            id=self.id,
            timestamp=self.timestamp,
            clear_timestamp=ts,
            managed_object=self.managed_object,
            alarm_class=self.alarm_class,
            severity=self.severity,
            vars=self.vars,
            log=log,
            root=self.root,
            escalation_ts=self.escalation_ts,
            escalation_tt=self.escalation_tt,
            escalation_error=self.escalation_error,
            escalation_ctx=self.escalation_ctx,
            opening_event=self.opening_event,
            closing_event=self.closing_event,
            discriminator=self.discriminator,
            reopens=self.reopens,
            direct_services=self.direct_services,
            direct_subscribers=self.direct_subscribers,
            total_objects=self.total_objects,
            total_services=self.total_services,
            total_subscribers=self.total_subscribers,
            adm_path=self.adm_path,
            segment_path=self.segment_path,
            container_path=self.container_path,
            uplinks=self.uplinks
        )
        ct = self.alarm_class.get_control_time(self.reopens)
        if ct:
            # Control time is counted from the current time, not from *ts*
            a.control_time = datetime.datetime.now() + datetime.timedelta(seconds=ct)
        a.save()
        # Send notifications
        # (only for alarms without a root cause which have never been reopened)
        if not a.root and not self.reopens:
            a.managed_object.event(a.managed_object.EV_ALARM_CLEARED, {
                "alarm": a,
                "subject": a.subject,
                "body": a.body,
                "symptoms": a.alarm_class.symptoms,
                "recommended_actions": a.alarm_class.recommended_actions,
                "probable_causes": a.alarm_class.probable_causes
            })
        elif ct:
            # Intentionally a no-op
            pass
        # Set checks on all consequences
        for d in self._get_collection().find({
            "root": self.id
        }, {"_id": 1, "alarm_class": 1}):
            ac = AlarmClass.get_by_id(d["alarm_class"])
            if not ac:
                continue
            t = ac.recover_time
            if not t:
                # No recover time for this alarm class, nothing to schedule
                continue
            call_later(
                "noc.services.correlator.check.check_close_consequence",
                scheduler="correlator",
                pool=self.managed_object.pool.name,
                delay=t,
                alarm_id=d["_id"]
            )
        # Clear alarm
        self.delete()
        # Close TT
        # MUST be after .delete() to prevent race conditions
        if a.escalation_tt or self.clear_template:
            if self.clear_template:
                ctx = {
                    "alarm": a
                }
                subject = self.clear_template.render_subject(**ctx)
                body = self.clear_template.render_body(**ctx)
            else:
                # No template configured, fall back to fixed texts
                subject = "Alarm cleared"
                body = "Alarm has been cleared"
            call_later(
                "noc.services.escalator.escalation.notify_close",
                scheduler="escalator",
                pool=self.managed_object.escalator_shard,
                max_runs=ALARM_CLOSE_RETRIES,
                alarm_id=self.id,
                tt_id=self.escalation_tt,
                subject=subject,
                body=body,
                notification_group_id=self.clear_notification_group.id if self.clear_notification_group else None,
                close_tt=self.close_tt
            )
        # Gather diagnostics
        AlarmDiagnosticConfig.on_clear(a)
        # Return archived
        return a

    def get_template_vars(self):
        """
        Build template variables: a copy of the alarm's vars with the
        alarm itself added under the "alarm" key.

        :return: New dict, ``self.vars`` is left untouched
        """
        template_vars = self.vars.copy()
        template_vars["alarm"] = self
        return template_vars

    @property
    def subject(self):
        """
        Alarm subject: the custom subject when set, otherwise the alarm
        class subject template rendered with the template variables.
        Texts of 255 characters or more are shortened to the first and
        the last 125 characters joined by " ... ".
        """
        text = self.custom_subject
        if not text:
            ctx = Context(self.get_template_vars())
            text = DjangoTemplate(self.alarm_class.subject_template).render(ctx)
        if len(text) < 255:
            return text
        return text[:125] + " ... " + text[-125:]

    @property
    def body(self):
        """Alarm class body template rendered with the template variables."""
        ctx = Context(self.get_template_vars())
        template = DjangoTemplate(self.alarm_class.body_template)
        return template.render(ctx)

    def change_owner(self, user):
        """
        Assign *user* as the alarm owner and save the alarm.

        :param user: New owner, stored in ``owner`` as is
        """
        self.owner = user
        self.save()

    def subscribe(self, user):
        """
        Add *user* to the alarm's subscribers.

        Does nothing when the user's id is already listed. Otherwise the
        id is appended, a log entry is added and the alarm is saved.

        :param user: Object providing ``id``, ``first_name``,
            ``last_name`` and ``username``
        """
        user_id = user.id
        if user_id in self.subscribers:
            return
        self.subscribers += [user_id]
        full_name = " ".join([user.first_name, user.last_name])
        self.log_message(
            "%s(%s) has been subscribed" % (full_name, user.username),
            to_save=False
        )
        self.save()

    def unsubscribe(self, user):
        """
        Remove *user* from the alarm's subscribers.

        Does nothing when the user is not subscribed. Otherwise the user's
        id is removed, a log entry is added and the alarm is saved.

        :param user: Object providing ``id``, ``first_name``,
            ``last_name`` and ``username``
        """
        if not self.is_subscribed(user):
            return
        remaining = []
        for entry in self.subscribers:
            # subscribe() stores plain ids, which have no ``.id`` attribute;
            # entries carrying one are reduced to it
            entry_id = getattr(entry, "id", entry)
            if entry_id != user.id:
                remaining.append(entry_id)
        self.subscribers = remaining
        full_name = " ".join([user.first_name, user.last_name])
        self.log_message(
            "%s(%s) has been unsubscribed" % (full_name, user.username),
            to_save=False
        )
        self.save()

    def is_owner(self, user):
        """Return the result of comparing the alarm's owner with *user*."""
        current_owner = self.owner
        return current_owner == user

    def is_subscribed(self, user):
        """Tell whether the id of *user* is listed in the subscribers."""
        user_id = user.id
        return user_id in self.subscribers

    @property
    def is_unassigned(self):
        """True when the alarm has no owner (``owner`` is None)."""
        current_owner = self.owner
        return current_owner is None

    @property
    def duration(self):
        """
        Time passed since the alarm timestamp, in whole seconds
        (microseconds are dropped).
        """
        elapsed = datetime.datetime.now() - self.timestamp
        return elapsed.seconds + 86400 * elapsed.days

    @property
    def display_duration(self):
        """
        Time passed since the alarm timestamp as "HH:MM:SS", prefixed
        with "<days>d " when at least one day has passed.
        """
        elapsed = datetime.datetime.now() - self.timestamp
        total_minutes, secs = divmod(elapsed.seconds, 60)
        hours, mins = divmod(total_minutes, 60)
        text = "%02d:%02d:%02d" % (hours % 24, mins, secs)
        if elapsed.days:
            text = "%dd %s" % (elapsed.days, text)
        return text

    @property
    def effective_style(self):
        """
        Style to use for the alarm: the custom style when set, otherwise
        the style of the AlarmSeverity resolved from the alarm's severity.
        """
        custom = self.custom_style
        if custom:
            return custom
        return AlarmSeverity.get_severity(self.severity).style

    def get_root(self):
        """
        Get top-level root alarm.

        Follows the ``root`` references, resolving each via ``get_alarm``,
        until an alarm without a root is reached.

        :return: The alarm itself when it has no root
        """
        alarm = self
        while True:
            parent_id = alarm.root
            if not parent_id:
                return alarm
            alarm = get_alarm(parent_id)

    def update_summary(self):
        """
        Recalculate total object, service and subscriber summaries.

        Starts from the alarm's direct summaries and its managed object's
        profile, refreshes every alarm which has this one as root and adds
        their totals. When any total changed, the new totals are stored,
        the severity is re-evaluated through ServiceSummary and the alarm
        is saved.
        """
        def merge_counts(target, extra):
            # Add counters of *extra* into *target* in place
            for key in extra:
                if key not in target:
                    target[key] = extra[key]
                else:
                    target[key] += extra[key]

        services = SummaryItem.items_to_dict(self.direct_services)
        subscribers = SummaryItem.items_to_dict(self.direct_subscribers)
        objects = {
            self.managed_object.object_profile.id: 1
        }

        for child in ActiveAlarm.objects.filter(root=self.id):
            # Children first, so their totals are up to date
            child.update_summary()
            for totals, items in (
                (objects, child.total_objects),
                (services, child.total_services),
                (subscribers, child.total_subscribers)
            ):
                merge_counts(totals, SummaryItem.items_to_dict(items))
        obj_list = ObjectSummaryItem.dict_to_items(objects)
        svc_list = SummaryItem.dict_to_items(services)
        sub_list = SummaryItem.dict_to_items(subscribers)
        changed = (
            svc_list != self.total_services or
            sub_list != self.total_subscribers or
            obj_list != self.total_objects
        )
        if not changed:
            return
        new_severity = ServiceSummary.get_severity({
            "service": services,
            "subscriber": subscribers,
            "objects": objects
        })
        self.total_objects = obj_list
        self.total_services = svc_list
        self.total_subscribers = sub_list
        if new_severity != self.severity:
            self.change_severity(severity=new_severity, to_save=False)
        self.safe_save()

    def set_root(self, root_alarm):
        """
        Set root cause.

        Does nothing when the alarm already has a root, or when the root
        chain of *root_alarm* leads back to this alarm. Otherwise the
        reference is stored, both alarms get a log entry and the root
        alarm's summary is updated.

        :param root_alarm: Alarm to be marked as the root cause
        :raises Exception: *root_alarm* is this very alarm
        """
        if self.root:
            return
        if self.id == root_alarm.id:
            raise Exception("Cannot set self as root cause")
        # Detect loop: walk up from the candidate looking for ourselves
        cursor = root_alarm
        while cursor and cursor.root:
            parent_id = cursor.root
            if parent_id == self.id:
                return
            cursor = get_alarm(parent_id)
        # Set root
        self.root = root_alarm.id
        # log_message() also saves the alarm
        self.log_message(
            "Alarm %s has been marked as root cause" % root_alarm.id)
        root_alarm.log_message(
            "Alarm %s has been marked as child" % self.id)
        root_alarm.update_summary()

    def escalate(self, tt_id, close_tt=False):
        """
        Record escalation of the alarm to a trouble ticket.

        Sets the escalation fields, adds a log entry and writes the fields
        directly to the active alarms collection, resetting
        ``escalation_error``. When that update modified nothing, the same
        update is applied to the archived alarms collection.

        :param tt_id: Trouble ticket id
        :param close_tt: Value stored in ``close_tt``
        """
        self.escalation_tt = tt_id
        self.escalation_ts = datetime.datetime.now()
        self.close_tt = close_tt
        self.log_message("Escalated to %s" % tt_id)
        query = {"_id": self.id}
        changes = {
            "escalation_tt": self.escalation_tt,
            "escalation_ts": self.escalation_ts,
            "close_tt": self.close_tt,
            "escalation_error": None
        }
        update = {"$set": changes}
        result = ActiveAlarm._get_collection().update_one(query, update)
        if not result.acknowledged or result.modified_count:
            return
        # Already closed, update archive
        ArchivedAlarm._get_collection().update_one(query, update)

    def set_escalation_error(self, error):
        """
        Store *error* in ``escalation_error`` and write it directly to the
        alarm's collection.

        :param error: Value to store
        """
        self.escalation_error = error
        query = {"_id": self.id}
        update = {"$set": {"escalation_error": error}}
        self._get_collection().update_one(query, update)

    def set_escalation_context(self):
        """
        Store the current span context in ``escalation_ctx``.

        Nothing is written when there is neither a current context nor a
        previously stored one.
        """
        current_context, _ = get_current_span()
        if not (current_context or self.escalation_ctx):
            return
        self.escalation_ctx = current_context
        update = {"$set": {"escalation_ctx": current_context}}
        self._get_collection().update_one({"_id": self.id}, update)

    def set_clear_notification(self, notification_group, template):
        """
        Set the notification group and template used when the alarm is
        cleared, then save with a condition on the alarm id and on
        ``managed_object`` being present.

        :param notification_group: Value for ``clear_notification_group``
        :param template: Value for ``clear_template``
        """
        self.clear_notification_group = notification_group
        self.clear_template = template
        condition = {
            "managed_object": {
                "$exists": True
            },
            "id": self.id
        }
        self.safe_save(save_condition=condition)

    def iter_consequences(self):
        """
        Generator yielding all consequence alarms: every active alarm
        which has this one as root, each followed by its own consequences.
        """
        children = ActiveAlarm.objects.filter(root=self.id)
        for child in children:
            yield child
            for descendant in child.iter_consequences():
                yield descendant

    def iter_affected(self):
        """
        Generator yielding all affected managed objects: the alarm's own
        managed object first, then those of the consequences, each object
        only once.
        """
        own = self.managed_object
        seen = {own}
        yield own
        for alarm in self.iter_consequences():
            mo = alarm.managed_object
            if mo in seen:
                continue
            seen.add(mo)
            yield mo

    def iter_escalated(self):
        """
        Generator yielding all escalated consequences, i.e. those with
        ``escalation_tt`` set.
        """
        for alarm in self.iter_consequences():
            if not alarm.escalation_tt:
                continue
            yield alarm