class AlertStateBase(SparseModel):
    """Records a period of time during which a particular issue affected
    a particular element of the system.

    An alert is attached to an arbitrary model instance (``alert_item``) via
    a generic foreign key.  ``active`` is stored as ``True`` / ``None`` (never
    ``False``) so that the ``unique_together`` constraint only restricts
    active alerts.
    """

    class Meta:
        abstract = True
        unique_together = ('alert_item_type', 'alert_item_id', 'alert_type', 'active')
        ordering = ['id']
        app_label = 'chroma_core'
        table_name = 'chroma_core_alertstate'

    alert_item_type = models.ForeignKey(ContentType, null=True)
    alert_item_id = models.PositiveIntegerField(null=True)
    # FIXME: generic foreign key does not automatically set up deletion
    # of this when the alert_item is deleted -- do it manually
    alert_item = GenericForeignKey('alert_item_type', 'alert_item_id')

    alert_type = models.CharField(max_length=128)

    begin = models.DateTimeField(help_text="Time at which the alert started",
                                 default=timezone.now)
    end = models.DateTimeField(help_text="Time at which the alert was resolved\
 if active is false, else time that the alert was last checked (e.g.\
 time when we last checked an offline target was still not offline)",
                               null=True)

    _message = models.TextField(
        db_column='message',
        null=True,
        help_text='Message associated with the Alert. Created at Alert creation time')

    # Note: use True and None instead of True and False so that
    # unique-together constraint only applied to active alerts
    active = models.NullBooleanField()

    # whether a user has manually dismissed alert
    dismissed = models.BooleanField(default=False,
                                    help_text="True denotes that the user "
                                              "has acknowledged this alert.")

    severity = models.IntegerField(default=logging.INFO,
                                   help_text=("String indicating the "
                                              "severity of the alert, "
                                              "one of %s") % STR_TO_SEVERITY.keys())

    # This is only used by one event ClientConnectEvent but it is critical and so needs to be searchable etc
    # for that reason it can't use the variant
    lustre_pid = models.IntegerField(null=True)

    # Subclasses set this, used as a default in .notify()
    default_severity = logging.INFO

    # For historical compatibility anything called Alert will send an alert email and anything else won't.
    # This can obviously be overridden by any particular event but gives us a like-for-like behaviour.
    @property
    def require_mail_alert(self):
        """True when the string form of this instance's type contains ``Alert'>``."""
        return "Alert'>" in str(type(self))

    def get_active_bool(self):
        """Return ``active`` coerced to a plain bool (``None`` becomes ``False``)."""
        return bool(self.active)

    def set_active_bool(self, value):
        """Store a truthy ``value`` as ``True`` and a falsy one as ``None``."""
        if value:
            self.active = True
        else:
            self.active = None

    active_bool = property(get_active_bool, set_active_bool)

    def to_dict(self):
        """Return a plain-dict summary of this alert.

        ``end`` falls back to ``begin`` when no end time has been recorded.
        Calling this may save the model, because ``message()`` persists a
        lazily generated message.
        """
        from chroma_core.lib.util import time_str

        # message() caches into self._message, so one call serves both keys.
        message = self.message()
        return {
            'alert_severity': 'alert',  # FIXME: Still need to figure out whether to pass enum or display string.
            'alert_item': str(self.alert_item),
            'alert_message': message,
            'message': message,
            'active': bool(self.active),
            'begin': time_str(self.begin),
            'end': time_str(self.end) if self.end is not None else time_str(self.begin),
            'id': self.id,
            'alert_item_id': self.alert_item_id,
            'alert_item_content_type_id': self.alert_item_type_id
        }

    @property
    def affected_objects(self):
        """
        :return: A list of objects other than the alert_item that are affected by this alert
        """
        return []

    def end_event(self):
        """Return the event to emit when the alert is lowered; ``None`` here."""
        return None

    def alert_message(self):
        """Build the message text for this alert; this base implementation raises.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError()

    def message(self):
        """Return the stored message, generating and saving it on first use."""
        # The first time this is called _message will be None, so we have to call alert_message to
        # create the message and then save it. This will occur once for each message.
        # In the future for new alerts we will try and create them when the Alert is created but
        # at the time this patch is produced that is tricky.
        # The purpose of this is to make it so that Alerts can continue to operate when the data required
        # to create the message no longer exists.
        # It's a small step for HYD-5736 and a move towards a more efficient model.
        if self._message is None:
            self._message = self.alert_message()
            self.save()

        return self._message

    def affected_targets(self, affect_target):
        """Hook taking an ``affect_target`` callback; does nothing in this base class."""
        pass

    @classmethod
    def subclasses(cls):
        """Return every direct and indirect subclass of ``cls``, depth first."""
        all_subclasses = []
        for subclass in cls.__subclasses__():
            all_subclasses.append(subclass)
            all_subclasses.extend(subclass.subclasses())
        return all_subclasses

    @classmethod
    def filter_by_item(cls, item):
        """Return a queryset of active alerts of this class attached to ``item``.

        Items exposing ``content_type`` are matched on that content type
        directly; other items are matched on lowercased class name and
        app label.
        """
        if hasattr(item, 'content_type'):
            # A DowncastMetaclass object
            return cls.objects.filter(active=True,
                                      alert_item_id=item.id,
                                      alert_item_type=item.content_type)
        else:
            return cls.objects.filter(
                active=True,
                alert_item_id=item.pk,
                alert_item_type__model=item.__class__.__name__.lower(),
                alert_item_type__app_label=item.__class__._meta.app_label)

    @classmethod
    def filter_by_item_id(cls, item_class, item_id):
        """Like ``filter_by_item`` but takes the item's class and id separately."""
        return cls.objects.filter(
            active=True,
            alert_item_id=item_id,
            alert_item_type__model=item_class.__name__.lower(),
            alert_item_type__app_label=item_class._meta.app_label)

    @classmethod
    def notify(cls, alert_item, active, **kwargs):
        """Notify an alert in the default severity level for that alert"""
        return cls._notify(alert_item, active, **kwargs)

    @classmethod
    def notify_warning(cls, alert_item, active, **kwargs):
        """Notify an alert in at most the WARNING severity level"""
        # Note: this replaces any 'attrs_to_save' supplied by the caller.
        kwargs['attrs_to_save'] = {
            'severity': min(cls.default_severity, logging.WARNING)
        }
        return cls._notify(alert_item, active, **kwargs)

    @classmethod
    def _notify(cls, alert_item, active, **kwargs):
        """Raise (``active`` truthy) or lower the alert on ``alert_item``."""
        if hasattr(alert_item, 'content_type'):
            alert_item = alert_item.downcast()

        if active:
            return cls.high(alert_item, **kwargs)
        else:
            # NOTE(review): low() is not defined in this class as shown here --
            # confirm it is provided elsewhere.
            return cls.low(alert_item, **kwargs)

    @classmethod
    def _get_attrs_to_save(cls, kwargs):
        """Strip non-filterable entries out of ``kwargs`` (in place) and return them.

        :param kwargs: dict of keyword arguments; mutated so that only keys
            naming database columns of ``cls`` remain.
        :return: dict combining the popped ``attrs_to_save`` entry (if any)
            with every removed non-column key.
        """
        # Prepare data to be saved with alert, but not affect the filter_by_item() below
        # e.g. Only one alert type per alert item can be active, so we don't need to filter on severity.
        attrs_to_save = kwargs.pop('attrs_to_save', {})

        # Add any properties to the attrs_to_save that are not db fields, we can't search on
        # non db fields after all. Some alerts have custom fields and they will be searched out here.
        fields = [field.attname for field in cls._meta.fields]
        # Iterate over a snapshot: popping while iterating a live key view
        # raises RuntimeError on Python 3.
        for attr in list(kwargs):
            if attr not in fields:
                attrs_to_save[attr] = kwargs.pop(attr)

        return attrs_to_save

    @classmethod
    def high(cls, alert_item, **kwargs):
        """Raise this alert on ``alert_item`` unless a matching one is already active.

        :return: the existing or newly saved alert; ``None`` when the item has
            a ``not_deleted`` attribute that is not ``True``, or when saving
            the new alert hits an ``IntegrityError``.
        """
        if hasattr(alert_item, 'not_deleted') and alert_item.not_deleted != True:
            return None

        attrs_to_save = cls._get_attrs_to_save(kwargs)

        try:
            alert_state = cls.filter_by_item(alert_item).get(**kwargs)
        except cls.DoesNotExist:
            kwargs.update(attrs_to_save)

            if 'alert_type' not in kwargs:
                kwargs['alert_type'] = cls.__name__
            if 'severity' not in kwargs:
                kwargs['severity'] = cls.default_severity

            alert_state = cls(
                active=True,
                dismissed=False,  # Users dismiss, not the software
                alert_item=alert_item,
                **kwargs)
            try:
                alert_state._message = alert_state.alert_message()
                alert_state.save()
                job_log.info(
                    "AlertState: Raised %s on %s "
                    "at severity %s" %
                    (cls, alert_state.alert_item, alert_state.severity))
            except IntegrityError as e:
                job_log.warning(
                    "AlertState: IntegrityError %s saving %s : %s : %s" %
                    (e, cls.__name__, alert_item, kwargs))
                # Handle colliding inserts: drop out here, no need to update
                # the .end of the existing record as we are logically concurrent
                # with the creator.
                return None
        return alert_state
class AlertResource(SeverityResource):
    """
    Notification of a bad health state.  Alerts refer to particular objects (such as
    servers or targets), and can either be active (indicating this is a current
    problem) or inactive (indicating this is a historical record of a problem).
    """

    message = fields.CharField(
        readonly=True,
        help_text=("Human readable description " "of the alert, about one sentence"),
    )

    alert_item = fields.CharField(help_text="URI of affected item")

    affected = fields.ListField(
        null=True,
        help_text=(
            "List of objects which are affected by the alert "
            "(e.g. a target alert also affects the file system to "
            "which the target belongs)"
        ),
    )

    alert_item_str = fields.CharField(
        readonly=True,
        help_text=("A human readable noun describing the object " "that is the subject of the alert"),
    )

    record_type = fields.CharField(
        attribute="record_type",
        help_text="The type of the alert described as a Python classes",
        enumerations=[klass.__name__ for klass in util.all_subclasses(AlertStateBase)],
    )

    severity = fields.CharField(
        attribute="severity",
        help_text=("String indicating the " "severity of the alert, " "one of %s") % STR_TO_SEVERITY.keys(),
        enumerations=STR_TO_SEVERITY.keys(),
    )

    def prepend_urls(self):
        """Expose the extra ``<resource_name>/dismiss_all`` endpoint."""
        route = r"^(?P<resource_name>%s)/dismiss_all%s$" % (self._meta.resource_name, trailing_slash())
        view = self.wrap_view("dismiss_all")
        return [url(route, view, name="api_alert_dismiss_all")]

    def dismiss_all(self, request, **kwargs):
        """Dismiss every undismissed alert except active ones at severity 40 or 30.

        Anything other than a PUT from an authenticated user is answered
        with ``HttpUnauthorized``; success is answered with ``HttpNoContent``.
        """
        allowed = request.method == "PUT" and request.user.is_authenticated()
        if not allowed:
            return http.HttpUnauthorized()

        undismissed = AlertState.objects.filter(dismissed=False)
        # Active alerts at these two severities stay visible to the user.
        dismissable = undismissed.exclude(active=True, severity__in=[40, 30])
        dismissable.update(dismissed=True)

        return http.HttpNoContent()

    def dehydrate_alert_item(self, bundle):
        """Serialise the alert's item as its resource URI."""
        from chroma_api.urls import api

        item = bundle.obj.alert_item
        return api.get_resource_uri(item)

    def dehydrate_alert_item_str(self, bundle):
        """Serialise the alert's item as its ``str()`` form."""
        item = bundle.obj.alert_item
        return str(item)

    def dehydrate_message(self, bundle):
        """Serialise the alert's message text."""
        alert = bundle.obj
        return alert.message()

    def dehydrate_affected(self, bundle):
        """Return resource URIs of everything the alert affects, de-duplicated.

        The collection is built from the alert's ``affected_objects``, whatever
        ``affected_targets`` reports through the callback, and the alert item
        itself.
        """
        from chroma_api.urls import api

        alert = bundle.obj
        collected = []

        def affect_target(target):
            # A target also drags in the filesystem(s) it serves.
            collected.append(target)
            if target.filesystem_member:
                collected.append(target.filesystem)
            elif target.target_type == "mgs":
                collected.extend(target.managedfilesystem_set.all())

        collected.extend(alert.affected_objects)
        alert.affected_targets(affect_target)
        collected.append(alert.alert_item)

        unique_objects = set(collected)
        return [api.get_resource_uri(obj) for obj in unique_objects]

    def build_filters(self, filters=None):
        """Build ORM filters, translating ``active_bool`` into ``active``."""
        filters = super(AlertResource, self).build_filters(filters)

        # Map False to None and 'active_bool' to 'active'
        if "active_bool__exact" in filters:
            wanted = filters["active_bool__exact"]
            filters["active__exact"] = True if wanted else None
            del filters["active_bool__exact"]

        return filters

    class Meta:
        queryset = AlertState.objects.order_by("-begin")
        resource_name = "alert"

        filtering = {
            "begin": SeverityResource.ALL_FILTER_DATE,
            "end": SeverityResource.ALL_FILTER_DATE,
            "message": SeverityResource.ALL_FILTER_STR,
            "active": SeverityResource.ALL_FILTER_BOOL,
            "dismissed": SeverityResource.ALL_FILTER_BOOL,
            "id": SeverityResource.ALL_FILTER_INT,
            "severity": SeverityResource.ALL_FILTER_ENUMERATION,
            "created_at": SeverityResource.ALL_FILTER_DATE,
            "alert_type": SeverityResource.ALL_FILTER_ENUMERATION,
            "alert_item_id": SeverityResource.ALL_FILTER_INT,
            "lustre_pid": SeverityResource.ALL_FILTER_INT,
            "record_type": SeverityResource.ALL_FILTER_ENUMERATION,
        }

        ordering = ["begin", "end", "active"]
        serializer = DateSerializer()
        authorization = DjangoAuthorization()
        authentication = AnonymousAuthentication()
        list_allowed_methods = ["get"]
        detail_allowed_methods = ["get", "patch", "put"]
        always_return_data = True
class AlertResource(LongPollingAPI, SeverityResource):
    """
    Notification of a bad health state.  Alerts refer to particular objects (such as
    servers or targets), and can either be active (indicating this is a current
    problem) or inactive (indicating this is a historical record of a problem).
    """

    message = fields.CharField(readonly=True,
                               help_text=("Human readable description "
                                          "of the alert, about one sentence"))

    alert_item = fields.CharField(help_text="URI of affected item")

    affected = fields.ListField(
        null=True,
        help_text=("List of objects which are affected by the alert "
                   "(e.g. a target alert also affects the file system to "
                   "which the target belongs)"))

    alert_item_str = fields.CharField(
        readonly=True,
        help_text=("A human readable noun describing the object "
                   "that is the subject of the alert"))

    record_type = fields.CharField(
        attribute='record_type',
        help_text="The type of the alert described as a Python classes",
        enumerations=[
            class_.__name__ for class_ in util.all_subclasses(AlertStateBase)
        ])

    severity = fields.CharField(attribute='severity',
                                help_text=("String indicating the "
                                           "severity of the alert, "
                                           "one of %s") % STR_TO_SEVERITY.keys(),
                                enumerations=STR_TO_SEVERITY.keys())

    # Long polling should return when any of the tables below changes or has changed.
    long_polling_tables = [AlertState, LNetOfflineAlert]

    def dispatch(self, request_type, request, **kwargs):
        """Route every request through the long-polling dispatch handler."""
        return self.handle_long_polling_dispatch(request_type, request, **kwargs)

    def prepend_urls(self):
        """Register the extra ``<resource_name>/dismiss_all`` URL."""
        return [
            url(r'^(?P<resource_name>%s)/dismiss_all%s$' %
                (self._meta.resource_name, trailing_slash()),
                self.wrap_view('dismiss_all'),
                name='api_alert_dismiss_all'),
        ]

    def dismiss_all(self, request, **kwargs):
        """Mark undismissed alerts as dismissed.

        Alerts that are both active and at severity 40 or 30 (numerically
        ``logging.ERROR`` / ``logging.WARNING``) are left untouched.

        :return: ``HttpUnauthorized`` unless the request is a PUT from an
            authenticated user, otherwise ``HttpNoContent``.
        """
        if (request.method != 'PUT') or (not request.user.is_authenticated()):
            return http.HttpUnauthorized()

        AlertState.objects.filter(dismissed=False).exclude(
            active=True, severity__in=[40, 30]).update(dismissed=True)

        return http.HttpNoContent()

    def dehydrate_alert_item(self, bundle):
        """Return the resource URI of the alert's item."""
        # Imported at call time, as dehydrate_affected does, so this method
        # does not depend on a module-level ``api`` name being present.
        from chroma_api.urls import api

        return api.get_resource_uri(bundle.obj.alert_item)

    def dehydrate_alert_item_str(self, bundle):
        """Return ``str()`` of the alert's item."""
        return str(bundle.obj.alert_item)

    def dehydrate_message(self, bundle):
        """Return the alert's message text."""
        return bundle.obj.message()

    def dehydrate_affected(self, bundle):
        """Return de-duplicated resource URIs for every object the alert affects.

        Gathers the alert's ``affected_objects``, the objects reported through
        the ``affected_targets`` callback, and the alert item itself.
        """
        from chroma_api.urls import api

        alert = bundle.obj

        affected_objects = []

        def affect_target(target):
            # Callback handed to alert.affected_targets(): records the target
            # plus the filesystem(s) associated with it.
            affected_objects.append(target)
            if target.filesystem_member:
                affected_objects.append(target.filesystem)
            elif target.target_type == "mgs":
                for fs in target.managedfilesystem_set.all():
                    affected_objects.append(fs)

        affected_objects.extend(alert.affected_objects)

        alert.affected_targets(affect_target)

        affected_objects.append(alert.alert_item)

        return [api.get_resource_uri(ao) for ao in set(affected_objects)]

    def build_filters(self, filters=None):
        """Build ORM filters, rewriting ``active_bool__exact`` as ``active__exact``."""
        filters = super(AlertResource, self).build_filters(filters)

        # Map False to None and 'active_bool' to 'active'
        if 'active_bool__exact' in filters:
            filters['active__exact'] = None if not filters[
                'active_bool__exact'] else True
            del filters['active_bool__exact']

        return filters

    class Meta:
        queryset = AlertState.objects.order_by('-begin')
        resource_name = 'alert'

        filtering = {
            'begin': SeverityResource.ALL_FILTER_DATE,
            'end': SeverityResource.ALL_FILTER_DATE,
            'message': SeverityResource.ALL_FILTER_STR,
            'active': SeverityResource.ALL_FILTER_BOOL,
            'dismissed': SeverityResource.ALL_FILTER_BOOL,
            'id': SeverityResource.ALL_FILTER_INT,
            'severity': SeverityResource.ALL_FILTER_ENUMERATION,
            'created_at': SeverityResource.ALL_FILTER_DATE,
            'alert_type': SeverityResource.ALL_FILTER_ENUMERATION,
            'alert_item_id': SeverityResource.ALL_FILTER_INT,
            'lustre_pid': SeverityResource.ALL_FILTER_INT,
            'record_type': SeverityResource.ALL_FILTER_ENUMERATION
        }

        ordering = ['begin', 'end', 'active']
        authorization = DjangoAuthorization()
        authentication = AnonymousAuthentication()
        list_allowed_methods = ['get']
        detail_allowed_methods = ['get', 'patch', 'put']
        always_return_data = True
class AlertStateBase(SparseModel):
    """Records a period of time during which a particular issue affected
    a particular element of the system.

    An alert is attached to an arbitrary model instance (``alert_item``) via
    a generic foreign key.  ``active`` is stored as ``True`` / ``None`` (never
    ``False``) so that the ``unique_together`` constraint only restricts
    active alerts.
    """

    class Meta:
        unique_together = ("alert_item_type", "alert_item_id", "alert_type", "active")
        ordering = ["id"]
        app_label = "chroma_core"
        db_table = "chroma_core_alertstate"

    table_name = "chroma_core_alertstate"

    alert_item_type = models.ForeignKey(ContentType, null=True, on_delete=CASCADE)
    alert_item_id = models.PositiveIntegerField(null=True)
    # FIXME: generic foreign key does not automatically set up deletion
    # of this when the alert_item is deleted -- do it manually
    alert_item = GenericForeignKey("alert_item_type", "alert_item_id")

    alert_type = models.CharField(max_length=128)

    begin = models.DateTimeField(help_text="Time at which the alert started", default=timezone.now)
    end = models.DateTimeField(
        help_text="Time at which the alert was resolved\
 if active is false, else time that the alert was last checked (e.g.\
 time when we last checked an offline target was still not offline)",
        null=True,
    )

    _message = models.TextField(
        db_column="message", null=True, help_text="Message associated with the Alert. Created at Alert creation time"
    )

    # Note: use True and None instead of True and False so that
    # unique-together constraint only applied to active alerts
    active = models.NullBooleanField()

    # whether a user has manually dismissed alert
    dismissed = models.BooleanField(
        default=False, help_text="True denotes that the user " "has acknowledged this alert."
    )

    severity = models.IntegerField(
        default=logging.INFO,
        help_text=("String indicating the " "severity of the alert, " "one of %s") % STR_TO_SEVERITY.keys(),
    )

    # This is only used by one event ClientConnectEvent but it is critical and so needs to be searchable etc
    # for that reason it can't use the variant
    lustre_pid = models.IntegerField(null=True)

    # Subclasses set this, used as a default in .notify()
    default_severity = logging.INFO

    # For historical compatibility anything called Alert will send an alert email and anything else won't.
    # This can obviously be overridden by any particular event but gives us a like-for-like behaviour.
    @property
    def require_mail_alert(self):
        """True when the string form of this instance's type contains ``Alert'>``."""
        return "Alert'>" in str(type(self))

    def get_active_bool(self):
        """Return ``active`` coerced to a plain bool (``None`` becomes ``False``)."""
        return bool(self.active)

    def set_active_bool(self, value):
        """Store a truthy ``value`` as ``True`` and a falsy one as ``None``."""
        if value:
            self.active = True
        else:
            self.active = None

    active_bool = property(get_active_bool, set_active_bool)

    def to_dict(self):
        """Return a plain-dict summary of this alert.

        ``end`` falls back to ``begin`` when no end time has been recorded.
        Calling this may save the model, because ``message()`` persists a
        lazily generated message.
        """
        from chroma_core.lib.util import time_str

        # message() caches into self._message, so one call serves both keys.
        message = self.message()
        return {
            "alert_severity": "alert",  # FIXME: Still need to figure out whether to pass enum or display string.
            "alert_item": str(self.alert_item),
            "alert_message": message,
            "message": message,
            "active": bool(self.active),
            "begin": time_str(self.begin),
            "end": time_str(self.end) if self.end is not None else time_str(self.begin),
            "id": self.id,
            "alert_item_id": self.alert_item_id,
            "alert_item_content_type_id": self.alert_item_type_id,
        }

    @property
    def affected_objects(self):
        """
        :return: A list of objects other than the alert_item that are affected by this alert
        """
        return []

    def end_event(self):
        """Return the event to emit when the alert is lowered; ``None`` here."""
        return None

    def alert_message(self):
        """Build the message text for this alert; this base implementation raises.

        :raises NotImplementedError: always.
        """
        raise NotImplementedError()

    def message(self):
        """Return the stored message, generating and saving it on first use."""
        # The first time this is called _message will be None, so we have to call alert_message to
        # create the message and then save it. This will occur once for each message.
        # In the future for new alerts we will try and create them when the Alert is created but
        # at the time this patch is produced that is tricky.
        # The purpose of this is to make it so that Alerts can continue to operate when the data required
        # to create the message no longer exists.
        # It's a small step for HYD-5736 and a move towards a more efficient model.
        if self._message is None:
            self._message = self.alert_message()
            self.save()

        return self._message

    def affected_targets(self, affect_target):
        """Hook taking an ``affect_target`` callback; does nothing in this base class."""
        pass

    @classmethod
    def subclasses(cls):
        """Return every direct and indirect subclass of ``cls``, depth first."""
        all_subclasses = []
        for subclass in cls.__subclasses__():
            all_subclasses.append(subclass)
            all_subclasses.extend(subclass.subclasses())
        return all_subclasses

    @classmethod
    def filter_by_item(cls, item):
        """Return a queryset of active alerts of this class attached to ``item``.

        Items exposing ``content_type`` are matched on that content type
        directly; other items are matched on lowercased class name and
        app label.
        """
        if hasattr(item, "content_type"):
            # A DowncastMetaclass object
            return cls.objects.filter(active=True, alert_item_id=item.id, alert_item_type=item.content_type)
        else:
            return cls.objects.filter(
                active=True,
                alert_item_id=item.pk,
                alert_item_type__model=item.__class__.__name__.lower(),
                alert_item_type__app_label=item.__class__._meta.app_label,
            )

    @classmethod
    def filter_by_item_id(cls, item_class, item_id):
        """Like ``filter_by_item`` but takes the item's class and id separately."""
        return cls.objects.filter(
            active=True,
            alert_item_id=item_id,
            alert_item_type__model=item_class.__name__.lower(),
            alert_item_type__app_label=item_class._meta.app_label,
        )

    @classmethod
    def notify(cls, alert_item, active, **kwargs):
        """Notify an alert in the default severity level for that alert"""
        return cls._notify(alert_item, active, **kwargs)

    @classmethod
    def notify_warning(cls, alert_item, active, **kwargs):
        """Notify an alert in at most the WARNING severity level"""
        # Note: this replaces any 'attrs_to_save' supplied by the caller.
        kwargs["attrs_to_save"] = {"severity": min(cls.default_severity, logging.WARNING)}
        return cls._notify(alert_item, active, **kwargs)

    @classmethod
    def _notify(cls, alert_item, active, **kwargs):
        """Raise (``active`` truthy) or lower the alert on ``alert_item``."""
        if hasattr(alert_item, "content_type"):
            alert_item = alert_item.downcast()

        if active:
            return cls.high(alert_item, **kwargs)
        else:
            return cls.low(alert_item, **kwargs)

    @classmethod
    def _get_attrs_to_save(cls, kwargs):
        """Strip non-filterable entries out of ``kwargs`` (in place) and return them.

        :param kwargs: dict of keyword arguments; mutated so that only keys
            naming database columns of ``cls`` remain.
        :return: dict combining the popped ``attrs_to_save`` entry (if any)
            with every removed non-column key.
        """
        # Prepare data to be saved with alert, but not affect the filter_by_item() below
        # e.g. Only one alert type per alert item can be active, so we don't need to filter on severity.
        attrs_to_save = kwargs.pop("attrs_to_save", {})

        # Add any properties to the attrs_to_save that are not db fields, we can't search on
        # non db fields after all. Some alerts have custom fields and they will be searched out here.
        fields = [field.attname for field in cls._meta.fields]
        # Iterate over a snapshot: popping while iterating a live key view
        # raises RuntimeError on Python 3.
        for attr in list(kwargs):
            if attr not in fields:
                attrs_to_save[attr] = kwargs.pop(attr)

        return attrs_to_save

    @classmethod
    def high(cls, alert_item, **kwargs):
        """Raise this alert on ``alert_item`` unless a matching one is already active.

        :return: the existing or newly saved alert; ``None`` when the item has
            a ``not_deleted`` attribute that is not ``True``, or when saving
            the new alert hits an ``IntegrityError``.
        """
        if hasattr(alert_item, "not_deleted") and alert_item.not_deleted != True:
            return None

        attrs_to_save = cls._get_attrs_to_save(kwargs)

        try:
            alert_state = cls.filter_by_item(alert_item).get(**kwargs)
        except cls.DoesNotExist:
            kwargs.update(attrs_to_save)

            if "alert_type" not in kwargs:
                kwargs["alert_type"] = cls.__name__
            if "severity" not in kwargs:
                kwargs["severity"] = cls.default_severity

            alert_state = cls(
                active=True,
                dismissed=False,  # Users dismiss, not the software
                alert_item=alert_item,
                **kwargs
            )
            try:
                alert_state._message = alert_state.alert_message()
                alert_state.save()
                job_log.info(
                    "AlertState: Raised %s on %s "
                    "at severity %s" % (cls, alert_state.alert_item, alert_state.severity)
                )
            except IntegrityError as e:
                job_log.warning(
                    "AlertState: IntegrityError %s saving %s : %s : %s" % (e, cls.__name__, alert_item, kwargs)
                )
                # Handle colliding inserts: drop out here, no need to update
                # the .end of the existing record as we are logically concurrent
                # with the creator.
                return None
        return alert_state

    @classmethod
    def low(cls, alert_item, **kwargs):
        """Lower the active alert of this class on ``alert_item``, if there is one.

        :param end_time: optional keyword; recorded as the alert's ``end``
            instead of the current time.
        :return: the alert that was lowered, or ``None`` if none was active.
        """
        # The caller may provide an end_time rather than wanting now()
        end_time = kwargs.pop("end_time", timezone.now())

        # currently, no attrs are saved when an attr is lowered, so just filter them out of kwargs
        cls._get_attrs_to_save(kwargs)

        try:
            alert_state = cls.filter_by_item(alert_item).get(**kwargs)
            alert_state.end = end_time
            alert_state.active = None
            alert_state.save()

            # We optionally emit an event when alerts are lowered: we don't do that
            # for the beginning because that is implicit in the alert itself, whereas
            # the end can reasonably have a different message.
            end_event = alert_state.end_event()
            if end_event:
                end_event.register_event(
                    end_event.alert_item,
                    severity=end_event.severity,
                    message_str=end_event.message_str,
                    alert=end_event.alert,
                )
        except cls.DoesNotExist:
            alert_state = None

        return alert_state

    @classmethod
    def register_event(cls, alert_item, **kwargs):
        """Record an event: an alert raised and lowered at the same instant.

        Does nothing further when ``high()`` returns ``None``.
        """
        # Events are Alerts with no duration, so just go high/low.
        alert_state = cls.high(alert_item, attrs_to_save=kwargs)
        if alert_state is None:
            # high() returns None for deleted items and colliding inserts;
            # there is no begin time to lower against in that case.
            return
        cls.low(alert_item, end_time=alert_state.begin, attrs_to_save=kwargs)

    def cast(self, target_class):
        """
        Works exactly as the super except because we duplicate record_type with alert_type. We should
        remove in the future, but for now this fixes that up.
        :param target_class:
        :return:
        """
        # If the save fails for some reason then this change will have no effect.
        self.alert_type = target_class._meta.object_name

        new_alert = super(AlertStateBase, self).cast(target_class)

        # The message may well have changed so regenerate it.
        new_alert._message = None
        new_alert.message()

        return new_alert