def will_analyze_alert(self, alert):
    '''Decide whether this analyzer should process the given alert.

    An alert is rejected (False) when it is a duplicate or when its event
    location type is neither 'C' (Compute) nor 'I' (I/O). Every other alert
    is accepted (True).
    '''
    rec_id = alert.get_rec_id()
    known_dup_recid = alert.dup_alert_recid

    if known_dup_recid is None:
        # The duplicate field has not been set on the alert, so fall back to
        # asking the alert manager whether this record is a duplicate.
        if self.alertMgr.is_duplicate(rec_id):
            registry.get_logger().debug('Duplicate alert id %d is not analyzed.', alert.get_rec_id())
            return False
    elif known_dup_recid > 0:
        # The alert already points at the record it duplicates.
        registry.get_logger().debug('Duplicate alert rec id %d is not analyzed', alert.get_rec_id())
        return False

    # Only alerts carrying a hardware location (C: Compute; I: I/O) are analyzed.
    location_kind = alert.event_loc.get_id()
    if location_kind not in ('C', 'I'):
        registry.get_logger().debug('Alert id %d with location type %s is not analyzed.', alert.get_rec_id(), location_kind)
        return False

    # Anything that made it this far is analyzed.
    return True
def analyze_alert(self, alert):
    '''Analyze an alert and deliver it, close it, or raise a common alert.

    Handling by case:
      * no condition event, incident id 'BQL01', or a 'rack' location:
        the alert is put on the delivery queue unchanged.
      * incident id 'ENDJOB01' or 'THRES01': the alert is closed when
        has_matching_blockId() reports a match for the event's block id,
        otherwise it is delivered.
      * every other incident id: the alert is closed when has_duplicate()
        reports a prior alert at the same or a parent location; otherwise
        a common alert is sent when has_common_location() is true, else
        the alert is delivered.

    Parameters:
        alert -- the alert to analyze.

    Returns:
        None.
    '''
    logger = registry.get_logger()
    alert_recId = alert.get_rec_id()
    alert_id = alert.get_incident_id()
    loc_type = alert.event_loc.get_id()
    location = alert.event_loc.get_location()
    logger.info('Analyzing alert id %d loc_type: %s: %s', alert_recId, loc_type, location)

    # There should only be one condition event associated with the alert.
    events = alert.condition_events
    if not events:
        logger.error('No event associated with the alert recid %d', alert_recId)
        self._deliver_alert(alert)
        return
    # Read the event from a copy instead of calling pop() on the collection:
    # pop() on the alert's own collection would strip the event from the
    # alert before it is handed to the delivery queue below.
    event = list(events)[-1]

    if alert_id == 'BQL01':
        # No need to analyze BQL01 alerts, just pass it to the delivery queue
        logger.info('Nothing to analyze for alert id %s ', alert_id)
        self._deliver_alert(alert)
        return

    # Get the location
    loc = Location(loc_type, location)
    locName = self.get_loc_name(loc)
    alert_time = str(alert.get_time_occurred())

    # No need to analyze alert with rack location
    if locName == 'rack':
        logger.info('Nothing to analyze for alert recid %d with rack location', alert_recId)
        self._deliver_alert(alert)
        return

    if alert_id in ('ENDJOB01', 'THRES01'):
        self._analyze_block_alert(alert, alert_recId, event, alert_time)
    else:
        # The rest of the alert ids (HWERR01 or COMMON01).
        self._analyze_location_alert(alert, alert_recId, event, alert_time, loc, location)


def _deliver_alert(self, alert):
    '''Put the alert on the alert delivery queue.'''
    registry.get_service(SERVICE_ALERT_DELIVERY_Q).put(alert)


def _analyze_block_alert(self, alert, alert_recId, event, alert_time):
    '''Close or deliver an ENDJOB01/THRES01 alert based on its block id.

    The alert is closed when has_matching_blockId() finds a match for the
    event's block id; it is delivered when the event has no usable block id
    or no match is found.
    '''
    logger = registry.get_logger()

    raw_block = event.raw_data['block']
    event_block = None if raw_block is None else raw_block.strip()
    if event_block is None or event_block == BGQ_EVENT_NULL_BLOCK:
        # Without a block id there is nothing to correlate on.
        logger.info('No block id for alert id %d, no common alert generated for block: %s', alert_recId, event_block)
        self._deliver_alert(alert)
        return

    # Get db connection needed for query.
    # NOTE(review): the connection is left open, as before; confirm whether
    # get_connection() returns a connection the caller is expected to close.
    dbConn = registry.get_service(SERVICE_DB_INTERFACE).get_connection()
    cursor = dbConn.cursor()
    try:
        # The same lookup is used for both ENDJOB01 and THRES01.
        same_block = self.has_matching_blockId(event_block, alert_time, cursor)
    finally:
        # NOTE(review): assumes has_matching_blockId does not keep the
        # cursor after it returns.
        cursor.close()

    if same_block:
        # Found prior alert with the same block id, close current alert
        logger.info('Closing current alert recid %d due to prior alert with the same block id', alert_recId)
        registry.get_service(SERVICE_ALERT_MGR).close(alert_recId)
    else:
        # Found no prior alert with the same block id, pass current alert to the delivery queue
        logger.info('No common block id found for alert id %d within the last %s', alert_recId, self.window_time)
        self._deliver_alert(alert)


def _analyze_location_alert(self, alert, alert_recId, event, alert_time, loc, location):
    '''Close, escalate or deliver an alert based on its hardware location.

    The alert is closed when has_duplicate() finds a prior alert for the same
    location or one of its parent locations. Otherwise a common alert is sent
    when has_common_location() is true, and the alert is delivered when not.
    '''
    logger = registry.get_logger()

    # Build the location predicates appended to self.dup_query.
    # NOTE(review): location values are concatenated straight into the SQL
    # text; this assumes they never contain quote characters - confirm, or
    # move to bound parameters together with the query helpers.
    _, loc_parent_list = self.get_loc_parent(loc)
    parent_expr = " or ".join(
        " \"event_loc\" like '" + pLoc + "'" for pLoc in loc_parent_list)
    own_clause = "\"event_loc\" like '" + location + "'"
    if parent_expr:
        loc_expr = parent_expr + " or " + own_clause
    else:
        # Without parents, avoid emitting a dangling leading "or".
        loc_expr = own_clause

    # dup_qry matches the parent locations plus the alert's own location;
    # dup_qry2 matches the parent locations only.
    dup_qry = self.dup_query + "(" + loc_expr + ")"
    dup_qry2 = self.dup_query + "(" + parent_expr + ")"

    # NOTE(review): the connection is left open, as before; confirm whether
    # get_connection() returns a connection the caller is expected to close.
    dbConn = registry.get_service(SERVICE_DB_INTERFACE).get_connection()
    cursor = dbConn.cursor()
    try:
        if self.has_duplicate(alert_time, dup_qry, cursor):
            # Found prior alert for the same or a parent location, close current alert
            logger.info('Closing current alert recid %d due to prior alert with same common location', alert_recId)
            registry.get_service(SERVICE_ALERT_MGR).close(alert_recId)
            return

        # Look for a common hardware problem if there are multiple alerts for
        # different locations on the same hardware.
        if self.has_common_location(loc, alert_time, self.query, cursor):
            # Send common alert
            self.send_common_alert(loc, alert_recId, event, alert_time, dup_qry2, cursor)
        else:
            # Pass current alert to the delivery queue
            logger.info('No common location for %s found for alert id: %d within the last %s ', location, alert_recId, self.window_time)
            self._deliver_alert(alert)
    finally:
        # NOTE(review): assumes the helpers above do not keep the cursor
        # after they return.
        cursor.close()