Exemplo n.º 1
0
    def will_analyze_alert(self, alert):
        '''Tell the caller whether this alert should go through analysis.

        Duplicate alerts and alerts whose event location type is neither
        'C' (Compute) nor 'I' (I/O) are skipped; every other alert is analyzed.

        alert -- the candidate alert
        Returns True to analyze the alert, False to skip it.
        '''
        rec_id = alert.get_rec_id()
        recorded_dup = alert.dup_alert_recid

        if recorded_dup is None:
            # The duplicate field has not been set, so fall back to asking
            # the alert manager whether this alert is a duplicate.
            if self.alertMgr.is_duplicate(rec_id):
                registry.get_logger().debug('Duplicate alert id %d is not analyzed.', alert.get_rec_id())
                return False
        elif recorded_dup > 0:
            # A positive recorded value means the alert is a duplicate.
            registry.get_logger().debug('Duplicate alert rec id %d is not analyzed', alert.get_rec_id())
            return False

        # Only alerts located on Compute ('C') or I/O ('I') hardware are analyzed.
        location_kind = alert.event_loc.get_id()
        if location_kind not in ('C', 'I'):
            registry.get_logger().debug('Alert id %d with location type %s is not analyzed.', alert.get_rec_id(), location_kind)
            return False

        return True
Exemplo n.º 2
0
    def analyze_alert(self, alert):
        '''Analyze an alert and deliver it, close it, or raise a common alert.

        Outcomes, in the order they are checked:
          * the alert has no condition event: log an error and deliver it;
          * incident id 'BQL01': deliver it;
          * location name 'rack': deliver it;
          * incident id 'ENDJOB01' or 'THRES01': deliver it when the event has
            no block id; otherwise close it when has_matching_blockId() is
            true for that block id, else deliver it;
          * any other incident id: close it when has_duplicate() is true for
            the query built over this location and its parent locations;
            otherwise send a common alert when has_common_location() is true,
            else deliver it.

        "Deliver" puts the alert on the SERVICE_ALERT_DELIVERY_Q service;
        "close" calls close() on the SERVICE_ALERT_MGR service with the
        alert's rec id.

        alert -- the alert to analyze
        Returns None.
        '''
        alert_recId = alert.get_rec_id()
        alert_id = alert.get_incident_id()
        loc_type = alert.event_loc.get_id()
        location = alert.event_loc.get_location()
        logger = registry.get_logger()
        logger.info('Analyzing alert id %d loc_type: %s: %s', alert_recId, loc_type, location)

        # There should only be one condition event associated with the alert.
        events = alert.condition_events
        if not events:
            logger.error('No event associated with the alert recid %d', alert_recId)
            registry.get_service(SERVICE_ALERT_DELIVERY_Q).put(alert)
            return
        # NOTE(review): pop() removes the element from `events`. If
        # condition_events hands back the alert's own collection rather than a
        # copy, the alert delivered below no longer carries this event --
        # confirm that this is intended.
        event = events.pop()

        if alert_id == 'BQL01':
            # No need to analyze BQL01 alerts, just pass it to the delivery queue
            logger.info('Nothing to analyze for alert id %s ', alert_id)
            registry.get_service(SERVICE_ALERT_DELIVERY_Q).put(alert)
            return

        # Get the location
        loc = Location(loc_type, location)
        locName = self.get_loc_name(loc)

        alert_time = str(alert.get_time_occurred())

        # No need to analyze alert with rack location
        if locName == 'rack':
            logger.info('Nothing to analyze for alert recid %d with rack location', alert_recId)
            registry.get_service(SERVICE_ALERT_DELIVERY_Q).put(alert)
            return

        # Find out if there are other alerts with the same block id (for ENDJOB01 and THRES01)
        if alert_id in ('ENDJOB01', 'THRES01'):
            raw_block = event.raw_data['block']
            event_block = None if raw_block is None else raw_block.strip()

            if event_block is None or event_block == BGQ_EVENT_NULL_BLOCK:
                # The event carries no usable block id, so there is nothing to
                # match against; pass current alert to the delivery queue
                logger.info('No block id for alert id %d, no common alert generated for block: %s', alert_recId, event_block)
                registry.get_service(SERVICE_ALERT_DELIVERY_Q).put(alert)
                return

            # Get db connection needed for query
            # NOTE(review): neither the connection nor the cursor is closed in
            # this method -- confirm who owns their lifetime.
            dbi = registry.get_service(SERVICE_DB_INTERFACE)
            dbConn = dbi.get_connection()
            cursor = dbConn.cursor()

            # The same lookup is issued for both ENDJOB01 and THRES01; the
            # incident id is not passed to the helper, so any id-specific
            # filtering has to live inside has_matching_blockId().
            if self.has_matching_blockId(event_block, alert_time, cursor):
                # Found prior alert with the same block id, close current alert
                logger.info('Closing current alert recid %d due to prior alert with the same block id', alert_recId)
                registry.get_service(SERVICE_ALERT_MGR).close(alert_recId)
            else:
                # Found no prior alert with the same block id, pass current alert to the delivery queue
                logger.info('No common block id found for alert id %d within the last %s', alert_recId, self.window_time)
                registry.get_service(SERVICE_ALERT_DELIVERY_Q).put(alert)

            return

        # The following will handle the rest of the alert ids (HWERR01 or COMMON01).
        # Find out if a common mode alert already exists for the same location or higher hierarchy.
        # Only the list of parent locations is needed here; the first value
        # returned by get_loc_parent() is not used.
        _, loc_parent_list = self.get_loc_parent(loc)

        # NOTE(review): the location strings are concatenated straight into
        # the SQL text. Confirm they always come from a trusted source, or
        # move to bind parameters inside has_duplicate()/send_common_alert().
        loc_qry = '(' + " or ".join(
            " \"event_loc\" like '" + pLoc + "'" for pLoc in loc_parent_list)

        # dup_qry2 covers the parent locations only; dup_qry also covers the
        # alert's own location.
        dup_qry2 = self.dup_query + loc_qry + ")"
        dup_qry = self.dup_query + loc_qry + " or \"event_loc\" like '" + location + "')"

        # NOTE(review): neither the connection nor the cursor is closed in
        # this method -- confirm who owns their lifetime.
        dbi = registry.get_service(SERVICE_DB_INTERFACE)
        dbConn = dbi.get_connection()
        cursor = dbConn.cursor()

        if self.has_duplicate(alert_time, dup_qry, cursor):
            # Found prior alert for the same or a higher-level location, close current alert
            logger.info('Closing current alert recid %d due to prior alert with same common location', alert_recId)
            registry.get_service(SERVICE_ALERT_MGR).close(alert_recId)
            return

        # Look for a common hardware problem if there are multiple alerts for different location
        # on the same hardware.
        if self.has_common_location(loc, alert_time, self.query, cursor):
            # Send common alert
            self.send_common_alert(loc, alert_recId, event, alert_time, dup_qry2, cursor)
        else:
            # Pass current alert to the delivery queue
            logger.info('No common location for %s found for alert id: %d within the last %s ', location, alert_recId, self.window_time)
            registry.get_service(SERVICE_ALERT_DELIVERY_Q).put(alert)

        return