Exemplo n.º 1
0
    def run(self):
        """Poll the SQS queue forever, forwarding alarms and heartbeats.

        Every pass reads from ``self.sqs`` with ``wait_time_seconds=20``.
        A received notification is parsed with ``parse_notification``, sent
        through ``self.api`` and then deleted from the queue whether or not
        the send succeeded. A heartbeat is sent at the end of every pass.

        A queue read error is logged and retried after a 20 second sleep;
        send failures are logged and otherwise ignored. The loop has no
        exit condition of its own.
        """
        while True:
            LOG.debug('Waiting for CloudWatch alarms on %s...', AWS_SQS_QUEUE)
            try:
                message = self.sqs.read(wait_time_seconds=20)
            except boto.exception.SQSError as read_error:
                LOG.warning('Could not read from queue: %s', read_error)
                time.sleep(20)
                continue

            if message:
                alert = self.parse_notification(message)
                try:
                    self.api.send(alert)
                except Exception as send_error:
                    LOG.warning('Failed to send alert: %s', send_error)
                # The message is removed even when the send above failed.
                self.sqs.delete_message(message)

            LOG.debug('Send heartbeat...')
            beat = Heartbeat(tags=[__version__])
            try:
                self.api.send(beat)
            except Exception as beat_error:
                LOG.warning('Failed to send heartbeat: %s', beat_error)
Exemplo n.º 2
0
class SnmpTrapHandler(object):
    """Read one SNMP trap from standard input and forward it to the API."""

    def __init__(self):

        # The API client is created in run(), once the environment is read.
        self.api = None

    def run(self):
        """Read a trap from stdin, send the resulting alert, then a heartbeat.

        The API endpoint and key are taken from the ``ALERTA_ENDPOINT`` and
        ``ALERTA_API_KEY`` environment variables. Failures while sending
        the alert or the heartbeat are logged as warnings and not raised.
        """
        endpoint = os.environ.get('ALERTA_ENDPOINT', 'http://localhost:8080')
        key = os.environ.get('ALERTA_API_KEY', None)

        self.api = ApiClient(endpoint=endpoint, key=key)

        data = sys.stdin.read()
        LOG.info('snmptrapd -> %r', data)
        # Decode only when stdin handed back bytes (always the case on
        # Python 2); on Python 3 text-mode stdin is already decoded and
        # the ``unicode`` builtin does not exist.
        if isinstance(data, bytes):
            data = data.decode('utf-8', 'ignore')
        LOG.debug('unicoded -> %s', data)

        snmptrapAlert = SnmpTrapHandler.parse_snmptrap(data)

        # parse_snmptrap() result is falsy when the trap could not be parsed.
        if snmptrapAlert:
            try:
                self.api.send(snmptrapAlert)
            except Exception as e:
                LOG.warning('Failed to send alert: %s', e)

        LOG.debug('Send heartbeat...')
        heartbeat = Heartbeat(tags=[__version__])
        try:
            self.api.send(heartbeat)
        except Exception as e:
            LOG.warning('Failed to send heartbeat: %s', e)
Exemplo n.º 3
0
def _parse_event_tokens(text):
    """Parse whitespace-separated ``key:value`` tokens into a dict.

    Each token is split on its first colon only, so a value that itself
    contains ':' is kept intact. Tokens without a colon are ignored.
    """
    pairs = {}
    for token in text.split():
        key, sep, value = token.partition(':')
        if sep:
            pairs[key] = value
    return pairs


def main():
    """Run the event listener loop: read events from stdin and forward them.

    For every event the loop writes ``READY``, reads one header line and
    then ``len`` characters of payload from stdin. Events whose name
    starts with ``TICK`` become a ``Heartbeat``; all others become an
    ``Alert`` whose severity is derived from the event name suffix. The
    outcome of the send is reported back with a ``RESULT`` command.

    The loop ends when stdin reaches end-of-file.
    """
    listener = Listener()

    while True:
        listener.send_cmd('READY\n')

        header_line = sys.stdin.readline()
        if not header_line:
            # EOF: nothing more will arrive, so stop instead of failing on
            # the missing 'len' header.
            break
        headers = _parse_event_tokens(header_line)
        payload = sys.stdin.read(int(headers['len']))
        body = _parse_event_tokens(payload)

        event = headers['eventname']
        if event.startswith('TICK'):
            supervisorAlert = Heartbeat(origin='supervisord',
                                        tags=[headers['ver'], event])
        else:
            # Severity is chosen from the state the event name ends with.
            if event.endswith('FATAL'):
                severity = 'critical'
            elif event.endswith('BACKOFF'):
                severity = 'warning'
            elif event.endswith('EXITED'):
                severity = 'minor'
            else:
                severity = 'normal'

            supervisorAlert = Alert(
                resource=body['processname'],
                environment='Production',
                service=['supervisord'],
                event=event,
                correlate=[
                    'PROCESS_STATE_STARTING', 'PROCESS_STATE_RUNNING',
                    'PROCESS_STATE_BACKOFF', 'PROCESS_STATE_STOPPING',
                    'PROCESS_STATE_EXITED', 'PROCESS_STATE_STOPPED',
                    'PROCESS_STATE_FATAL', 'PROCESS_STATE_UNKNOWN'
                ],
                value='serial=%s' % headers['serial'],
                severity=severity,
                origin=headers['server'],
                text='State changed from %s to %s.' %
                (body['from_state'], event),
                raw_data='%s\n\n%s' % (json.dumps(headers), json.dumps(body)))
        try:
            listener.api.send(supervisorAlert)
        except Exception as e:
            listener.log_stderr(e)
            listener.send_cmd('RESULT 4\nFAIL')
        else:
            listener.send_cmd('RESULT 2\nOK')
Exemplo n.º 4
0
def create_heartbeat():
    """Create a heartbeat from the request body and return it as JSON.

    Responds with 400 when the request data cannot be parsed (ValueError),
    500 when saving fails, and otherwise 201 with the stored heartbeat and
    a ``Location`` header pointing at it.
    """
    try:
        parsed = Heartbeat.parse_heartbeat(request.data)
    except ValueError as err:
        return jsonify(status="error", message=str(err)), 400

    try:
        saved = db.save_heartbeat(parsed)
    except Exception as err:
        return jsonify(status="error", message=str(err)), 500

    body = saved.get_body()
    # The same URL is used for the body link and the Location header.
    href = "%s/%s" % (request.base_url, saved.id)
    body['href'] = href
    return jsonify(status="ok", id=saved.id, heartbeat=body), 201, {'Location': href}
Exemplo n.º 5
0
def create_heartbeat():
    """Parse, store and echo back a heartbeat posted in the request body.

    Returns a ``(response, status)`` pair on failure -- 400 for a
    ValueError while parsing, 500 for any error while saving -- and a
    ``(response, 201, headers)`` triple with a ``Location`` header on
    success.
    """
    try:
        heartbeat = Heartbeat.parse_heartbeat(request.data)
    except ValueError as parse_error:
        return jsonify(status="error", message=str(parse_error)), 400

    try:
        heartbeat = db.save_heartbeat(heartbeat)
    except Exception as save_error:
        return jsonify(status="error", message=str(save_error)), 500

    body = heartbeat.get_body()
    location = "%s/%s" % (request.base_url, heartbeat.id)
    body['href'] = location

    payload = jsonify(status="ok", id=heartbeat.id, heartbeat=body)
    return payload, 201, {'Location': location}
Exemplo n.º 6
0
def create_heartbeat():
    """Create a heartbeat from the request body, scoped to the caller.

    Unless the request context role is 'admin', the heartbeat's customer
    is overwritten with the customer from the request context. Responds
    400 on a parse ValueError, 500 on a save error, and 201 with the
    stored heartbeat plus a ``Location`` header otherwise.
    """
    try:
        parsed = Heartbeat.parse_heartbeat(request.data)
    except ValueError as err:
        return jsonify(status="error", message=str(err)), 400

    is_admin = g.get('role', None) == 'admin'
    if not is_admin:
        parsed.customer = g.get('customer', None)

    try:
        saved = db.save_heartbeat(parsed)
    except Exception as err:
        return jsonify(status="error", message=str(err)), 500

    href = absolute_url('/heartbeat/' + saved.id)
    body = saved.get_body()
    body['href'] = href
    return jsonify(status="ok", id=saved.id, heartbeat=body), 201, {'Location': href}
Exemplo n.º 7
0
def create_heartbeat():
    """Store the heartbeat sent in the request and return it as JSON.

    Non-admin callers (request context role other than 'admin') have the
    heartbeat's customer replaced by the request context customer.
    A ValueError while parsing yields a 400 response, any error while
    saving yields 500, and success yields 201 with a ``Location`` header.
    """
    try:
        heartbeat = Heartbeat.parse_heartbeat(request.data)
    except ValueError as parse_error:
        return jsonify(status="error", message=str(parse_error)), 400

    role = g.get('role', None)
    if role != 'admin':
        heartbeat.customer = g.get('customer', None)

    try:
        heartbeat = db.save_heartbeat(heartbeat)
    except Exception as save_error:
        return jsonify(status="error", message=str(save_error)), 500

    body = heartbeat.get_body()
    body['href'] = absolute_url('/heartbeat/' + heartbeat.id)

    response = jsonify(status="ok", id=heartbeat.id, heartbeat=body)
    headers = {'Location': body['href']}
    return response, 201, headers
Exemplo n.º 8
0
class UrlmonDaemon(object):
    """Queue the configured URL checks for worker threads on a fixed cycle."""

    def __init__(self):

        # Set to True to make the main loop in run() stop.
        self.shuttingdown = False

    def run(self):
        """Start the worker threads and feed them checks until shut down.

        Each cycle queues every entry of ``settings.checks`` together with
        the current time, sends a heartbeat, sleeps ``LOOP_EVERY`` seconds
        and logs the queue length. ``KeyboardInterrupt`` or ``SystemExit``
        raised during a cycle ends the loop.
        """
        self.running = True

        self.queue = Queue.Queue()
        self.api = ApiClient(endpoint=settings.ENDPOINT,
                             key=settings.API_KEY)

        # Start worker threads
        LOG.debug('Starting %s worker threads...', SERVER_THREADS)
        for i in range(SERVER_THREADS):
            w = WorkerThread(self.queue, self.api)
            try:
                w.start()
            except Exception as e:
                # A worker that fails to start is skipped; the rest still run.
                LOG.error('Worker thread #%s did not start: %s', i, e)
                continue
            LOG.info('Started worker thread: %s', w.getName())

        while not self.shuttingdown:
            try:
                for check in settings.checks:
                    self.queue.put((check, time.time()))

                LOG.debug('Send heartbeat...')
                heartbeat = Heartbeat(tags=[__version__])
                try:
                    self.api.send(heartbeat)
                except Exception as e:
                    LOG.warning('Failed to send heartbeat: %s', e)

                time.sleep(LOOP_EVERY)
                LOG.info('URL check queue length is %d', self.queue.qsize())

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True
Exemplo n.º 9
0
    def heartbeat(self, args):
        """Send a heartbeat built from ``args`` and print its id.

        ``args`` must provide ``origin``, ``tags`` and ``timeout``. Any
        exception while building or sending the heartbeat, or a response
        whose status is not 'ok', is logged as an error and terminates
        the process with exit status 1.
        """
        try:
            beat = Heartbeat(origin=args.origin,
                             tags=args.tags,
                             timeout=args.timeout)
            reply = self.api.send(beat)
        except Exception as err:
            LOG.error(err)
            sys.exit(1)

        if reply['status'] != 'ok':
            LOG.error(reply['message'])
            sys.exit(1)

        print(reply['id'])
Exemplo n.º 10
0
def create_heartbeat(tenant):
    """Create a heartbeat for ``tenant`` from the request body.

    The tenant is first mapped through ``generateDBName`` and the result
    is passed to ``db.save_heartbeat``. Unless the request context role is
    'admin', the heartbeat's customer is replaced by the request context
    customer. Responds 400 on a parse ValueError, 500 on a save error,
    and 201 with the stored heartbeat and a ``Location`` header otherwise.
    """
    db_name = generateDBName(tenant)

    try:
        parsed = Heartbeat.parse_heartbeat(request.data)
    except ValueError as err:
        return jsonify(status="error", message=str(err)), 400

    is_admin = g.get('role', None) == 'admin'
    if not is_admin:
        parsed.customer = g.get('customer', None)

    try:
        saved = db.save_heartbeat(db_name, parsed)
    except Exception as err:
        return jsonify(status="error", message=str(err)), 500

    body = saved.get_body()
    href = "%s/%s" % (request.base_url, saved.id)
    body['href'] = href
    return jsonify(status="ok", id=saved.id, heartbeat=body), 201, {'Location': href}
Exemplo n.º 11
0
    def parse_snmptrap(data):
        """Build an Alert (or Heartbeat) from the text of one SNMP trap.

        ``data`` is expected to contain lines of the form
        ``$<name> <value>`` plus the trap's varbinds, which are separated
        from one another by the literal marker ``~%~``. The ``$`` lines
        populate ``trapvars``; each varbind is additionally stored under
        ``$1``, ``$2``, ... and their count under ``$#``.

        Returns the alert after ``SnmpTrapHandler.translate_alert`` has
        been applied to it, a ``Heartbeat`` when the translated alert
        reports type 'Heartbeat', or ``None`` when no ``$s`` line is
        present.

        Raises ``KeyError`` if a trap variable the code relies on
        (for example ``$q``, ``$B``, ``$x``) is missing from ``data``.
        """
        pdu_data = data.splitlines()

        # Separate the "$name value" lines from the varbind lines. Each line
        # is classified on its own, so a "$" line that is not at the very
        # front cannot cause an unrelated varbind line to be dropped.
        trapvars = dict()
        varbind_list = list()
        for line in pdu_data:
            if not line.startswith('$'):
                varbind_list.append(line)
                continue
            try:
                special, value = line.split(None, 1)
            except ValueError:
                # "$name" with nothing after it: keep the name, empty value
                # (same fallback the varbind loop below uses).
                special, value = line.strip(), ''
            trapvars[special] = value

        if '$s' in trapvars:
            if trapvars['$s'] == '0':
                version = 'SNMPv1'
            elif trapvars['$s'] == '1':
                version = 'SNMPv2c'
            elif trapvars['$s'] == '2':
                version = 'SNMPv2u'  # not supported
            else:
                version = 'SNMPv3'
            trapvars['$s'] = version
        else:
            LOG.warning('Failed to parse unknown trap type.')
            return

        # Get varbinds
        varbinds = dict()
        idx = 0
        for varbind in '\n'.join(varbind_list).split('~%~'):
            if varbind == '':
                break
            idx += 1
            try:
                oid, value = varbind.split(None, 1)
            except ValueError:
                oid = varbind
                value = ''
            varbinds[oid] = value
            trapvars['$' + str(idx)] = value  # $n
            LOG.debug('$%s %s', str(idx), value)

        trapvars['$q'] = trapvars['$q'].lstrip(
            '.')  # if numeric, remove leading '.'
        trapvars['$#'] = str(idx)

        LOG.debug('varbinds = %s', varbinds)

        LOG.debug('version = %s', version)

        correlate = list()

        # NOTE(review): '$O' is only assigned in the SNMPv1 and SNMPv2c
        # branches below (and for SNMPv1 only when '$w' is '0'..'6'); other
        # traps rely on the input already providing '$O' -- confirm.
        if version == 'SNMPv1':
            if trapvars['$w'] == '0':
                trapvars['$O'] = 'coldStart'
                correlate = ['coldStart', 'warmStart']
            elif trapvars['$w'] == '1':
                trapvars['$O'] = 'warmStart'
                correlate = ['coldStart', 'warmStart']
            elif trapvars['$w'] == '2':
                trapvars['$O'] = 'linkDown'
                correlate = ['linkUp', 'linkDown']
            elif trapvars['$w'] == '3':
                trapvars['$O'] = 'linkUp'
                correlate = ['linkUp', 'linkDown']
            elif trapvars['$w'] == '4':
                trapvars['$O'] = 'authenticationFailure'
            elif trapvars['$w'] == '5':
                trapvars['$O'] = 'egpNeighborLoss'
            elif trapvars['$w'] == '6':  # enterpriseSpecific(6)
                if trapvars['$q'].isdigit(
                ):  # XXX - specific trap number was not decoded
                    trapvars['$O'] = '%s.0.%s' % (trapvars['$N'],
                                                  trapvars['$q'])
                else:
                    trapvars['$O'] = trapvars['$q']

        elif version == 'SNMPv2c':
            # Derive the generic trap number/description from the second
            # varbind, which is also used as the trap OID below.
            if 'coldStart' in trapvars['$2']:
                trapvars['$w'] = '0'
                trapvars['$W'] = 'Cold Start'
            elif 'warmStart' in trapvars['$2']:
                trapvars['$w'] = '1'
                trapvars['$W'] = 'Warm Start'
            elif 'linkDown' in trapvars['$2']:
                trapvars['$w'] = '2'
                trapvars['$W'] = 'Link Down'
            elif 'linkUp' in trapvars['$2']:
                trapvars['$w'] = '3'
                trapvars['$W'] = 'Link Up'
            elif 'authenticationFailure' in trapvars['$2']:
                trapvars['$w'] = '4'
                trapvars['$W'] = 'Authentication Failure'
            elif 'egpNeighborLoss' in trapvars['$2']:
                trapvars['$w'] = '5'
                trapvars['$W'] = 'EGP Neighbor Loss'
            else:
                trapvars['$w'] = '6'
                trapvars['$W'] = 'Enterprise Specific'
            trapvars['$O'] = trapvars['$2']  # SNMPv2-MIB::snmpTrapOID.0

        LOG.debug('trapvars = %s', trapvars)

        LOG.info('%s-Trap-PDU %s from %s at %s %s', version, trapvars['$O'],
                 trapvars['$B'], trapvars['$x'], trapvars['$X'])

        # Pick the resource: '$B' unless it is '<UNKNOWN>', then '$A' unless
        # it is '0.0.0.0', then the IPv4 address embedded in '$b'.
        if trapvars['$B'] != '<UNKNOWN>':
            resource = trapvars['$B']
        elif trapvars['$A'] != '0.0.0.0':
            resource = trapvars['$A']
        else:
            m = re.match(r'UDP: \[(\d+\.\d+\.\d+\.\d+)\]', trapvars['$b'])
            if m:
                resource = m.group(1)
            else:
                resource = '<NONE>'

        # Defaults
        event = trapvars['$O']
        severity = 'normal'
        group = 'SNMP'
        value = trapvars['$w']
        text = trapvars['$W']
        environment = 'Production'
        service = ['Network']
        attributes = {'source': trapvars['$B']}
        tags = [version]
        timeout = None
        # '$x' and '$X' are combined into a single timestamp; they must be
        # formatted as YYYY-MM-DD and HH:MM:SS respectively.
        create_time = datetime.datetime.strptime(
            '%sT%s.000Z' % (trapvars['$x'], trapvars['$X']),
            '%Y-%m-%dT%H:%M:%S.%fZ')

        snmptrapAlert = Alert(
            resource=resource,
            event=event,
            correlate=correlate,
            group=group,
            value=value,
            severity=severity,
            environment=environment,
            service=service,
            text=text,
            event_type='snmptrapAlert',
            attributes=attributes,
            tags=tags,
            timeout=timeout,
            create_time=create_time,
            raw_data=data,
        )

        SnmpTrapHandler.translate_alert(snmptrapAlert, trapvars)

        # translate_alert may have turned the alert into a heartbeat.
        if snmptrapAlert.get_type() == 'Heartbeat':
            snmptrapAlert = Heartbeat(origin=snmptrapAlert.origin,
                                      tags=[__version__],
                                      timeout=snmptrapAlert.timeout)

        return snmptrapAlert
Exemplo n.º 12
0
    def parse_syslog(self, addr, data):
        """Convert received syslog text into a list of alerts.

        ``data`` may hold several newline-separated messages. Each one is
        matched, in order, against an RFC 5424 pattern, an RFC 3164 pattern
        and a Cisco-style pattern; empty lines, "last message repeated"
        lines and messages that match none of the patterns are skipped.

        ``addr`` is the sender address; for Cisco messages it is resolved
        to a hostname when possible and used to build the resource.

        Returns a list with one ``Alert`` (or ``Heartbeat``) per message
        that was parsed and not suppressed by
        ``Transformers.normalise_alert``.
        """
        LOG.debug('Parsing syslog message...')
        syslogAlerts = list()

        for msg in data.split('\n'):

            # NOTE: if syslog msgs aren't being split on newlines and #012 appears instead then
            #       try adding "$EscapeControlCharactersOnReceive off" to rsyslog.conf

            if not msg or 'last message repeated' in msg:
                continue

            # Reset the per-message overrides so the event/resource of one
            # message is never reused for the next message in the batch.
            event = None
            resource = None

            if re.match(r'<\d+>1', msg):
                # Parse RFC 5424 compliant message
                m = re.match(r'<(\d+)>1 (\S+) (\S+) (\S+) (\S+) (\S+) (.*)', msg)
                if m:
                    PRI = int(m.group(1))
                    ISOTIMESTAMP = m.group(2)
                    HOSTNAME = m.group(3)
                    APPNAME = m.group(4)
                    PROCID = m.group(5)
                    MSGID = m.group(6)
                    TAG = '%s[%s] %s' % (APPNAME, PROCID, MSGID)
                    MSG = m.group(7)
                    LOG.info("Parsed RFC 5424 message OK")
                else:
                    LOG.error("Could not parse RFC 5424 syslog message: %s", msg)
                    continue

            elif re.match(r'<(\d{1,3})>\S{3}\s', msg):
                # Parse RFC 3164 compliant message
                m = re.match(r'<(\d{1,3})>\S{3}\s{1,2}\d?\d \d{2}:\d{2}:\d{2} (\S+)( (\S+):)? (.*)', msg)
                if m:
                    PRI = int(m.group(1))
                    HOSTNAME = m.group(2)
                    TAG = m.group(4)  # None when the optional "tag:" part is absent
                    MSG = m.group(5)
                    LOG.info("Parsed RFC 3164 message OK")
                else:
                    LOG.error("Could not parse RFC 3164 syslog message: %s", msg)
                    continue

            elif re.match(r'<\d+>.*%[A-Z0-9_-]+', msg):
                # Parse Cisco Syslog message
                m = re.match(r'<(\d+)>.*(%([A-Z0-9_-]+)):? (.*)', msg)
                if m:
                    LOG.debug(m.groups())
                    PRI = int(m.group(1))
                    CISCO_SYSLOG = m.group(2)
                    try:
                        CISCO_FACILITY, CISCO_SEVERITY, CISCO_MNEMONIC = m.group(3).split('-')
                    except ValueError as e:
                        LOG.error('Could not parse Cisco syslog - %s: %s', e, m.group(3))
                        CISCO_FACILITY = CISCO_SEVERITY = CISCO_MNEMONIC = 'na'

                    TAG = CISCO_MNEMONIC
                    MSG = m.group(4)

                    event = CISCO_SYSLOG

                    # replace IP address with a hostname, if necessary
                    try:
                        socket.inet_aton(addr)
                        (resource, _, _) = socket.gethostbyaddr(addr)
                    except (socket.error, socket.herror):
                        resource = addr

                    resource = '%s:%s' % (resource, CISCO_FACILITY)
                else:
                    LOG.error("Could not parse Cisco syslog message: %s", msg)
                    continue

            else:
                # No known format matched: skip rather than fall through with
                # undefined or stale PRI/HOSTNAME/TAG/MSG values.
                LOG.error("Could not parse syslog message: %s", msg)
                continue

            facility, level = decode_priority(PRI)

            # Defaults
            event = event or '%s%s' % (facility.capitalize(), level.capitalize())
            resource = resource or '%s%s' % (HOSTNAME, ':' + TAG if TAG else '')
            severity = priority_to_code(level)
            group = 'Syslog'
            value = level
            text = MSG
            environment = 'Production'
            service = ['Platform']
            tags = ['%s.%s' % (facility, level)]
            correlate = list()
            timeout = None
            raw_data = msg

            syslogAlert = Alert(
                resource=resource,
                event=event,
                correlate=correlate,
                group=group,
                value=value,
                severity=severity,
                environment=environment,
                service=service,
                text=text,
                event_type='syslogAlert',
                tags=tags,
                timeout=timeout,
                raw_data=raw_data,
            )

            suppress = False
            try:
                suppress = Transformers.normalise_alert(syslogAlert, facility=facility, level=level)
            except RuntimeWarning:
                # A RuntimeWarning from normalisation leaves the alert unsuppressed.
                pass

            if suppress:
                LOG.info('Suppressing %s.%s alert', facility, level)
                LOG.debug('%s', syslogAlert)
                continue

            if syslogAlert.get_type() == 'Heartbeat':
                syslogAlert = Heartbeat(origin=syslogAlert.origin, timeout=syslogAlert.timeout)

            syslogAlerts.append(syslogAlert)

        # The caller iterates over the result, so hand the collected list back.
        return syslogAlerts
Exemplo n.º 13
0
                            data = client.recv(4096)
                            data = unicode(data, 'utf-8', errors='ignore')
                            client.close()
                            LOG.debug('Syslog TCP data received from %s: %s', addr, data)

                        syslogAlerts = self.parse_syslog(addr[0], data)
                        for syslogAlert in syslogAlerts:
                            try:
                                self.api.send(syslogAlert)
                            except Exception, e:
                                LOG.warning('Failed to send alert: %s', e)

                    count += 1
                if not ip or count % 5 == 0:
                    LOG.debug('Send heartbeat...')
                    heartbeat = Heartbeat(tags=[__version__])
                    try:
                        self.api.send(heartbeat)
                    except Exception, e:
                        LOG.warning('Failed to send heartbeat: %s', e)

            except (KeyboardInterrupt, SystemExit):
                self.shuttingdown = True

        LOG.info('Shutdown request received...')

    def parse_syslog(self, addr, data):

        LOG.debug('Parsing syslog message...')
        syslogAlerts = list()