class SnmpTrapHandler(object):
    """Read a single SNMP trap from stdin, forward it to Alerta, then heartbeat."""

    def __init__(self):
        # The API client is created lazily in run() from environment settings.
        self.api = None

    def run(self):
        """Process one trap delivered on stdin by snmptrapd.

        Reads connection settings from ALERTA_ENDPOINT / ALERTA_API_KEY,
        parses the trap text and sends the resulting alert, then emits a
        heartbeat tagged with this integration's version.
        """
        endpoint = os.environ.get('ALERTA_ENDPOINT', 'http://localhost:8080')
        key = os.environ.get('ALERTA_API_KEY', None)
        self.api = ApiClient(endpoint=endpoint, key=key)

        data = sys.stdin.read()
        LOG.info('snmptrapd -> %r', data)
        # snmptrapd hands us raw bytes; drop anything that is not valid UTF-8.
        data = unicode(data, 'utf-8', errors='ignore')
        LOG.debug('unicoded -> %s', data)

        snmptrapAlert = SnmpTrapHandler.parse_snmptrap(data)
        if snmptrapAlert:
            try:
                self.api.send(snmptrapAlert)
            # Fixed: was the Python-2-only "except Exception, e" form; the rest
            # of the file consistently uses "except ... as e".
            except Exception as e:
                LOG.warning('Failed to send alert: %s', e)

        LOG.debug('Send heartbeat...')
        heartbeat = Heartbeat(tags=[__version__])
        try:
            self.api.send(heartbeat)
        except Exception as e:
            LOG.warning('Failed to send heartbeat: %s', e)
def run(self):
    """Mailer worker loop.

    Flushes held alerts whose hold time has expired and sends a heartbeat
    roughly every ten iterations (about every 20 seconds).
    """
    api = ApiClient(endpoint=OPTIONS['endpoint'], key=OPTIONS['key'])
    keep_alive = 0
    while not self.should_stop:
        # on_hold is a module-level dict shared with other threads, so every
        # lookup/delete is guarded against concurrent removal via KeyError.
        for alertid in on_hold.keys():
            try:
                (alert, hold_time) = on_hold[alertid]
            except KeyError:
                continue
            if time.time() > hold_time:
                self.send_email(alert)
                try:
                    del on_hold[alertid]
                except KeyError:
                    continue
        if keep_alive >= 10:
            tag = OPTIONS['smtp_host'] or 'alerta-mailer'
            try:
                api.send(Heartbeat(tags=[tag]))
            except Exception as e:
                # Heartbeat failed: back off for 5s and retry on the next
                # pass without resetting the counter (e is intentionally unused).
                time.sleep(5)
                continue
            keep_alive = 0
        keep_alive += 1
        time.sleep(2)
def __init__(self, endpoint, key):
    """Initialise the updater thread state and its Alerta API client.

    :param endpoint: Alerta API base URL
    :param key: Alerta API key
    """
    self.api = ApiClient(endpoint=endpoint, key=key)

    # Server-status / metrics bookkeeping.
    self.version = ''
    self.last_metrics = {}
    self.app_last_time = None
    self.rate_metrics = {}
    self.latency_metrics = {}

    # Alert listing state.
    self.alerts = []
    self.total = 0
    self.status = ''
    self.groupby = None
    self.top10 = []
    self.last_time = datetime.utcnow()

    self.running = True

    threading.Thread.__init__(self)
def main():
    """Supervisord event listener: forward process state changes to Alerta.

    TICK events become heartbeats; every other event becomes an alert whose
    severity is derived from the event-name suffix.
    """
    api = ApiClient()
    listener = Listener()

    # Suffix -> severity, checked in declaration order; default is 'normal'.
    severity_rules = [
        ('FATAL', 'critical'),
        ('BACKOFF', 'warning'),
        ('EXITED', 'minor'),
    ]

    while True:
        listener.send_cmd('READY\n')
        headers, body = listener.wait()
        event = headers['eventname']

        if event.startswith('TICK'):
            supervisorAlert = Heartbeat(
                origin='supervisord',
                tags=[headers['ver'], event]
            )
        else:
            severity = 'normal'
            for suffix, mapped in severity_rules:
                if event.endswith(suffix):
                    severity = mapped
                    break
            supervisorAlert = Alert(
                resource='%s:%s' % (platform.uname()[1], body['processname']),
                environment='Production',
                service=['supervisord'],
                event=event,
                correlate=[
                    'PROCESS_STATE_STARTING',
                    'PROCESS_STATE_RUNNING',
                    'PROCESS_STATE_BACKOFF',
                    'PROCESS_STATE_STOPPING',
                    'PROCESS_STATE_EXITED',
                    'PROCESS_STATE_STOPPED',
                    'PROCESS_STATE_FATAL',
                    'PROCESS_STATE_UNKNOWN'
                ],
                value='serial=%s' % headers['serial'],
                severity=severity,
                origin=headers['server'],
                text='State changed from %s to %s.' % (body['from_state'], event),
                raw_data='%s\n\n%s' % (json.dumps(headers), json.dumps(body))
            )

        try:
            api.send(supervisorAlert)
        except Exception as e:
            listener.log_stderr(e)
            listener.send_cmd('RESULT 4\nFAIL')
        else:
            listener.send_cmd('RESULT 2\nOK')
def on_callback_query(self, msg):
    """Handle a Telegram inline-keyboard callback.

    Looks the pressed message up in the sent_messages cache; if found, the
    corresponding alert's status is updated via the Alerta API, otherwise
    the user is told they were too late. The chat handler is closed either way.
    """
    query_id, from_id, query_data = telepot.glance(msg, flavor='callback_query')
    for message in sent_messages:
        if msg['message']['message_id'] == message['message_id']:
            # Removing while iterating is safe only because we break
            # immediately after the mutation.
            sent_messages.remove(message)
            self.editor.editMessageReplyMarkup()
            self.bot.sendMessage(self.id[0], "Thanks... !")
            api = ApiClient(endpoint=config.ALERTA_API_URL, key=config.ALERTA_API_KEY)
            text = "status change via API (Telegram) by {}.".format(message['telegram'])
            # query_data carries the new status chosen on the keyboard.
            api.update_status(alertid=message['data']['id'], status=query_data, text=text)
            break
    else:
        # for/else: loop completed without a match - message no longer cached.
        self.editor.editMessageReplyMarkup()
        self.bot.sendMessage(self.id[0], "Oh Sorry, You were too late. You can not ACK this now.")
    self.close()
def main():
    """Relay supervisord process-state events to Alerta.

    Periodic TICK events are turned into heartbeats; all other events
    become alerts with a severity mapped from the event-name suffix.
    """
    api = ApiClient()
    listener = Listener()
    while True:
        listener.send_cmd('READY\n')
        headers, body = listener.wait()
        event = headers['eventname']
        if event.startswith('TICK'):
            supervisorAlert = Heartbeat(origin='supervisord',
                                        tags=[headers['ver'], event])
        else:
            # Map the state suffix onto an Alerta severity.
            severity = (
                'critical' if event.endswith('FATAL') else
                'warning' if event.endswith('BACKOFF') else
                'minor' if event.endswith('EXITED') else
                'normal'
            )
            supervisorAlert = Alert(
                resource='%s:%s' % (platform.uname()[1], body['processname']),
                environment='Production',
                service=['supervisord'],
                event=event,
                correlate=[
                    'PROCESS_STATE_STARTING',
                    'PROCESS_STATE_RUNNING',
                    'PROCESS_STATE_BACKOFF',
                    'PROCESS_STATE_STOPPING',
                    'PROCESS_STATE_EXITED',
                    'PROCESS_STATE_STOPPED',
                    'PROCESS_STATE_FATAL',
                    'PROCESS_STATE_UNKNOWN'
                ],
                value='serial=%s' % headers['serial'],
                severity=severity,
                origin=headers['server'],
                text='State changed from %s to %s.' % (body['from_state'], event),
                raw_data='%s\n\n%s' % (json.dumps(headers), json.dumps(body)))
        try:
            api.send(supervisorAlert)
        except Exception as e:
            listener.log_stderr(e)
            listener.send_cmd('RESULT 4\nFAIL')
        else:
            listener.send_cmd('RESULT 2\nOK')
def alert(self, matches):
    """ElastAlert alerter hook: raise a single alert in Alerta.

    ``matches`` is a list of match dicts and may hold more than one entry
    when the aggregation option is set, but exactly one alert is sent
    regardless of how many matches arrive.
    """
    client = ApiClient(endpoint=self.endpoint, key=self.api_key)
    client.send(Alert(
        resource=self.resource,
        event=self.event,
        environment=self.environment,
        service=self.service,
        severity=self.severity,
        text=self.text,
        value=self.value,
    ))
def run(self):
    """Read one SNMP trap from stdin, parse it and send it to Alerta.

    Connection settings come from ALERTA_ENDPOINT / ALERTA_API_KEY.
    """
    endpoint = os.environ.get('ALERTA_ENDPOINT', 'http://localhost:8080')
    key = os.environ.get('ALERTA_API_KEY', None)
    self.api = ApiClient(endpoint=endpoint, key=key)

    data = sys.stdin.read()
    LOG.info('snmptrapd -> %r', data)
    # Traps arrive as raw bytes; silently drop anything that is not UTF-8.
    data = unicode(data, 'utf-8', errors='ignore')
    LOG.debug('unicoded -> %s', data)

    snmptrapAlert = SnmpTrapHandler.parse_snmptrap(data)
    if snmptrapAlert:
        try:
            self.api.send(snmptrapAlert)
        # Fixed: was the Python-2-only "except Exception, e" form; the rest
        # of the file consistently uses "except ... as e".
        except Exception as e:
            LOG.warning('Failed to send alert: %s', e)
def __init__(self):
    """Create the Alerta client and attach to the CloudWatch SQS queue.

    Exits the process (status 1) if the SQS connection or queue creation
    fails, since nothing useful can be done without the queue.
    """
    self.api = ApiClient()

    try:
        conn = boto.sqs.connect_to_region(
            AWS_REGION,
            aws_access_key_id=AWS_ACCESS_KEY_ID,
            aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        )
    except boto.exception.SQSError as err:
        LOG.error('SQS API call failed: %s', err)
        sys.exit(1)

    try:
        # create_queue() is idempotent: it returns the queue if it exists.
        self.sqs = conn.create_queue(AWS_SQS_QUEUE)
        self.sqs.set_message_class(RawMessage)
    except boto.exception.SQSError as err:
        LOG.error('SQS queue error: %s', err)
        sys.exit(1)
def __init__(self):
    """Create the Alerta API client and bind the syslog UDP listener socket.

    Exits the process (status 2) if the port cannot be bound.
    """
    self.api = ApiClient()

    LOG.info('Starting UDP listener...')
    # Set up syslog UDP listener
    try:
        self.udp = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        self.udp.bind(('', SYSLOG_UDP_PORT))
    # Fixed: was the Python-2-only "except socket.error, e" form; the rest
    # of the file consistently uses "except ... as e".
    except socket.error as e:
        LOG.error('Syslog UDP error: %s', e)
        sys.exit(2)
def run(self):
    """Mailer loop: deliver e-mails for alerts whose hold period has expired.

    NOTE(review): in this variant `api` and `keep_alive` are set up but never
    used in the visible body, and the loop has no sleep - presumably the
    heartbeat/sleep tail exists elsewhere; confirm against the full source.
    """
    api = ApiClient(endpoint=OPTIONS['endpoint'], key=OPTIONS['key'])
    keep_alive = 0
    while not self.should_stop:
        # on_hold is shared with other threads; every access is guarded
        # against concurrent deletion via KeyError.
        for alertid in on_hold.keys():
            try:
                (alert, hold_time) = on_hold[alertid]
            except KeyError:
                continue
            if time.time() > hold_time:
                self.send_email(alert)
                try:
                    del on_hold[alertid]
                except KeyError:
                    continue
def run(self):
    """Start the server: create the work queue and spin up the worker pool."""
    self.running = True

    self.queue = Queue.Queue()
    # Fixed: was the redundant double assignment
    # "self.api = self.api = ApiClient(...)".
    self.api = ApiClient(endpoint=settings.ENDPOINT, key=settings.API_KEY)

    # Start worker threads
    LOG.debug('Starting %s worker threads...', SERVER_THREADS)
    for i in range(SERVER_THREADS):
        w = WorkerThread(self.queue, self.api)
        try:
            w.start()
        # Fixed: was the Python-2-only "except Exception, e" form; the rest
        # of the file consistently uses "except ... as e".
        except Exception as e:
            LOG.error('Worker thread #%s did not start: %s', i, e)
            continue
        LOG.info('Started worker thread: %s', w.getName())
def run(self):
    """Initialise ping targets and launch the worker thread pool."""
    self.running = True

    # Create internal queue
    self.queue = Queue.Queue()
    self.api = ApiClient()

    # Initialise ping targets (kept for its side effects; the returned list
    # is not referenced in this method - presumably workers read the targets
    # via shared state; confirm against the full source).
    ping_list = init_targets()

    # Start worker threads
    LOG.debug('Starting %s worker threads...', SERVER_THREAD_COUNT)
    for i in range(SERVER_THREAD_COUNT):
        w = WorkerThread(self.api, self.queue)
        try:
            w.start()
        # Fixed: was the Python-2-only "except Exception, e" form; the rest
        # of the file consistently uses "except ... as e".
        except Exception as e:
            LOG.error('Worker thread #%s did not start: %s', i, e)
            continue
        LOG.info('Started worker thread: %s', w.getName())
class UpdateThread(threading.Thread):
    """Background thread that polls the Alerta API and keeps dashboard
    state (metrics rates/latencies, alert list, top10) up to date.
    """

    def __init__(self, endpoint, key):
        self.api = ApiClient(endpoint=endpoint, key=key)

        # Server status / metrics bookkeeping.
        self.version = ''
        self.last_metrics = dict()   # metric name -> (count, totalTime) from previous poll
        self.app_last_time = None    # server-side epoch seconds of previous poll
        self.rate_metrics = dict()
        self.latency_metrics = dict()

        # Alert listing state (updated under `lock`).
        self.alerts = []
        self.total = 0
        self.status = ''
        self.groupby = None
        self.top10 = []
        self.last_time = datetime.utcnow()

        # self.prev_time = None
        # self.diff_time = 0
        # self.running_total = 0
        # self.start_time = time.time()
        # self.max_rate = 0.0
        # self.latency = 0
        # self.running_latency = 0
        #
        # self.dupl_cache = dict()
        # self.dupl_total = 0
        # self.running_dupl_total = 0

        self.running = True

        threading.Thread.__init__(self)

    def run(self):
        """Poll the API every 2 seconds until self.running is cleared."""
        self.running = True
        while self.running:
            self.status = 'updating...'
            self._update()
            try:
                time.sleep(2)
            except (KeyboardInterrupt, SystemExit):
                sys.exit(0)

    def set_groupby(self, groupby):
        self.groupby = groupby

    def get_groupby(self):
        return self.groupby

    def _update(self):
        """Fetch server status, alerts and top10; update derived metrics.

        Any ConnectionError aborts the refresh early (returns '').
        """
        try:
            status = self.api.get_status()
        except ConnectionError:
            return ''

        self.version = 'v' + status['version']
        app_time = status['time'] / 1000  # epoch ms

        # Only 'timer' metrics carry count/totalTime pairs we can diff.
        metrics = [metric for metric in status['metrics'] if metric['type'] == 'timer']
        for m in metrics:
            count = m['count']
            totalTime = m['totalTime']

            if m['name'] in self.last_metrics:
                (last_count, last_totalTime) = self.last_metrics[m['name']]
            else:
                # First sighting of this metric: record a baseline, no rate yet.
                self.last_metrics[m['name']] = (count, totalTime)
                self.app_last_time = app_time
                continue

            # Counter went backwards (e.g. server restart) -> skip rate update.
            if count >= last_count:
                num = count - last_count
                period = app_time - self.app_last_time
                try:
                    rate = 1.0 * num / period
                except ZeroDivisionError:
                    rate = 0.0
                self.rate_metrics[m['name']] = rate

            if totalTime >= last_totalTime:
                diff = totalTime - last_totalTime
                num = count - last_count
                try:
                    latency = 1.0 * diff / num
                except ZeroDivisionError:
                    latency = 0.0
                self.latency_metrics[m['name']] = latency

            self.last_metrics[m['name']] = (count, totalTime)
            self.app_last_time = app_time

        try:
            response = self.api.get_alerts()
        except ConnectionError:
            return ''

        # `lock` is a module-level lock shared with the UI thread.
        with lock:
            self.alerts = response.get('alerts', [])
            self.total = response.get('total', 0)
            self.status = '%s - %s' % (response['status'], response.get('message', 'no errors'))

        try:
            response = self.api.get_top10(self.groupby)
        except ConnectionError:
            return ''

        with lock:
            self.top10 = response.get('top10', [])
j = json.load(sys.stdin) print "Request:" print j url = client.kv.get('alerta/apiurl')[1]['Value'] key = client.kv.get('alerta/apikey')[1]['Value'] max_retries = int(client.kv.get('alerta/max_retries')[1]['Value']) sleep = int(client.kv.get('alerta/sleep')[1]['Value']) timeout = int(client.kv.get('alerta/timeout')[1]['Value']) origin = client.kv.get('alerta/origin')[1]['Value'] alerttype = client.kv.get('alerta/alerttype')[1]['Value'] api = ApiClient(endpoint=url, key=key) SEVERITY_MAP = { 'critical': 'critical', 'warning': 'warning', 'passing': 'ok', } def createalert(data): try: environment = client.kv.get('alerta/env/{0}'.format( data['Node']))[1]['Value'] except: environment = client.kv.get('alerta/defaultenv')[1]['Value'] alert = Alert(resource=data['Node'],
#!/usr/bin/env python from alertaclient.api import ApiClient from alertaclient.alert import Alert api = ApiClient() alert = Alert(resource='web-server-01', event='HttpError', correlate=['HttpOK'], group='Web', environment='Production', service=['theguardian.com'], severity='major', value='Bad Gateway (502)', text='Web server error.', tags=['web', 'dc1', 'london'], attributes={'customer': 'The Guardian'}) print alert try: print api.send(alert) except Exception as e: print e
class CloudWatch(object):
    """Consume AWS CloudWatch alarm notifications from an SQS queue and
    forward them to Alerta as alerts.
    """

    def __init__(self):
        # Exits the process if SQS cannot be reached - nothing useful can
        # be done without the queue.
        self.api = ApiClient()
        try:
            connection = boto.sqs.connect_to_region(
                AWS_REGION,
                aws_access_key_id=AWS_ACCESS_KEY_ID,
                aws_secret_access_key=AWS_SECRET_ACCESS_KEY)
        except boto.exception.SQSError as e:
            LOG.error('SQS API call failed: %s', e)
            sys.exit(1)
        try:
            # create_queue() returns the existing queue if it already exists.
            self.sqs = connection.create_queue(AWS_SQS_QUEUE)
            self.sqs.set_message_class(RawMessage)
        except boto.exception.SQSError as e:
            LOG.error('SQS queue error: %s', e)
            sys.exit(1)

    def run(self):
        """Main loop: long-poll SQS, relay alarms, heartbeat each pass."""
        while True:
            LOG.debug('Waiting for CloudWatch alarms on %s...', AWS_SQS_QUEUE)
            try:
                # 20s long-poll keeps API calls (and cost) down.
                notification = self.sqs.read(wait_time_seconds=20)
            except boto.exception.SQSError as e:
                LOG.warning('Could not read from queue: %s', e)
                time.sleep(20)
                continue

            if notification:
                cloudwatchAlert = self.parse_notification(notification)
                try:
                    self.api.send(cloudwatchAlert)
                except Exception as e:
                    LOG.warning('Failed to send alert: %s', e)
                # Message is deleted even if the send failed, so failed
                # alerts are not retried.
                self.sqs.delete_message(notification)

            LOG.debug('Send heartbeat...')
            heartbeat = Heartbeat(tags=[__version__])
            try:
                self.api.send(heartbeat)
            except Exception as e:
                LOG.warning('Failed to send heartbeat: %s', e)

    def parse_notification(self, notification):
        """Translate an SNS-wrapped CloudWatch alarm message into an Alert.

        Returns None for messages without a 'Trigger' section (not alarms).
        """
        notification = json.loads(notification.get_body())
        alarm = json.loads(notification['Message'])

        if 'Trigger' not in alarm:
            return

        # Defaults
        # Resource is built from the first trigger dimension only.
        resource = '%s:%s' % (alarm['Trigger']['Dimensions'][0]['name'],
                              alarm['Trigger']['Dimensions'][0]['value'])
        event = alarm['AlarmName']
        severity = self.cw_state_to_severity(alarm['NewStateValue'])
        group = 'CloudWatch'
        value = alarm['Trigger']['MetricName']
        text = alarm['AlarmDescription']
        # Map the AWS account id to a friendly service name when known.
        service = [AWS_ACCOUNT_ID.get(alarm['AWSAccountId'],
                                      'AWSAccountId:' + alarm['AWSAccountId'])]
        tags = [alarm['Trigger']['Namespace']]
        correlate = list()
        origin = notification['TopicArn']
        timeout = None
        create_time = datetime.datetime.strptime(notification['Timestamp'],
                                                 '%Y-%m-%dT%H:%M:%S.%fZ')
        raw_data = notification['Message']

        cloudwatchAlert = Alert(
            resource=resource,
            event=event,
            correlate=correlate,
            group=group,
            value=value,
            severity=severity,
            environment='Production',
            service=service,
            text=text,
            event_type='cloudwatchAlarm',
            tags=tags,
            attributes={
                'awsMessageId': notification['MessageId'],
                'awsRegion': alarm['Region'],
                'thresholdInfo': alarm['NewStateReason']
            },
            origin=origin,
            timeout=timeout,
            create_time=create_time,
            raw_data=raw_data,
        )

        return cloudwatchAlert

    @staticmethod
    def cw_state_to_severity(state):
        """Map a CloudWatch alarm state name to an Alerta severity."""
        if state == 'ALARM':
            return 'major'
        elif state == 'INSUFFICIENT_DATA':
            return 'warning'
        elif state == 'OK':
            return 'normal'
        else:
            return 'unknown'
def main():
    """zabbix-alerta entry point.

    Resolves configuration (ini file, environment, command line), then
    parses the Zabbix-formatted alert body and sends it to Alerta.
    """
    config_file = os.environ.get('ALERTA_CONF_FILE') or OPTIONS['config_file']

    config = configparser.RawConfigParser(defaults=OPTIONS)
    try:
        config.read(os.path.expanduser(config_file))
    except Exception:
        sys.exit("Problem reading configuration file %s - is this an ini file?" % config_file)

    parser = argparse.ArgumentParser(
        prog='zabbix-alerta',
        usage='zabbix-alerta SENDTO SUMMARY BODY',
        description='Zabbix-to-Alerta integration script',
        epilog=epilog,
        formatter_class=argparse.RawTextHelpFormatter
    )
    parser.add_argument(
        'sendto',
        help='config profile or alerta API endpoint and key'
    )
    parser.add_argument(
        'summary',
        help='alert summary'
    )
    parser.add_argument(
        'body',
        help='alert body (see format below)'
    )
    args, left = parser.parse_known_args()

    # sendto=apiUrl[;key]
    if args.sendto.startswith('http://') or args.sendto.startswith('https://'):
        want_profile = None
        try:
            # Split at the first ';' only: keys may not contain one, but the
            # endpoint part must stay intact.
            OPTIONS['endpoint'], OPTIONS['key'] = args.sendto.split(';', 1)
        except ValueError:
            # No key supplied - endpoint only.
            OPTIONS['endpoint'] = args.sendto
    # sendto=profile
    else:
        want_profile = args.sendto or os.environ.get('ALERTA_DEFAULT_PROFILE') or config.defaults().get('profile')

    # Load options from the chosen profile section, falling back to DEFAULT.
    # getboolean() is tried first so flags parse as bools; anything else
    # falls through to a plain string get().
    if want_profile and config.has_section('profile %s' % want_profile):
        for opt in OPTIONS:
            try:
                OPTIONS[opt] = config.getboolean('profile %s' % want_profile, opt)
            except (ValueError, AttributeError):
                OPTIONS[opt] = config.get('profile %s' % want_profile, opt)
    else:
        for opt in OPTIONS:
            try:
                OPTIONS[opt] = config.getboolean('DEFAULT', opt)
            except (ValueError, AttributeError):
                OPTIONS[opt] = config.get('DEFAULT', opt)

    # Re-parse with the resolved options as argparse defaults.
    parser.set_defaults(**OPTIONS)
    args = parser.parse_args()

    if args.debug:
        LOG.basicConfig(stream=sys.stderr, format=LOG_FORMAT, datefmt=LOG_DATE_FMT, level=LOG.DEBUG)
    else:
        LOG.basicConfig(filename=LOG_FILE, format=LOG_FORMAT, datefmt=LOG_DATE_FMT, level=LOG.INFO)

    LOG.info("[alerta] endpoint=%s key=%s", args.endpoint, args.key)
    api = ApiClient(endpoint=args.endpoint, key=args.key, ssl_verify=args.sslverify)

    LOG.debug("[alerta] sendto=%s, summary=%s, body=%s", args.sendto, args.summary, args.body)
    try:
        alert = parse_zabbix(args.summary, args.body)
        api.send(alert)
    except (SystemExit, KeyboardInterrupt):
        LOG.warning("Exiting zabbix-alerta.")
        sys.exit(0)
    except Exception as e:
        LOG.error(e, exc_info=1)
        sys.exit(1)