def get_last_measure(self, store_key):
    q = Queue("%s/%s.q" % (self.home, store_key),
              max_length=1,
              overflow_mode='slide')
    msg = q.shift()
    if msg:
        return json.loads(msg.content)

def get_metrics_queue(config, _=None):
    q = Queue('%s/dashboard.q' % (config.temboard['home']),
              max_length=(config.plugins['dashboard']['history_length'] + 1),
              overflow_mode='slide')
    dm = DashboardMetrics()
    msg = q.get_last_message()
    msg['notifications'] = dm.get_notifications(config)
    return msg

def get_last_n(self, config, n):
    try:
        # Same sliding queue as push() below; the limit is in bytes, so use
        # max_size (the original max_length here looked like a typo).
        q_last = Queue(file_path='%s/notifications.q' % (
                       config.temboard['home']),
                       max_size=10 * 1024 * 1024,  # 10MB
                       overflow_mode='slide')
        return q_last.get_last_n_messages(n)
    except Exception as e:
        raise NotificationError('Cannot get last notifications: %s' % str(e))

def monitoring_sender_worker(app):
    config = app.config
    c = 0
    logger.debug("Starting sender")
    q = Queue(os.path.join(config.temboard.home, 'metrics.q'),
              max_size=1024 * 1024 * 10,
              overflow_mode='slide')
    while True:
        # Let's do it smoothly..
        time.sleep(0.5)

        msg = q.shift(delete=False)
        if msg is None:
            # Nothing left in the queue: get out of this loop.
            break

        try:
            # Try to send data to the temBoard collector API.
            logger.debug("Trying to send data to collector")
            logger.debug(config.monitoring.collector_url)
            logger.debug(msg.content)
            send_output(
                config.monitoring.ssl_ca_cert_file,
                config.monitoring.collector_url,
                config.temboard.key,
                msg.content,
            )
        except HTTPError as e:
            # On error 409 (DB integrity) we just drop the message and move
            # to the next one. Remove it first, otherwise the next
            # shift(delete=False) would re-read the same message forever.
            if int(e.code) == 409:
                q.shift(delete=True, check_msg=msg)
                continue

            try:
                data = json.loads(e.read())
                message = data['error']
            except Exception as ee:
                # Use a distinct name so we don't shadow the HTTP error.
                logger.debug("Can't get error details: %s", ee)
                message = str(e)
            logger.error("Failed to send data to collector: %s", message)
            logger.error("You should find details in temBoard UI logs.")
            raise Exception("Failed to send data to collector.")

        # If everything's fine then remove the current message from the
        # queue. Integrity check is made using check_msg.
        q.shift(delete=True, check_msg=msg)

        if c > 60:
            break
        c += 1
    logger.debug("Done")

def supervision_sender_worker(commands, command, config):
    signal.signal(signal.SIGTERM, supervision_worker_sigterm_handler)
    start_time = time.time() * 1000
    set_logger_name("supervision_sender_worker")
    logger = get_logger(config)
    # TODO: logging methods in supervision plugin must be aligned.
    logging.root = logger
    logger.info("Start pid=%s id=%s" % (os.getpid(), command.commandid))
    command.state = COMMAND_START
    command.time = time.time()
    command.pid = os.getpid()
    commands.update(command)

    c = 0
    while True:
        # Let's do it smoothly..
        time.sleep(0.5)

        q = Queue('%s/metrics.q' % (config.temboard['home']),
                  max_size=1024 * 1024 * 10,
                  overflow_mode='slide')
        msg = q.shift(delete=False)
        if msg is None:
            break
        try:
            send_output(config.plugins['supervision']['ssl_ca_cert_file'],
                        config.plugins['supervision']['collector_url'],
                        config.plugins['supervision']['agent_key'],
                        msg.content)
        except urllib2.HTTPError as e:
            logger.error("Failed to send data.")
            logger.debug(str(e))
            # On an error 409 (DB integrity) we fall through and remove the
            # message; any other HTTP error aborts the worker.
            if int(e.code) != 409:
                logger.info("End. Duration: %s." % (
                    str(time.time() * 1000 - start_time)))
                return
        except Exception as e:
            logger.error("Failed to send data.")
            logger.debug(str(e))
            logger.info("End. Duration: %s." % (
                str(time.time() * 1000 - start_time)))
            return

        q.shift(delete=True, check_msg=msg)

        if c > 60:
            break
        c += 1
    logger.info("End. Duration: %s." % (str(time.time() * 1000 - start_time)))

def dashboard_collector_worker(app):
    logger.debug("Starting to collect dashboard data")
    data = metrics.get_metrics(app)
    # We don't want to store notifications in the history.
    data.pop('notifications', None)
    q = Queue(os.path.join(app.config.temboard.home, 'dashboard.q'),
              max_length=(app.config.dashboard.history_length + 1),
              overflow_mode='slide')
    q.push(Message(content=json.dumps(data)))
    logger.debug(data)
    logger.debug("End")

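# The dashboard queue above is created with max_length=history_length + 1 and
# overflow_mode='slide'. Judging from how it is written and read back, this
# appears to make the queue behave like a bounded ring buffer: pushing past
# capacity drops the oldest message. A minimal in-memory sketch of that
# sliding behaviour, using only the standard library (an illustration, not
# the actual Queue implementation):
import collections

history = collections.deque(maxlen=3)
for n in range(5):
    history.append({'measure': n})
# Only the 3 most recent entries survive:
print(list(history))  # [{'measure': 2}, {'measure': 3}, {'measure': 4}]
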
def push(self, config, notification):
    try:
        # Notifications are stored in a "sliding" queue.
        q = Queue(file_path='%s/notifications.q' % (
                  config.temboard['home']),
                  max_size=10 * 1024 * 1024,  # 10MB
                  overflow_mode='slide')
        # Push the notification into the queue.
        q.push(Message(content=json.dumps({
            'date': notification.date,
            'username': notification.username,
            'message': notification.message,
        })))
    except Exception as e:
        raise NotificationError('Cannot push new notification: %s' % str(e))

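# A hedged usage sketch for push() above. The notification shape (date,
# username, message) is inferred from the json.dumps() call; the Notification
# class and the commented call below are hypothetical stand-ins, not from the
# source.
import time

class Notification(object):
    def __init__(self, username, message):
        self.date = time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime())
        self.username = username
        self.message = message

# notifier.push(config, Notification('alice', 'PostgreSQL restarted'))
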
def monitoring_sender_worker(config):
    signal.signal(signal.SIGTERM, monitoring_worker_sigterm_handler)
    # Convert the config dict to a namedtuple.
    config = collections.namedtuple(
        '__config', ['temboard', 'plugins', 'postgresql', 'logging'])(
            temboard=config['temboard'],
            plugins=config['plugins'],
            postgresql=config['postgresql'],
            logging=config['logging'])

    c = 0
    logger.debug("Starting sender")
    while True:
        # Let's do it smoothly..
        time.sleep(0.5)

        q = Queue('%s/metrics.q' % (config.temboard['home']),
                  max_size=1024 * 1024 * 10,
                  overflow_mode='slide')
        msg = q.shift(delete=False)
        if msg is None:
            break
        try:
            send_output(config.plugins['monitoring']['ssl_ca_cert_file'],
                        config.plugins['monitoring']['collector_url'],
                        config.temboard['key'],
                        msg.content)
        except urllib2.HTTPError as e:
            logger.exception(e)
            # On an error 409 (DB integrity) we fall through and remove the
            # message; any other HTTP error is fatal.
            if int(e.code) != 409:
                logger.error("Failed with code=%s message=%s" % (
                    e.code, e.msg))
                sys.exit(1)
        except Exception as e:
            logger.exception(e)
            logger.error("Failed")
            sys.exit(1)

        # If everything's fine then remove the current message from the
        # queue.
        q.shift(delete=True, check_msg=msg)

        if c > 60:
            break
        c += 1
    logger.debug("Done")

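# The sender workers above all follow the same peek-then-confirm pattern:
# shift(delete=False) reads the head message without removing it, the upload
# is attempted, and only afterwards does shift(delete=True, check_msg=msg)
# remove it, so a crash between read and send never loses data (at-least-once
# delivery). A minimal in-memory sketch of that contract; ToyQueue and
# drain() are illustrative stand-ins, not part of the source:
class ToyQueue(object):
    """In-memory stand-in for the file-backed Queue used above."""

    def __init__(self):
        self.messages = []

    def push(self, content):
        self.messages.append(content)

    def shift(self, delete=True, check_msg=None):
        if not self.messages:
            return None
        if not delete:
            return self.messages[0]  # peek only, message stays queued
        # Delete only if the head still matches what the caller peeked at.
        if check_msg is None or self.messages[0] == check_msg:
            return self.messages.pop(0)
        return None


def drain(q, send):
    while True:
        msg = q.shift(delete=False)  # peek
        if msg is None:
            break
        send(msg)  # may raise: the message is then retried on the next run
        q.shift(delete=True, check_msg=msg)  # sent: now safe to remove
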
def delta(self, key, current_values):
    """
    Compute a delta between measures of two runs.

    Args:
        key (str): identifies the values
        current_values (dict): mapping of the latest measures

    Returns:
        a tuple with the time interval of the delta in seconds and a dict
        of deltas with the same keys as the input.
    """
    current_time = time.time()
    store_key = self.get_name() + key
    last_measure = self.get_last_measure(store_key)
    delta = (None, None)
    # Compute deltas against the previous run, if any.
    try:
        if last_measure:
            delta_time = current_time - last_measure['measure_time']
            delta_values = {}
            for k in current_values.keys():
                delta_value = current_values[k] - last_measure['measure'][k]
                if delta_value < 0:
                    raise Exception('Negative delta value.')
                delta_values[k] = delta_value
            delta = (delta_time, delta_values)
    except Exception:
        delta = (None, None)

    # Persist the current measures for the next run.
    try:
        q = Queue("%s/%s.q" % (self.home, store_key),
                  max_length=1,
                  overflow_mode='slide')
        q.push(Message(content=json.dumps({
            'measure_time': current_time,
            'measure': dict(current_values),
        })))
    except Exception as e:
        logger.error(str(e))

    return delta

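# A self-contained sketch of the delta logic above, with the one-slot sliding
# Queue replaced by an in-memory dict so it can run standalone; _last and
# delta_sketch() are illustrative names, not from the source:
import time

_last = {}  # store_key -> {'measure_time': ..., 'measure': {...}}


def delta_sketch(store_key, current_values):
    current_time = time.time()
    previous = _last.get(store_key)
    # Persist the current run for the next call, as delta() does via Queue.
    _last[store_key] = {'measure_time': current_time,
                        'measure': dict(current_values)}
    if not previous:
        return None, None
    try:
        diffs = {}
        for k, v in current_values.items():
            d = v - previous['measure'][k]
            if d < 0:
                # Counter went backwards (e.g. service restart): discard.
                return None, None
            diffs[k] = d
    except KeyError:
        return None, None
    return current_time - previous['measure_time'], diffs
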
def dashboard_collector_worker(config):
    try:
        signal.signal(signal.SIGTERM, dashboard_worker_sigterm_handler)
        logger.debug("Collecting data")
        conn = connector(
            host=config['postgresql']['host'],
            port=config['postgresql']['port'],
            user=config['postgresql']['user'],
            password=config['postgresql']['password'],
            database=config['postgresql']['dbname'],
        )
        conn.connect()

        # Convert the config dict to a namedtuple.
        config_nt = collections.namedtuple(
            '__config', ['temboard', 'plugins', 'postgresql', 'logging']
        )(
            temboard=config['temboard'],
            plugins=config['plugins'],
            postgresql=config['postgresql'],
            logging=config['logging'],
        )

        # Collect data.
        data = metrics.get_metrics(conn, config_nt)
        conn.close()

        # We don't want to store notifications in the history.
        data.pop('notifications', None)

        q = Queue('%s/dashboard.q' % (config['temboard']['home']),
                  max_length=(config['plugins']['dashboard']['history_length']
                              + 1),
                  overflow_mode='slide')
        q.push(Message(content=json.dumps(data)))
        logger.debug(data)
        logger.debug("End")
    except (error, Exception) as e:
        logger.error("Could not collect data")
        logger.exception(e)
        try:
            conn.close()
        except Exception:
            pass
        sys.exit(1)

def monitoring_collector_worker(app):
    """
    Run probes and push collected metrics in a queue.
    """
    logger.debug("Starting monitoring collector")
    config = app.config
    conninfo = dict(
        host=config.postgresql.host,
        port=config.postgresql.port,
        user=config.postgresql.user,
        database=config.postgresql.dbname,
        password=config.postgresql.password,
        dbnames=config.monitoring.dbnames,
        instance=config.postgresql.instance,
    )
    system_info = host_info(config.temboard.hostname)
    # Load the probes to run.
    probes = load_probes(config.monitoring, config.temboard.home)
    instance = instance_info(conninfo, system_info['hostname'])

    logger.debug("Running probes")
    # Gather the data from probes.
    data = run_probes(probes, [instance])

    # Prepare and send output.
    output = dict(
        datetime=now(),
        hostinfo=system_info,
        instances=remove_passwords([instance]),
        data=data,
        version=__VERSION__,
    )
    logger.debug(output)
    q = Queue(os.path.join(config.temboard.home, 'metrics.q'),
              max_size=1024 * 1024 * 10,
              overflow_mode='slide')
    q.push(Message(content=json.dumps(output)))
    logger.debug("Done")

def dashboard_collector_worker(commands, command, config):
    try:
        signal.signal(signal.SIGTERM, dashboard_worker_sigterm_handler)
        start_time = time.time() * 1000
        set_logger_name("dashboard_collector")
        logger = get_logger(config)
        logger.debug("Starting with pid=%s" % (getpid()))
        logger.debug("commandid=%s" % (command.commandid))
        command.state = COMMAND_START
        command.time = time.time()
        command.pid = getpid()
        commands.update(command)

        conn = connector(host=config.postgresql['host'],
                         port=config.postgresql['port'],
                         user=config.postgresql['user'],
                         password=config.postgresql['password'],
                         database=config.postgresql['dbname'])
        conn.connect()
        db_metrics = metrics.get_metrics(conn, config)
        # We don't want to store notifications in the history.
        db_metrics.pop('notifications', None)
        conn.close()

        q = Queue('%s/dashboard.q' % (config.temboard['home']),
                  max_length=(config.plugins['dashboard']['history_length']
                              + 1),
                  overflow_mode='slide')
        q.push(Message(content=json.dumps(db_metrics)))
        logger.debug("Duration: %s." % (str(time.time() * 1000 - start_time)))
        logger.debug("Done.")
    except (error, Exception) as e:
        logger.traceback(get_tb())
        logger.error(str(e))
        logger.debug("Failed.")
        try:
            conn.close()
        except Exception:
            pass
        sys.exit(1)

def supervision_collector_worker(commands, command, config):
    """
    Run probes and push collected metrics in a queue.
    """
    signal.signal(signal.SIGTERM, supervision_worker_sigterm_handler)
    start_time = time.time() * 1000
    set_logger_name("supervision_collector_worker")
    logger = get_logger(config)
    # TODO: logging methods in supervision plugin must be aligned.
    logging.root = logger
    logger.info("Start pid=%s id=%s" % (os.getpid(), command.commandid))
    command.state = COMMAND_START
    command.time = time.time()
    command.pid = os.getpid()
    commands.update(command)

    try:
        system_info = host_info(config.plugins['supervision'])
    except ValueError as e:
        logger.error(
            "supervision_collector_worker - unable to get system"
            " information: %s" % str(e))
        sys.exit(1)

    # Load the probes to run.
    probes = load_probes(config.plugins['supervision'],
                         config.temboard['home'])
    config.plugins['supervision']['conninfo'] = [{
        'host': config.postgresql['host'],
        'port': config.postgresql['port'],
        'user': config.postgresql['user'],
        'database': config.postgresql['dbname'],
        'password': config.postgresql['password'],
        'dbnames': config.plugins['supervision']['dbnames'],
        'instance': config.postgresql['instance'],
    }]

    # Validate connection information from the config, and ensure the
    # instance is available.
    instances = []
    for conninfo in config.plugins['supervision']['conninfo']:
        logging.debug("Validate connection information on instance \"%s\"",
                      conninfo['instance'])
        instances.append(instance_info(conninfo, system_info['hostname']))

    # Gather the data from probes.
    data = run_probes(probes, system_info['hostname'], instances)

    # Prepare and send output.
    output = {
        'datetime': now(),
        'hostinfo': system_info,
        'instances': remove_passwords(instances),
        'data': data,
        'version': __VERSION__,
    }
    q = Queue('%s/metrics.q' % (config.temboard['home']),
              max_size=1024 * 1024 * 10,
              overflow_mode='slide')
    q.push(Message(content=json.dumps(output)))
    logger.info("End. Duration: %s." % (str(time.time() * 1000 - start_time)))

def monitoring_collector_worker(config):
    """
    Run probes and push collected metrics in a queue.
    """
    signal.signal(signal.SIGTERM, monitoring_worker_sigterm_handler)
    # Convert the config dict to a namedtuple.
    config = collections.namedtuple(
        '__config', ['temboard', 'plugins', 'postgresql', 'logging'])(
            temboard=config['temboard'],
            plugins=config['plugins'],
            postgresql=config['postgresql'],
            logging=config['logging'])

    logger.debug("Starting collector")
    try:
        system_info = host_info(config.temboard['hostname'])
    except (ValueError, Exception) as e:
        logger.exception(e)
        logger.debug("Failed")
        sys.exit(1)

    try:
        # Load the probes to run.
        probes = load_probes(config.plugins['monitoring'],
                             config.temboard['home'])
        config.plugins['monitoring']['conninfo'] = [{
            'host': config.postgresql['host'],
            'port': config.postgresql['port'],
            'user': config.postgresql['user'],
            'database': config.postgresql['dbname'],
            'password': config.postgresql['password'],
            'dbnames': config.plugins['monitoring']['dbnames'],
            'instance': config.postgresql['instance'],
        }]

        # Validate connection information from the config, and ensure the
        # instance is available.
        instances = []
        for conninfo in config.plugins['monitoring']['conninfo']:
            instances.append(instance_info(conninfo,
                                           system_info['hostname']))

        logger.debug("Running probes")
        # Gather the data from probes.
        data = run_probes(probes, instances)

        # Prepare and send output.
        output = {
            'datetime': now(),
            'hostinfo': system_info,
            'instances': remove_passwords(instances),
            'data': data,
            'version': __VERSION__,
        }
        logger.debug(output)
        q = Queue('%s/metrics.q' % (config.temboard['home']),
                  max_size=1024 * 1024 * 10,
                  overflow_mode='slide')
        q.push(Message(content=json.dumps(output)))
        logger.debug("Done")
    except Exception as e:
        logger.exception(e)
        logger.error("Could not collect data")
        sys.exit(1)

def get_history_metrics_queue(config, _=None):
    q = Queue('%s/dashboard.q' % (config.temboard['home']),
              max_length=(config.plugins['dashboard']['history_length'] + 1),
              overflow_mode='slide')
    return q.get_content_all_messages()

def get_metrics_queue(config):
    q = Queue(os.path.join(config.temboard.home, 'dashboard.q'))
    dm = DashboardMetrics()
    msg = q.get_last_message()
    msg['notifications'] = dm.get_notifications(config)
    return msg

def get_history_metrics_queue(config):
    q = Queue(os.path.join(config.temboard.home, 'dashboard.q'))
    return q.get_content_all_messages()

def get_history_metrics_queue(config, _=None):
    q = Queue('%s/dashboard.q' % (config.temboard['home']))
    return q.get_content_all_messages()

def get_metrics_queue(config, _=None):
    q = Queue('%s/dashboard.q' % (config.temboard['home']))
    dm = DashboardMetrics()
    msg = q.get_last_message()
    msg['notifications'] = dm.get_notifications(config)
    return msg

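# The reader helpers above rely on get_last_message(), get_last_n_messages()
# and get_content_all_messages() from the same Queue class. Assuming the
# queue file holds one JSON document per line -- an inference from the
# push(Message(content=json.dumps(...))) calls above, not something the
# source confirms -- minimal standalone equivalents could look like:
import json

def read_all_messages(path):
    with open(path) as f:
        return [json.loads(line) for line in f if line.strip()]

def read_last_message(path):
    messages = read_all_messages(path)
    return messages[-1] if messages else None

def read_last_n_messages(path, n):
    return read_all_messages(path)[-n:]
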