def run(self):
    """Collect aggregate CPU time counters from /proc/stat.

    Parses the 'cpu' summary line, converts each jiffies counter to
    milliseconds using the kernel tick rate (self.hz), then diffs the
    counters against the previous run via self.delta().

    Returns a one-element list holding the metrics dict, or an empty
    list on the first call (no previous sample to diff against).
    """
    to_delta = {}
    # 'with' guarantees the file is closed even if parsing raises;
    # the original called stat.close() outside any try/finally.
    with open('/proc/stat') as stat:
        for line in stat:
            cols = line.split()
            if not cols:
                continue
            if cols[0] == 'cpu':
                # Columns: user nice system idle iowait irq softirq steal.
                # Convert values to int then to milliseconds.
                to_delta = {
                    'time_user': ((int(cols[1]) + int(cols[2]))
                                  * 1000 / self.hz),
                    'time_system': (int(cols[3]) + int(cols[6])
                                    + int(cols[7])) * 1000 / self.hz,
                    'time_idle': int(cols[4]) * 1000 / self.hz,
                    'time_iowait': int(cols[5]) * 1000 / self.hz,
                    'time_steal': int(cols[8]) * 1000 / self.hz,
                }
    # Compute deltas for values of /proc/stat since boot time
    (interval, metrics) = self.delta('global', to_delta)
    # No deltas on the first call
    if interval is None:
        return []
    metrics['measure_interval'] = interval
    metrics['datetime'] = now()
    metrics['cpu'] = 'global'
    return [metrics]
def get_statements(http_context, app):
    """Return a snapshot of latest statistics of executed SQL statements
    """
    config = app.config
    dbname = config.statements.dbname
    assert dbname == "postgres", dbname
    conninfo = dict(config.postgresql, dbname=dbname)
    snapshot_datetime = now()
    try:
        with Postgres(**conninfo).connect() as conn:
            rows = list(conn.query(query))
    except Exception as err:
        # Distinguish "extension missing / server too old" from a
        # genuine failure before reporting.
        pg_version = app.postgres.fetch_version()
        extension_missing = (
            pg_version < 90600
            or 'relation "pg_stat_statements" does not exist' in str(err)
        )
        if extension_missing:
            raise HTTPError(
                404,
                "pg_stat_statements not enabled on database %s" % dbname)
        logger.error(
            "Failed to get pg_stat_statements data on database %s: %s",
            dbname, err,
        )
        raise HTTPError(500, err)
    else:
        return {"snapshot_datetime": snapshot_datetime, "data": rows}
def run(self):
    """Report file-system usage entries, stamped with collection time.

    Everything is already gathered by the system inventory; this probe
    only adds the timestamp to each entry.
    """
    stamp = now()
    results = []
    # Renamed the local (was `datetime`) to avoid shadowing the module.
    for fs in SysInfo().file_systems():
        fs['datetime'] = stamp
        results.append(fs)
    return results
def monitoring_collector_worker(app):
    """
    Run probes and push collected metrics in a queue.
    """
    logger.debug("Starting monitoring collector")
    config = app.config
    pg = config.postgresql
    conninfo = {
        'host': pg.host,
        'port': pg.port,
        'user': pg.user,
        'database': pg.dbname,
        'password': pg.password,
        'dbnames': config.monitoring.dbnames,
        'instance': pg.instance,
    }

    system_info = host_info(config.temboard.hostname)
    # Load the probes to run
    probes = load_probes(config.monitoring, config.temboard.home)
    instance = instance_info(conninfo, system_info['hostname'])

    logger.debug("Running probes")
    # Gather the data from probes
    data = run_probes(probes, [instance])

    # Prepare and send output
    output = {
        'datetime': now(),
        'hostinfo': system_info,
        'instances': remove_passwords([instance]),
        'data': data,
        'version': __VERSION__,
    }
    logger.debug(output)
    # Add data to metrics table
    db.add_metric(
        config.temboard.home,
        'monitoring.db',
        time.time(),
        output,
    )
    logger.debug("Done")
def monitoring_collector_worker(app):
    """
    Run probes and push collected metrics in a queue.
    """
    logger.debug("Starting monitoring collector")
    config = app.config
    pg = config.postgresql
    conninfo = {
        'host': pg.host,
        'port': pg.port,
        'user': pg.user,
        'database': pg.dbname,
        'password': pg.password,
        'dbnames': config.monitoring.dbnames,
        'instance': pg.instance,
    }

    system_info = host_info(config.temboard.hostname)
    # Load the probes to run
    probes = load_probes(config.monitoring, config.temboard.home)
    instance = instance_info(conninfo, system_info['hostname'])

    logger.debug("Running probes")
    # Gather the data from probes
    data = run_probes(probes, [instance])

    # Prepare and send output
    output = {
        'datetime': now(),
        'hostinfo': system_info,
        'instances': remove_passwords([instance]),
        'data': data,
        'version': __VERSION__,
    }
    logger.debug(output)

    queue_path = os.path.join(config.temboard.home, 'metrics.q')
    metrics_queue = Queue(queue_path,
                          max_size=1024 * 1024 * 10,
                          overflow_mode='slide')
    metrics_queue.push(Message(content=json.dumps(output)))
    logger.debug("Done")
def monitoring_collector_worker(config):
    """
    Run probes and push collected metrics in a queue.

    Receives the raw config dict, normalizes it to a namedtuple, then
    collects host info, runs the probes against the configured instance
    and pushes the JSON payload to the on-disk metrics queue.
    Exits the worker process with status 1 on any failure.
    """
    signal.signal(signal.SIGTERM, monitoring_worker_sigterm_handler)
    # convert config dict to namedtuple
    config = collections.namedtuple(
        '__config', ['temboard', 'plugins', 'postgresql', 'logging'])(
            temboard=config['temboard'],
            plugins=config['plugins'],
            postgresql=config['postgresql'],
            logging=config['logging'])

    logger.debug("Starting collector")
    try:
        system_info = host_info(config.temboard['hostname'])
    except Exception as e:
        # `Exception` subsumes ValueError; the original
        # `except (ValueError, Exception)` tuple was redundant.
        logger.exception(e)
        logger.debug("Failed")
        sys.exit(1)

    # Load the probes to run
    try:
        probes = load_probes(config.plugins['monitoring'],
                             config.temboard['home'])

        config.plugins['monitoring']['conninfo'] = [{
            'host': config.postgresql['host'],
            'port': config.postgresql['port'],
            'user': config.postgresql['user'],
            'database': config.postgresql['dbname'],
            'password': config.postgresql['password'],
            'dbnames': config.plugins['monitoring']['dbnames'],
            'instance': config.postgresql['instance']
        }]

        # Validate connection information from the config, and ensure
        # the instance is available
        instances = []
        for conninfo in config.plugins['monitoring']['conninfo']:
            instances.append(instance_info(conninfo, system_info['hostname']))

        logger.debug("Running probes")
        # Gather the data from probes
        data = run_probes(probes, instances)

        # Prepare and send output
        output = {
            'datetime': now(),
            'hostinfo': system_info,
            'instances': remove_passwords(instances),
            'data': data,
            'version': __VERSION__
        }
        logger.debug(output)
        q = Queue('%s/metrics.q' % (config.temboard['home']),
                  max_size=1024 * 1024 * 10,
                  overflow_mode='slide')
        q.push(Message(content=json.dumps(output)))
        logger.debug("Done")
    except Exception as e:
        logger.exception(e)
        logger.error("Could not collect data")
        sys.exit(1)
def run(self, conninfo):
    """Probe WAL/xlog activity on the instance described by `conninfo`.

    Skips standby servers. Collects the number and total size of WAL
    segment files, the archiver-ready count, and a `written_size` delta
    derived from the current WAL write location between two runs.
    Returns a one-element list with the metrics dict, or [] on a
    standby, on the first run (no previous sample), or if the WAL
    location cannot be parsed.
    """
    version = self.get_version(conninfo)

    # WAL statistics only make sense on a primary.
    if conninfo['standby']:
        return []

    metric = {
        'datetime': now(),
        'port': conninfo['port']
    }

    # The WAL directory and the "current location" function were renamed
    # in PostgreSQL 10 (pg_xlog -> pg_wal, pg_current_xlog_location ->
    # pg_current_wal_lsn), hence the version switch.
    if version < 100000:
        sql = """
        SELECT count(s.f) AS total,
               sum((pg_stat_file('pg_xlog/'||s.f)).size) AS total_size,
               pg_current_xlog_location() as current_location
        FROM pg_ls_dir('pg_xlog') AS s(f)
        WHERE f ~ E'^[0-9A-F]{24}$'
        """
    else:
        sql = """
        SELECT count(s.f) AS total,
               sum((pg_stat_file('pg_wal/'||s.f)).size) AS total_size,
               pg_current_wal_lsn() as current_location
        FROM pg_ls_dir('pg_wal') AS s(f)
        WHERE f ~ E'^[0-9A-F]{24}$'
        """
    rows = self.run_sql(conninfo, sql)

    metric['total'] = rows[0]['total']
    metric['total_size'] = rows[0]['total_size']
    metric['current_location'] = rows[0]['current_location']

    # Count segments waiting to be archived (.ready status files).
    if version < 100000:
        sql = """
        SELECT count(s.f) AS archive_ready
        FROM pg_ls_dir('pg_xlog/archive_status') AS s(f)
        WHERE f ~ E'\.ready$'
        """  # noqa W605
    else:
        sql = """
        SELECT count(s.f) AS archive_ready
        FROM pg_ls_dir('pg_wal/archive_status') AS s(f)
        WHERE f ~ E'\.ready$'
        """  # noqa W605
    rows = self.run_sql(conninfo, sql)

    metric['archive_ready'] = rows[0]['archive_ready']

    # Calculate the written size by using the delta between the
    # position between two runs. The current xlog location (format
    # "HI/LO", two hex halves) must be converted to a number first.
    m = re.match(r'^([0-9A-F]+)/([0-9A-F]+)$', metric['current_location'])
    if m:
        # NOTE(review): the high half is scaled by 0xff000000 rather
        # than 0x100000000 (2^32); 0xFF000000 matches the pre-9.3
        # bytes-per-logical-xlog-file convention — confirm this is
        # intentional for the supported server versions.
        current = int("0xff000000", 0) * \
            int("0x" + m.group(1), 0) + int("0x" + m.group(2), 0)
    else:
        logger.error("Unable to convert xlog location to a number")
        return []

    # Keyed per instance (slashes stripped) so deltas are tracked
    # independently for each monitored instance.
    (interval, delta) = self.delta(conninfo['instance'].replace('/', ''),
                                   {'written_size': current})

    # Empty the first time
    if interval is None:
        return []

    metric['measure_interval'] = interval
    metric.update(delta)

    return [metric]
def supervision_collector_worker(commands, command, config):
    """
    Run probes and push collected metrics in a queue.

    Tracks the command state (pid, start state) in the shared `commands`
    store, collects host info and probe data, and pushes the JSON
    payload to the on-disk metrics queue. Exits the worker process with
    status 1 on any failure.
    """
    signal.signal(signal.SIGTERM, supervision_worker_sigterm_handler)
    start_time = time.time() * 1000
    set_logger_name("supervision_collector_worker")
    logger = get_logger(config)
    # TODO: logging methods in supervision plugin must be aligned.
    logging.root = logger
    logger.debug("Starting with pid=%s" % (os.getpid()))
    logger.debug("commandid=%s" % (command.commandid))
    command.state = COMMAND_START
    command.time = time.time()

    try:
        command.pid = os.getpid()
        commands.update(command)
        system_info = host_info(config.temboard['hostname'])
    except Exception as e:
        # `Exception` subsumes ValueError; the original
        # `except (ValueError, Exception)` tuple was redundant.
        logger.traceback(get_tb())
        logger.error(str(e))
        logger.debug("Failed.")
        sys.exit(1)

    # Load the probes to run
    try:
        probes = load_probes(config.plugins['supervision'],
                             config.temboard['home'])

        config.plugins['supervision']['conninfo'] = [{
            'host': config.postgresql['host'],
            'port': config.postgresql['port'],
            'user': config.postgresql['user'],
            'database': config.postgresql['dbname'],
            'password': config.postgresql['password'],
            'dbnames': config.plugins['supervision']['dbnames'],
            'instance': config.postgresql['instance']
        }]

        # Validate connection information from the config, and ensure
        # the instance is available
        instances = []
        for conninfo in config.plugins['supervision']['conninfo']:
            # Use the worker's own logger (was a stray `logging.debug`
            # call; equivalent here since logging.root is this logger,
            # but consistent with the rest of the function).
            logger.debug("Validate connection information on instance \"%s\"",
                         conninfo['instance'])
            instances.append(instance_info(conninfo, system_info['hostname']))

        # Gather the data from probes
        data = run_probes(probes, instances)

        # Prepare and send output
        output = {
            'datetime': now(),
            'hostinfo': system_info,
            'instances': remove_passwords(instances),
            'data': data,
            'version': __VERSION__
        }
        logger.debug("Collected data: %s" % (output))
        q = Queue('%s/metrics.q' % (config.temboard['home']),
                  max_size=1024 * 1024 * 10,
                  overflow_mode='slide')
        q.push(Message(content=json.dumps(output)))
    except Exception as e:
        logger.traceback(get_tb())
        logger.error(str(e))
        logger.debug("Failed.")
        sys.exit(1)

    logger.debug("Duration: %s." % (str(time.time() * 1000 - start_time)))
    logger.debug("Done.")