def update_gauges(metrics):
    """Publish *metrics* through the module-level ``gauges`` registry.

    ``metrics`` is an iterable of ``(name_list, label_dict, value)``
    triples.  Samples are grouped by formatted metric name, label sets
    that vanished since the previous call are removed from the existing
    gauge, and all current values are set.
    """
    grouped = {}
    for name_list, label_dict, value in metrics:
        name = format_metric_name(name_list)
        # The first sample seen for a metric fixes its label-key order.
        keys, samples = grouped.setdefault(name, (tuple(label_dict.keys()), {}))
        samples[tuple(format_label_value(label_dict[k]) for k in keys)] = value

    for name, (keys, samples) in grouped.items():
        try:
            previous, gauge = gauges[name]
        except KeyError:
            previous, gauge = set(), Gauge(name, '', keys)
        current = set(samples)
        # Drop series whose label values no longer occur in this batch.
        for stale in previous - current:
            gauge.remove(*stale)
        for label_values, value in samples.items():
            target = gauge.labels(*label_values) if label_values else gauge
            target.set(value)
        gauges[name] = (current, gauge)
class QueryMetric(object):
    """A Prometheus gauge whose values come from an OMERO HQL projection.

    ``cfg`` supplies the gauge ``description``, the HQL ``query`` and the
    gauge ``labels``.  Each call to :meth:`update` re-runs the query and
    replaces the published series.
    """

    def __init__(self, name, cfg, verbose):
        self.name = name
        description = cfg['description']
        self.query = cfg['query']
        labels = cfg['labels']
        # Let prometheus_client handle name validation
        self.prometheus_gauge = Gauge(self.name, description, labels)
        self.verbose = verbose
        # HQL count won't return 0 so need to explicitly delete labelsets
        self.labelsets = set()

    def update(self, queryservice):
        """Run the HQL query via *queryservice* and refresh the gauge.

        The first projected column is the value; the remaining columns are
        the label values, in gauge label order.  Series present on the
        previous run but absent now are removed.
        """
        # 'omero.group': '-1' queries across all groups.
        results = queryservice.projection(
            self.query, None, {'omero.group': '-1'})
        if not results:
            if self.verbose:
                print('%s NULL' % self.name)
        prev_labelsets = self.labelsets
        self.labelsets = set()
        for r in results:
            labelvalues = [lv for lv in unwrap(r[1:])]
            value = unwrap(r[0])
            self.prometheus_gauge.labels(*labelvalues).set(value)
            self.labelsets.add(tuple(labelvalues))
            if self.verbose:
                print('%s %s %s' % (self.name, labelvalues, value))
        # Now delete absent labelsets
        for rm in prev_labelsets.difference(self.labelsets):
            self.prometheus_gauge.remove(*rm)
            if self.verbose:
                print('Removed %s %s' % (self.name, rm))
class PrometheusEndpoint(BaseFlatliner):
    """Publishes weirdness scores as a Prometheus gauge and prunes stale series."""

    def __init__(self, pruning_interval: int = 300):
        super().__init__()
        # Seconds a published series may go without an update before it is
        # removed from the gauge.
        self.pruning_interval = pruning_interval
        _LOGGER.info(
            "Prometheus Endpoint initialized. Metric pruning interval is {0} seconds"
            .format(self.pruning_interval))
        # Maps cluster id -> [publish timestamp, version].
        self.published_metric_timestamps = defaultdict(list)
        # This is the gauge metric where the metric data is published
        self.weirdness_score_gauge = Gauge(
            'weirdness_score',
            'Weirdness score for the given Cluster and Version',
            ['cluster', 'version'])

    def on_next(self, x):
        """Publish the weirdness score carried by packet *x*.

        *x* is expected to expose ``cluster``, ``version`` and
        ``weirdness_score`` attributes.  Errors are logged and re-raised.
        """
        try:
            # update the published metrics
            self.weirdness_score_gauge.labels(cluster=str(x.cluster),
                                              version=str(x.version)).set(
                                                  x.weirdness_score)
            # Store timestamp when the metric was published and metric version info
            self.published_metric_timestamps[str(
                x.cluster)] = [int(time()), str(x.version)]
        except Exception as e:
            _LOGGER.error(
                "Couldn't process the following packet {0}. Reason: {1}".
                format(x, str(e)))
            raise e

    def _delete_stale_metrics(self):
        '''
        This function will remove any metric that was published
        $(pruning_interval) seconds ago or older
        '''
        timestamp_threshold = int(time()) - self.pruning_interval
        # Iterate over a snapshot of the keys so entries can be deleted inline.
        for cluster_id in list(self.published_metric_timestamps):
            if self.published_metric_timestamps[cluster_id][
                    0] < timestamp_threshold:
                # if metric is stale, stop publishing it
                self.weirdness_score_gauge.remove(
                    cluster_id, self.published_metric_timestamps[cluster_id][1])
                del self.published_metric_timestamps[cluster_id]

    def start_server(self):
        """Expose the metrics over HTTP and prune stale series forever.

        Blocks the calling thread; pruning runs every ``pruning_interval``
        seconds.
        """
        # Start http server to expose metrics
        http_server_port = 8000
        start_http_server(http_server_port)
        _LOGGER.info(
            "http server started on port {0}".format(http_server_port))
        while True:
            # delete stale exposed metrics
            self._delete_stale_metrics()
            _LOGGER.debug("Next metric pruning will be in {} seconds".format(
                self.pruning_interval))
            sleep(self.pruning_interval)
def update_gauges(metrics):
    """Update the global ``gauges`` from ``(name_list, label_dict, value)`` triples."""
    by_name = {}
    for name_list, label_dict, value in metrics:
        metric_name = format_metric_name(name_list)
        if metric_name not in by_name:
            # Label keys are frozen from the first sample of each metric.
            by_name[metric_name] = (tuple(label_dict.keys()), {})
        label_keys, samples = by_name[metric_name]
        key = tuple(format_label_value(label_dict[k]) for k in label_keys)
        samples[key] = value

    for metric_name, (label_keys, samples) in by_name.items():
        if metric_name in gauges:
            old_values, gauge = gauges[metric_name]
        else:
            old_values, gauge = set(), Gauge(metric_name, '', label_keys)
        new_values = set(samples)
        # Series seen last round but not in this batch are retired.
        for removed in old_values.difference(new_values):
            gauge.remove(*removed)
        for label_values, value in samples.items():
            if label_values:
                gauge.labels(*label_values).set(value)
            else:
                gauge.set(value)
        gauges[metric_name] = (new_values, gauge)
class ManilaShareServerNanny(ManilaNanny):
    """ Manila Share Server

    Periodically finds snapshots whose parent share is deleted and exports
    them via the ``manila_nanny_orphan_share_snapshots`` gauge and an HTTP
    endpoint.
    """

    def __init__(self, config_file, interval, prom_port, http_port, handler):
        super(ManilaShareServerNanny, self).__init__(config_file,
                                                     interval,
                                                     prom_port=prom_port,
                                                     http_port=http_port,
                                                     handler=handler)
        # Guards self.orphan_snapshots against concurrent HTTP reads.
        self.orphan_snapshots_lock = Lock()
        # Maps snapshot_id -> {'snapshot_id': ..., 'share_id': ...}.
        self.orphan_snapshots: Dict[str, Dict[str, str]] = {}
        self.orphan_snapshots_gauge = Gauge(
            'manila_nanny_orphan_share_snapshots',
            'Orphan Manila Share Snapshots',
            ['share_id', 'snapshot_id'])

    def _run(self):
        """One nanny cycle: refresh the orphan-snapshot gauge and records."""
        s = self.query_orphan_snapshots()
        orphan_snapshots = {
            snapshot_id: {
                'snapshot_id': snapshot_id,
                'share_id': share_id
            }
            for snapshot_id, share_id in s
        }
        for snapshot_id in orphan_snapshots:
            share_id = orphan_snapshots[snapshot_id]['share_id']
            self.orphan_snapshots_gauge.labels(share_id=share_id,
                                               snapshot_id=snapshot_id).set(1)
        # Retire gauge series for snapshots that are no longer orphaned.
        for snapshot_id in self.orphan_snapshots:
            if snapshot_id not in orphan_snapshots:
                share_id = self.orphan_snapshots[snapshot_id]['share_id']
                self.orphan_snapshots_gauge.remove(share_id, snapshot_id)
        with self.orphan_snapshots_lock:
            self.orphan_snapshots = update_records(self.orphan_snapshots,
                                                   orphan_snapshots)

    def query_orphan_snapshots(self):
        """Return (snapshot_id, share_id) rows for live snapshots of deleted shares."""
        Snapshots = Table('share_snapshots', self.db_metadata, autoload=True)
        Shares = Table('shares', self.db_metadata, autoload=True)
        # Snapshot still alive (deleted == 'False') but its share is deleted.
        q = select([Snapshots.c.id, Snapshots.c.share_id])\
            .select_from(Snapshots.join(Shares, Snapshots.c.share_id == Shares.c.id))\
            .where(Snapshots.c.deleted == 'False')\
            .where(Shares.c.deleted != 'False')
        return list(q.execute())

    @response
    def get_orphan_snapshots(self):
        """HTTP handler: current orphan-snapshot records."""
        with self.orphan_snapshots_lock:
            return list(self.orphan_snapshots.values())
class metric_label:
    """Wraps a single-label Gauge and tracks which label values are live.

    ``update`` takes a mapping of label value -> numeric value; label
    values absent from the mapping are zeroed, and zero-valued series are
    removed (except on the very first update).
    """

    def __init__(self, name, label, value=None, description=None):
        self.name = name
        self.values = {}
        self.label_values = []
        self.label = label
        if description is None:
            description = name.replace("_", " ")
        self.metric = Gauge(name.lower(), description, [label])
        if value is not None:
            # Seed a series keyed by the label name itself.
            self.metric.labels(label).set(value)

    def update(self, value, remove_labels=True):
        """Publish *value* (label value -> number) and prune dead series."""
        # Never prune on the very first update.
        if not self.values:
            remove_labels = False
        for key in value:
            self.values[key] = value[key]
            self.metric.labels(key).set(value[key])
            if key not in self.label_values:
                self.label_values.append(key)
        stale = []
        for key in self.label_values:
            if key not in value:
                # Zero out series missing from this batch.
                self.metric.labels(key).set(0)
                self.values[key] = 0
            if self.values[key] == 0:
                stale.append(key)
        if remove_labels:
            for key in stale:
                if key in self.label_values:
                    self.metric.remove(key)
                    del self.values[key]
                    self.label_values.remove(key)

    def get_value(self):
        return self.values

    def get_label_values(self):
        return self.label_values

    def get_label(self):
        return self.label
def update_gauges(metrics):
    """Sync the global ``gauges`` registry with freshly grouped *metrics*."""
    for metric_name, (label_keys, value_dict) in group_metrics(metrics).items():
        if metric_name in gauges:
            old_label_values, gauge = gauges[metric_name]
        else:
            old_label_values = set()
            gauge = Gauge(metric_name, '', label_keys)
        current_label_values = set(value_dict)
        # Remove series that no longer appear in this batch.
        for gone in old_label_values - current_label_values:
            gauge.remove(*gone)
        for label_values, value in value_dict.items():
            target = gauge.labels(*label_values) if label_values else gauge
            target.set(value)
        gauges[metric_name] = (current_label_values, gauge)
class MetricsContainer(object):
    """Exports finished-Bacula-job metrics through a self-updating registry.

    Each job field registered in ``__init__`` becomes a per-job-name gauge.
    Values refresh lazily on collection, at most once per ``min_delay_time``;
    series for jobs that disappeared since the previous refresh are removed.
    """

    def __init__(self, reader):
        super(MetricsContainer, self).__init__()
        self.logger = logging.getLogger(self.__class__.__name__)
        self.reader = reader
        self.registry = UpdatingRegistryCollector()
        # Refresh metric values whenever the registry is scraped.
        self.registry.on_collect = self.update
        self.min_delay_time = datetime.timedelta(seconds=5)
        self.last_update = None
        self.last_job_names = set()
        # List of (updater, remover) closures, one pair per job metric.
        self._job_metrics = []
        self._create_job_metric("schedule_time", "schedule_seconds",
                                "Job schedule time")
        self._create_job_metric("start_time", "start_seconds",
                                "Job start time")
        self._create_job_metric("end_time", "end_seconds", "Job end time")
        self._create_job_metric("real_end_time", "real_end_seconds",
                                "Job real end time")
        self._create_job_metric("files", "files_count",
                                "Number of files fetched in job")
        self._create_job_metric("bytes", "size_bytes", "Size of job data")
        self._create_job_metric("status", "status", "Job status")
        self._create_job_metric("level", "level", "Job level")
        self._create_job_metric("id", "id", "Job id")
        self.m_job_bytes_total = Gauge('bacula_job_bytes_total',
                                       'Total size of job',
                                       registry=self.registry,
                                       labelnames=["name"])

    def _create_job_metric(self, model_field, name, description):
        """Register a gauge for *model_field* plus updater/remover closures."""
        m = Gauge('bacula_finished_job_%s' % name, description,
                  registry=self.registry, labelnames=["name"])

        def update(model):
            v = model[model_field]
            # prometheus_client accepts 'nan' as a float-like placeholder.
            v = 'nan' if v is None else v
            m.labels(model["name"]).set(v)

        def remove(name):
            # BUG FIX: Gauge.remove() takes label values as positional
            # arguments; the original passed a one-element list, which can
            # never match a series keyed by a plain string.
            m.remove(name)

        self._job_metrics.append((update, remove))

    def update(self):
        """Refresh all job metrics, throttled to once per ``min_delay_time``."""
        if self.last_update is not None and (
                datetime.datetime.now() - self.last_update) <= self.min_delay_time:
            return
        self.logger.info("Updating metrics")
        job_names = set()
        for job in self.reader.list_global_finished_jobs():
            job_names.add(job["name"])
            for updater, _remover in self._job_metrics:
                updater(job)
        stats = self.reader.get_global_stats()
        for k, v in stats['disk_used_per_job'].items():
            self.m_job_bytes_total.labels(k).set(v)
        # Drop series for jobs that disappeared since the previous refresh.
        for i in self.last_job_names.difference(job_names):
            self.logger.debug("Removing job %s from metrics", i)
            for _updater, remover in self._job_metrics:
                remover(i)
            # BUG FIX: was self.m_job_bytes_total.remove([i]) — a list is not
            # a valid label value and would raise/never match.
            self.m_job_bytes_total.remove(i)
        self.last_job_names = job_names
        self.last_update = datetime.datetime.now()
id=id, version=version, ip=ipAddress).set(nodeinfo[id]["cpuUsage"]) memoryFreeKB.labels(name=name, id=id, version=version, ip=ipAddress).set( nodeinfo[id]["memoryFreeKB"]) storageFreeKB.labels(name=name, id=id, version=version, ip=ipAddress).set( nodeinfo[id]["storageFreeKB"]) edgeItems.append([name, id, version, ipAddress]) try: previousItems.remove([name, id, version, ipAddress]) except: pass logging.debug("label collection to remove: %s", previousItems) for item in previousItems: totalMemoryKB.remove(*item) totalStorageKB.remove(*item) cpuUsage.remove(*item) memoryFreeKB.remove(*item) storageFreeKB.remove(*item) previousItems = edgeItems else: logging.error("edges error: %s", response.content) time.sleep(interval)
except Exception as e: print(f"Got a mystery error for {name}:") pprint(e) if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG) start_http_server(9402) with open(sys.argv[1]) as channel_data: channels = json.load(channel_data) revisions = {} while True: for (channel, about) in channels.items(): measurement = measure_channel(channel) if measurement is not None: revision = measurement['revision'] status = about.get('status', '') variant = about.get('variant', '') current = int(status != 'unmaintained') CHANNEL_UPDATE_TIME.labels(channel=channel).set(measurement['timestamp']) CHANNEL_REVISION.labels(channel=channel, revision=revision, status=status, variant=variant, current=current).set(1) CHANNEL_CURRENT.labels(channel=channel).set(current) print('updated {}'.format(channel)) previous_revision = revisions.pop(channel, None) revisions[channel] = revision if previous_revision and previous_revision != revision: CHANNEL_REVISION.remove(channel, previous_revision, status, variant, current)
class Exporter():
    """Prometheus exporter exposing wal-g basebackup and WAL-archiving metrics."""

    def __init__(self):
        self.basebackup_exception = False
        self.xlog_exception = False
        self.bbs = []                   # cached backup list, sorted by start_time
        self.last_archive_check = None  # timestamp of last pg_stat_archiver poll
        self.archive_status = None
        # Declare metrics
        self.basebackup = Gauge('walg_basebackup', 'Remote Basebackups',
                                ['start_wal_segment', 'start_lsn'])
        self.basebackup_count = Gauge('walg_basebackup_count',
                                      'Remote Basebackups count')
        self.basebackup_count.set_function(lambda: len(self.bbs))

        self.last_upload = Gauge('walg_last_upload',
                                 'Last upload of incremental or full backup',
                                 ['type'])
        self.last_upload.labels('xlog').set_function(
            self.last_xlog_upload_callback)
        self.last_upload.labels('basebackup').set_function(
            lambda: self.bbs[-1]['start_time'].timestamp()
            if self.bbs else 0)
        self.oldest_basebackup = Gauge('walg_oldest_basebackup',
                                       'oldest full backup')
        self.oldest_basebackup.set_function(
            lambda: self.bbs[0]['start_time'].timestamp() if self.bbs else 0)
        self.xlog_ready = Gauge('walg_missing_remote_wal_segment_at_end',
                                'Xlog ready for upload')
        self.xlog_ready.set_function(self.xlog_ready_callback)
        self.exception = Gauge('walg_exception',
                               'Wal-g exception: 2 for basebackup error, '
                               '3 for xlog error and '
                               '5 for remote error')
        # BUG FIX: the original lambda relied on conditional-expression
        # precedence and parsed as `1 if bb else (0 + (2 if xlog else 0))`,
        # yielding 1/2/1 — never the documented values.  Sum independent
        # flags so the metric matches its help text: 2 = basebackup error,
        # 3 = xlog error, 5 = both.
        self.exception.set_function(
            lambda: ((2 if self.basebackup_exception else 0) +
                     (3 if self.xlog_exception else 0)))

        self.xlog_since_last_bb = Gauge('walg_xlogs_since_basebackup',
                                        'Xlog uploaded since last base backup')
        self.xlog_since_last_bb.set_function(self.xlog_since_last_bb_callback)
        self.last_backup_duration = Gauge('walg_last_backup_duration',
                                          'Duration of the last full backup')
        self.last_backup_duration.set_function(
            lambda: ((self.bbs[-1]['finish_time'] -
                      self.bbs[-1]['start_time']).total_seconds()
                     if self.bbs else 0))
        self.walg_backup_fuse = Gauge('walg_backup_fuse',
                                      "0 backup fuse is OK, 1 backup fuse is burnt")
        self.walg_backup_fuse.set_function(self.backup_fuse_callback)
        # Fetch remote base backups
        self.update_basebackup()

    def update_basebackup(self, *unused):
        """
        When this script receive a SIGHUP signal, it will call backup-list
        and update metrics about basebackups
        """
        info('Updating basebackups metrics...')
        try:
            # Fetch remote backup list
            res = subprocess.run(["wal-g", "backup-list", "--detail", "--json"],
                                 capture_output=True, check=True)
            new_bbs = list(map(format_date, json.loads(res.stdout)))
            new_bbs.sort(key=lambda bb: bb['start_time'])
            new_bbs_name = [bb['backup_name'] for bb in new_bbs]
            old_bbs_name = [bb['backup_name'] for bb in self.bbs]
            bb_deleted = 0
            # Remove metrics for deleted backups
            for bb in self.bbs:
                if bb['backup_name'] not in new_bbs_name:
                    # Backup deleted
                    self.basebackup.remove(bb['wal_file_name'], bb['start_lsn'])
                    bb_deleted = bb_deleted + 1
            # Add metrics for new backups
            for bb in new_bbs:
                if bb['backup_name'] not in old_bbs_name:
                    (self.basebackup.labels(bb['wal_file_name'], bb['start_lsn'])
                     .set(bb['start_time'].timestamp()))
            # Update backup list
            self.bbs = new_bbs
            # BUG FIX: guard the summary log — indexing self.bbs[0] raised
            # IndexError when the remote had no basebackups at all.
            if self.bbs:
                info("%s basebackups found (first: %s, last: %s), %s deleted",
                     len(self.bbs),
                     self.bbs[0]['start_time'],
                     self.bbs[-1]['start_time'],
                     bb_deleted)
            else:
                info("0 basebackups found, %s deleted", bb_deleted)
            self.basebackup_exception = False
        except subprocess.CalledProcessError as e:
            error(e)
            self.basebackup_exception = True

    def last_archive_status(self):
        """Return pg_stat_archiver data, cached for at most one second."""
        if (self.last_archive_check is None or
                datetime.datetime.now().timestamp() -
                self.last_archive_check > 1):
            self.archive_status = self._last_archive_status()
            self.last_archive_check = datetime.datetime.now().timestamp()
        return self.archive_status

    def _last_archive_status(self):
        """Query pg_stat_archiver; raise if no row can be fetched."""
        with psycopg2.connect(
            host=os.getenv('PGHOST', 'localhost'),
            port=os.getenv('PGPORT', '5432'),
            user=os.getenv('PGUSER', 'postgres'),
            password=os.getenv('PGPASSWORD'),
            dbname=os.getenv('PGDATABASE', 'postgres'),
        ) as db_connection:
            db_connection.autocommit = True
            with db_connection.cursor(cursor_factory=DictCursor) as c:
                c.execute('SELECT archived_count, failed_count, '
                          'last_archived_wal, '
                          'last_archived_time, '
                          'last_failed_wal, '
                          'last_failed_time '
                          'FROM pg_stat_archiver')
                res = c.fetchone()
        # BUG FIX: the original tested the undefined name `result`, which
        # raised NameError instead of performing the intended sanity check.
        # cursor.fetchone() returns None when no row is available.
        if not res:
            raise Exception("Cannot fetch archive status")
        return res

    def last_xlog_upload_callback(self):
        """Timestamp of the most recently archived WAL segment."""
        archive_status = self.last_archive_status()
        return archive_status['last_archived_time'].timestamp()

    def xlog_ready_callback(self):
        """Count WAL segments flagged ready for upload in archive_dir."""
        res = 0
        try:
            for f in os.listdir(archive_dir):
                # search for xlog waiting for upload
                if READY_WAL_RE.match(f):
                    res += 1
            self.xlog_exception = 0
        except FileNotFoundError:
            self.xlog_exception = 1
        return res

    def xlog_since_last_bb_callback(self):
        # Compute xlog_since_last_basebackup
        if self.bbs:
            archive_status = self.last_archive_status()
            return wal_diff(archive_status['last_archived_wal'],
                            self.bbs[-1]['wal_file_name'])
        else:
            return 0

    def backup_fuse_callback(self):
        """1 while the archive-failure fuse file exists, else 0."""
        return int(os.path.exists('/tmp/failed_pg_archive'))
(gwid, devices[gwid]['name'], on, w, mA, V)) label_values = [ devices[gwid]['name'], gwid, data['ip'], data['version'] ] gs.labels(*label_values).set(on) gp.labels(*label_values).set(w) ga.labels(*label_values).set(mA / 1000) gv.labels(*label_values).set(V) devices[gwid]['lastseen'] = time.time() else: print("Error: %s %s. Wrong device key?" % (err, gwid)) # cleanup metrics if device offline for more than 30 sec for d in devices: if devices[d][ 'lastseen'] and devices[d]['lastseen'] < time.time() - 30: print("device %s (%s) gone offline" % (devices[d]['id'], devices[d]['name'])) devices[d]['lastseen'] = False label_values = [ devices[d]['name'], d, devices[d]['ip'], devices[d]['version'] ] gs.remove(*label_values) gp.remove(*label_values) ga.remove(*label_values) gv.remove(*label_values)
class ManilaShareSyncNanny(ManilaNanny):
    """Reconciles Manila shares with their NetApp backend volumes.

    Periodic tasks: sync share sizes with backend volume sizes, flag shares
    whose backend volume is missing, and report orphan and offline backend
    volumes.  Findings are exported as Prometheus metrics and served via
    HTTP endpoints.
    """

    def __init__(self, config_file, prom_host, interval, tasks,
                 dry_run_tasks, prom_port, http_port, handler):
        super(ManilaShareSyncNanny, self).__init__(config_file,
                                                   interval,
                                                   prom_port=prom_port,
                                                   http_port=http_port,
                                                   handler=handler)
        self.prom_host = prom_host + "/api/v1/query"

        self.MANILA_NANNY_SHARE_SYNC_FAILURE = Counter(
            'manila_nanny_share_sync_failure', '')
        self.MANILA_SYNC_SHARE_SIZE_COUNTER = Counter(
            'manila_nanny_sync_share_size', 'manila nanny sync share size')
        self.MANILA_RESET_SHARE_ERROR_COUNTER = Counter(
            'manila_nanny_reset_share_error',
            'manila nanny reset share status to error')
        self.manila_missing_volume_shares_gauge = Gauge(
            'manila_nanny_share_missing_volume',
            'Manila Share missing backend volume',
            ['share_id', 'instance_id', 'share_name', 'share_status'])
        self.manila_orphan_volumes_gauge = Gauge(
            'manila_nanny_orphan_volumes',
            'Orphan backend volumes of Manila service',
            ['share_id', 'share_status', 'filer', 'vserver', 'volume'])
        self.manila_offline_volumes_gauge = Gauge(
            'manila_nanny_offline_volumes',
            'Offline volumes of Manila service',
            ['share_id', 'share_status', 'filer', 'vserver', 'volume'])
        self._tasks = tasks
        self._dry_run_tasks = dry_run_tasks
        if not any(tasks.values()):
            raise Exception('All tasks are disabled')

        # Locks guard the record dicts against concurrent HTTP reads.
        self.orphan_volumes_lock = Lock()
        self.orphan_volumes = {}
        self.missing_volumes_lock = Lock()
        self.missing_volumes = {}
        self.offline_volumes_lock = Lock()
        self.offline_volumes = {}

    def _run(self):
        """One nanny cycle: fetch share/volume data and run enabled tasks."""
        # Need to recreate manila client each run, because of session timeout
        # self.renew_manila_client()

        # fetch data
        try:
            if self._tasks[TASK_SHARE_SIZE] or self._tasks[TASK_MISSING_VOLUME]\
                    or self._tasks[TASK_ORPHAN_VOLUME]:
                _share_list = self._query_shares()
                _volume_list = self._get_netapp_volumes()
                _shares, _orphan_volumes = self._merge_share_and_volumes(
                    _share_list, _volume_list)
            if self._tasks[TASK_OFFLINE_VOLUME]:
                _offline_volume_list = self._get_netapp_volumes('offline')
        except Exception as e:
            log.warning(e)
            self.MANILA_NANNY_SHARE_SYNC_FAILURE.inc()
            return

        if self._tasks[TASK_SHARE_SIZE]:
            dry_run = self._dry_run_tasks[TASK_SHARE_SIZE]
            self.sync_share_size(_shares, dry_run)

        if self._tasks[TASK_MISSING_VOLUME]:
            dry_run = self._dry_run_tasks[TASK_MISSING_VOLUME]
            self.process_missing_volume(_shares, dry_run)

        if self._tasks[TASK_ORPHAN_VOLUME]:
            dry_run = self._dry_run_tasks[TASK_ORPHAN_VOLUME]
            self.process_orphan_volumes(_orphan_volumes, dry_run)

        if self._tasks[TASK_OFFLINE_VOLUME]:
            dry_run = self._dry_run_tasks[TASK_OFFLINE_VOLUME]
            self.process_offline_volumes(_offline_volume_list, dry_run)

    def sync_share_size(self, shares, dry_run=True):
        """ Backend volume exists, but share size does not match """
        msg = "share %s: share size != netapp volume size (%d != %d)"
        msg_dry_run = "Dry run: " + msg
        for (share_id, _), share in shares.items():
            if 'volume' not in share:
                continue
            size, vsize = share['size'], share['volume']['size']
            # volume size can not be zero, could be in offline state
            if vsize == 0:
                continue
            # Skip shares touched within the last hour.
            if share['updated_at'] is not None:
                if is_utcts_recent(share['updated_at'], 3600):
                    continue
            if size != vsize:
                if dry_run:
                    log.info(msg_dry_run, share_id, size, vsize)
                else:
                    log.info(msg, share_id, size, vsize)
                    self.set_share_size(share_id, vsize)
                    self.MANILA_SYNC_SHARE_SIZE_COUNTER.inc()

    def process_missing_volume(self, shares, dry_run=True):
        """ Set share state to error when backend volume is missing

        Ignore shares that are created/updated within 6 hours.
        """
        missing_volumes = {}
        for (share_id, instance_id), share in shares.items():
            if 'volume' not in share:
                # check if shares are created/updated recently
                if is_utcts_recent(share['updated_at'] or share['created_at'],
                                   6 * 3600):
                    continue
                share_name = share['name']
                share_status = share['status']
                msg = f'ManilaShareMissingVolume: share={share_id}, '\
                      f'instance={instance_id}, status={share_status}'
                if not dry_run:
                    if share_status == 'available':
                        self._reset_share_state(share_id, 'error')
                        share_status = 'error'
                        msg = f'ManilaShareMissingVolume: Set share {share_id} to error'
                else:
                    msg = 'Dry run: ' + msg
                log.info(msg)
                self.manila_missing_volume_shares_gauge.labels(
                    share_id=share_id,
                    instance_id=instance_id,
                    share_name=share_name,
                    share_status=share_status,
                ).set(1)
                missing_volumes[(share_id, instance_id)] = {
                    'share_id': share_id,
                    'instance_id': instance_id,
                    'share_name': share_name,
                    'share_status': share_status,
                }
        # Retire gauge series for shares that are no longer missing a volume.
        for (share_id, instance_id) in self.missing_volumes:
            s = self.missing_volumes[(share_id, instance_id)]
            share_name, share_status = s['share_name'], s['share_status']
            if (share_id, instance_id) not in shares:
                self.manila_missing_volume_shares_gauge.remove(
                    share_id, instance_id, share_name, share_status)
        with self.missing_volumes_lock:
            self.missing_volumes = update_records(self.missing_volumes,
                                                  missing_volumes)

    def process_offline_volumes(self, offline_volume_list, dry_run=True):
        """ offline volume

        @params offline_volumes: List[Volume]
            Volume: Dict[Keys['volume', 'vserver', 'filer'], Any]
        """
        _offline_volumes = {}
        for vol in offline_volume_list:
            if vol['volume'].startswith('share'):
                instance_id = vol['volume'][6:].replace('_', '-')
                _offline_volumes[instance_id] = vol
        # find associated share for offline volumes
        _shares = self._query_shares_by_instance_ids(
            list(_offline_volumes.keys()))
        for s in _shares:
            instance_id = s['instance_id']
            if instance_id in _offline_volumes:
                _offline_volumes[instance_id].update({'share': s})
        # ignore the shares that are updated/deleted recently
        _offline_volume_keys = list(_offline_volumes.keys())
        for vol_key, vol in _offline_volumes.items():
            share = vol.get('share')
            if share is not None:
                if share['deleted_at'] or share['updated_at']:
                    if is_utcts_recent(
                            share['deleted_at'] or share['updated_at'],
                            6 * 3600):
                        _offline_volume_keys.remove(vol_key)
        # process remaining volume
        offline_volumes = {}
        for vol_key in _offline_volume_keys:
            vol = _offline_volumes[vol_key]
            name, filer, vserver = vol['volume'], vol['filer'], vol['vserver']
            share = vol.get('share')
            if share is not None:
                share_id, status = share['share_id'], share['status']
            else:
                share_id, status = '', ''
            self.manila_offline_volumes_gauge.labels(
                share_id=share_id,
                share_status=status,
                volume=name,
                vserver=vserver,
                filer=filer,
            ).set(1)
            offline_volumes[name] = {
                'volume': name,
                'filer': filer,
                'vserver': vserver,
                'share_id': share_id,
                'status': status,
            }
        # Retire gauge series for volumes that came back online.
        for volname, vol in self.offline_volumes.items():
            if volname not in offline_volumes:
                # BUG FIX: the stored record has no 'name' key — the volume
                # name is stored under 'volume'; the old code raised KeyError
                # here and the stale series was never removed.
                self.manila_offline_volumes_gauge.remove(
                    vol['share_id'], vol['status'], vol['filer'],
                    vol['vserver'], vol['volume'])
        with self.offline_volumes_lock:
            self.offline_volumes = update_records(self.offline_volumes,
                                                  offline_volumes)

    def process_orphan_volumes(self, volumes, dry_run=True):
        """ orphan volumes

        Check if the corresponding manila shares are deleted recently (hard
        coded as 6 hours).

        @params volumes: Dict[(FilerName, InstanceId), Volume]
        """
        # volume key (extracted from volume name) is manila share instance id
        vol_keys = list(volumes.keys())

        # Shares: List[Share]
        # Share.Keys: share_id, instance_id, deleted_at, status
        shares = self._query_shares_by_instance_ids(
            [instance_id for (_, instance_id) in vol_keys])

        # merge share into volume
        r = re.compile('^manila-share-netapp-(?P<filer>.+)@(?P=filer)#.*')
        for s in shares:
            m = r.match(s['host'])
            if m:
                filer = m.group('filer')
            else:
                continue
            if (filer, s['instance_id']) in volumes:
                volumes[(filer, s['instance_id'])].update({'share': s})

        # loop over volumes
        for (filer, instance_id), vol in volumes.items():
            # double check if the manila shares are deleted recently
            if 'share' in vol:
                share = vol['share']
                deleted_at = share.get('deleted_at', None)
                if deleted_at is not None:
                    if (datetime.utcnow() - deleted_at).total_seconds() < 6 * 3600:
                        vol_keys.remove((filer, instance_id))

        orphan_volumes = {}
        for vol_key in vol_keys:
            vol = volumes[vol_key]
            volume, vserver, filer = vol['volume'], vol['vserver'], vol['filer']
            if 'share' in vol:
                share_id = vol['share']['share_id']
                share_deleted = vol['share']['deleted']
                share_deleted_at = vol['share']['deleted_at']
                instance_id = vol['share']['instance_id']
                instance_status = vol['share']['status']
            else:
                share_id, share_deleted, share_deleted_at, instance_id, \
                    instance_status = None, None, None, None, ''
            self.manila_orphan_volumes_gauge.labels(
                share_id=share_id,
                share_status=instance_status,
                filer=filer,
                vserver=vserver,
                volume=volume,
            ).set(1)
            orphan_volumes[vol_key] = {
                'filer': filer,
                'vserver': vserver,
                'volume': volume,
                'share_id': share_id,
                'share_deleted': share_deleted,
                'share_deleted_at': share_deleted_at,
                'instance_id': instance_id,
                'instance_status': instance_status,
            }
        # Retire gauge series for volumes that are no longer orphaned.
        for k, vol in self.orphan_volumes.items():
            if k not in orphan_volumes:
                self.manila_orphan_volumes_gauge.remove(
                    vol['share_id'], vol['instance_status'], vol['filer'],
                    vol['vserver'], vol['volume'])
        with self.orphan_volumes_lock:
            self.orphan_volumes = update_records(self.orphan_volumes,
                                                 orphan_volumes)

    def _get_netapp_volumes(self, status='online'):
        """ get netapp volumes from prometheus metrics

        return [<vol>, <vol>, ...]
        """
        def _merge_dicts(dict_a, dict_b):
            dict_a.update(dict_b)
            return dict_a

        def _filter_labels(vol):
            return {
                'volume': vol['volume'],
                'vserver': vol['vserver'],
                'filer': vol['filer'],
            }

        if status == 'online':
            query = "netapp_volume_total_bytes{app='netapp-capacity-exporter-manila'} + "\
                    "netapp_volume_snapshot_reserved_bytes"
            results = self._fetch_prom_metrics(query)
            return [
                _merge_dicts(_filter_labels(vol['metric']),
                             {'size': int(vol['value'][1]) / ONEGB})
                for vol in results
            ]
        if status == 'offline':
            query = "netapp_volume_state{app='netapp-capacity-exporter-manila'}==3"
            results = self._fetch_prom_metrics(query)
            return [_filter_labels(vol['metric']) for vol in results]

    def _fetch_prom_metrics(self, query):
        """Run an instant *query* against Prometheus; None on non-200."""
        try:
            r = requests.get(self.prom_host,
                             params={
                                 'query': query,
                                 'time': time.time()
                             })
        except Exception as e:
            raise type(e)(f'_fetch_prom_metrics(query=\"{query}\"): {e}')
        if r.status_code != 200:
            return None
        return r.json()['data']['result']

    def _query_shares_by_instance_ids(self, instance_ids):
        """
        @return List[Share]
            Share: Dict[Keys['share_id', 'instance_id', 'created_at',
                             'updated_at', 'deleted_at', 'deleted',
                             'status', 'host'], Any]
        """
        shares_t = Table('shares', self.db_metadata, autoload=True)
        instances_t = Table('share_instances', self.db_metadata, autoload=True)
        q = select([shares_t.c.id.label('share_id'),
                    shares_t.c.created_at,
                    shares_t.c.updated_at,
                    shares_t.c.deleted_at,
                    shares_t.c.deleted,
                    instances_t.c.status,
                    instances_t.c.id.label('instance_id'),
                    instances_t.c.host,
                    ])\
            .where(shares_t.c.id == instances_t.c.share_id)\
            .where(instances_t.c.id.in_(instance_ids))
        r = q.execute()
        return [dict(zip(r.keys(), x)) for x in r.fetchall()]

    def _query_shares(self):
        """ Get shares that are not deleted """
        shares = Table('shares', self.db_metadata, autoload=True)
        instances = Table('share_instances', self.db_metadata, autoload=True)
        stmt = select([shares.c.id,
                       shares.c.display_name,
                       shares.c.size,
                       shares.c.created_at,
                       shares.c.updated_at,
                       instances.c.id,
                       instances.c.status,
                       instances.c.host,
                       ])\
            .select_from(
                shares.join(instances, shares.c.id == instances.c.share_id))\
            .where(shares.c.deleted == 'False')
        shares = []
        for (sid, name, size, ctime, utime, siid, status,
                host) in stmt.execute():
            shares.append({
                'id': sid,
                'name': name,
                'size': size,
                'created_at': ctime,
                'updated_at': utime,
                'instance_id': siid,
                'status': status,
                'host': host,
            })
        return shares

    def _merge_share_and_volumes(self, shares, volumes):
        """ Merge shares and volumes by share id and volume name

        Assuming the volume name is `share_[share_instance_id]`. Update the
        share object with the volume fields ("filer", "vserver", "volume",
        "volume_size").

        Args:
            shares: List[]
            volumes: List[]

        Return: (shares, volumes): merged shares and unmerged volumes
            shares: Dict[(ShareId, InstanceId): Share]
            volumes: Dict[(FilerName, InstanceId): Volume]
        """
        r = re.compile('^manila-share-netapp-(?P<filer>.+)@(?P=filer)#.*')
        _shares = {(s['id'], s['instance_id']): s for s in shares}
        _volumes = {(vol['filer'], vol['volume'][6:].replace('_', '-')): vol
                    for vol in volumes if vol['volume'].startswith('share_')}
        for (share_id, instance_id), share in _shares.items():
            m = r.match(share['host'])
            if m:
                filer = m.group('filer')
                vol = _volumes.pop((filer, instance_id), None)
            else:
                continue
            if vol:
                _shares[(share_id, instance_id)].update({'volume': vol})
        return _shares, _volumes

    def set_share_size(self, share_id, share_size):
        """Persist *share_size* for an available share directly in the DB."""
        now = datetime.utcnow()
        shares_t = Table('shares', self.db_metadata, autoload=True)
        share_instances_t = Table('share_instances', self.db_metadata,
                                  autoload=True)
        update(shares_t) \
            .values(updated_at=now, size=share_size) \
            .where(shares_t.c.id == share_instances_t.c.share_id) \
            .where(and_(shares_t.c.id == share_id,
                        share_instances_t.c.status == 'available')) \
            .execute()

    def _reset_share_state(self, share_id, state):
        """Best-effort reset of a share's state via the Manila API."""
        try:
            self.manilaclient.shares.reset_state(share_id, state)
        except Exception as e:
            log.exception("_reset_share_state(share_id=%s, state=%s): %s",
                          share_id, state, e)

    @response
    def get_orphan_volumes(self):
        with self.orphan_volumes_lock:
            orphan_volumes = list(self.orphan_volumes.values())
        return orphan_volumes

    @response
    def get_offline_volumes(self):
        with self.offline_volumes_lock:
            offline_volumes = list(self.offline_volumes.values())
        return offline_volumes

    @response
    def get_missing_volume_shares(self):
        with self.missing_volumes_lock:
            missing_volumes = list(self.missing_volumes.values())
        return sorted(missing_volumes, key=lambda v: v['share_id'])
items = format['items'] return items while 1: config.load_incluster_config() k8s_api_obj = client.CoreV1Api() nss = get_items(k8s_api_obj.list_namespace()) for i in nss: ns = i['metadata']['name'] pods = get_items(k8s_api_obj.list_namespaced_pod(ns)) pvcs = get_items( k8s_api_obj.list_namespaced_persistent_volume_claim(ns)) for p in pods: for vc in p['spec']['volumes']: if vc['persistent_volume_claim']: pvc = vc['persistent_volume_claim']['claim_name'] for v in pvcs: if v['metadata']['name'] == pvc: vol = v['spec']['volume_name'] pod = p['metadata']['name'] print("PVC: %s, VOLUME: %s, POD: %s" % (pvc, vol, pod)) if pvc in pool.keys(): g.remove(pvc, pool[pvc][0], pool[pvc][1]) g.labels(pvc, vol, pod) pool[pvc] = [vol, pod] else: g.labels(pvc, vol, pod) pool[pvc] = [vol, pod] sleep(15)
# Export which pod currently mounts each PVC, one series per claim.
g = Gauge('pvc_mapping', 'fetching the mapping between pod and pvc',
          ['persistentvolumeclaim', 'mountedby'])
pool = {}

while True:
    config.load_incluster_config()
    api = client.CoreV1Api()
    namespaces = api.list_namespace().to_dict()['items']
    for ns in namespaces:
        ns_name = ns['metadata']['name']
        print(ns_name)
        ns_pods = api.list_namespaced_pod(ns_name).to_dict()['items']
        for pod_obj in ns_pods:
            for volume in pod_obj['spec']['volumes']:
                if not volume['persistent_volume_claim']:
                    continue
                claim = volume['persistent_volume_claim']['claim_name']
                pod_name = pod_obj['metadata']['name']
                print(claim, pod_name)
                if claim in pool:
                    # Claim moved: retire the series for the previous pod
                    # before registering the new one.
                    g.remove(claim, pool[claim])
                g.labels(claim, pod_name)
                pool[claim] = pod_name
    sleep(15)
class HTTPRequestMetric(threading.Thread):
    """Thread that consumes sensor packets from a result queue and
    republishes them as prometheus gauges (PC state, host uptime,
    user uptime), expiring label sets each publishing cycle."""

    def __init__(self, result_q, q_timeout, port, timeout, **kwargs):
        """
        :param result_q: queue of sensor packets (dicts keyed by "uid")
        :param q_timeout: seconds to block on the queue per read
        :param port: port for the prometheus HTTP exposition server
        :param timeout: seconds between publishing cycles
        """
        super().__init__(**kwargs)
        self.metrics = dict()
        self.m1 = Gauge(COMPUTER_STATE, "PC state", ["uid", "statename"])
        self.m2 = Gauge(HOST_UPTIME, "Host uptime",
                        ["uid", "hostname", "ip", "domainname",
                         "versionsystem"])
        self.m3 = Gauge(USER_UPTIME, "User uptime",
                        ["uid", "hostname", "ip", "domainname", "username",
                         "versionsystem"])
        self._port = port
        self._timeout = timeout
        self._stopped = False
        start_http_server(self._port)
        # Label tuples published on the previous cycle; removed before the
        # next publish so stale label sets do not linger in the exposition.
        self.m1_old = []
        self.m2_old = []
        self.m3_old = []
        self.state_off = {}
        self.result_q = result_q
        self.timeout = q_timeout
        # Accumulates elapsed time; check_state() runs about once a minute.
        self._seconds = 0

    def stop(self):
        """Ask the run() loop to terminate."""
        self._stopped = True

    def clear_metrics(self):
        """Remove every label set published on the previous cycle."""
        for el in self.m1_old:
            self.m1.remove(*el)
        self.m1_old = []
        for el in self.m2_old:
            self.m2.remove(*el)
        self.m2_old = []
        for el in self.m3_old:
            self.m3.remove(*el)
        self.m3_old = []

    def check_state(self):
        """Demote sensors that have been silent too long
        (ON -> UNKNOWN after STATE_TIMEOUT_UNKNOWN, UNKNOWN -> OFF after
        STATE_TIMEOUT_OFF)."""
        for key in self.metrics:
            if self.metrics[key]["state"] == STATE_OFF:
                continue
            idle = (datetime.datetime.now().timestamp()
                    - self.metrics[key]["time_last_action"])
            if self.metrics[key]["state"] == STATE_UNKNOWN:
                if idle > STATE_TIMEOUT_OFF:
                    self.metrics[key]["state"] = STATE_OFF
            else:
                if idle > STATE_TIMEOUT_UNKNOWN:
                    self.metrics[key]["state"] = STATE_UNKNOWN

    def read_queue(self):
        """Pull one packet from the result queue (blocking up to
        self.timeout seconds) and refresh per-sensor states once a minute."""
        self._seconds += self._timeout
        if self._seconds > 60:
            self._seconds = 0
            self.check_state()
        try:
            data = self.result_q.get(block=True, timeout=self.timeout)
            self.metrics[data["uid"]] = data.copy()
            self.result_q.task_done()
        except queue.Empty:
            pass
        except Exception:
            # BUG FIX: was a bare `except:`, which also swallowed
            # SystemExit/KeyboardInterrupt. A malformed packet (e.g. missing
            # "uid") is still acknowledged so the queue does not stall.
            self.result_q.task_done()

    def make_metrics(self):
        """Publish gauges for every known sensor; uptime gauges are only
        exported while the sensor has not been UNKNOWN for too long."""
        for key, val in self.metrics.items():
            self.m1.labels(key, STATE[val["state"]]).set(1)
            self.m1_old.append((key, STATE[val["state"]]))
            if val["state"] != STATE_OFF and \
                    self.state_off.get(key, 0) < STATE_TIMEOUT_UNKNOWN:
                self.m2.labels(key, val["hostname"], val["ip"],
                               val["domainname"],
                               val["versionsystem"]).set(val["host_uptime"])
                self.m2_old.append((key, val["hostname"], val["ip"],
                                    val["domainname"], val["versionsystem"]))
                if val["username"]:
                    self.m3.labels(key, val["hostname"], val["ip"],
                                   val["domainname"], val["username"],
                                   val["versionsystem"]).set(
                                       val["user_uptime"])
                    self.m3_old.append((key, val["hostname"], val["ip"],
                                        val["domainname"], val["username"],
                                        val["versionsystem"]))
                # Accumulate time spent in UNKNOWN; reset as soon as the
                # sensor reports any other state.
                self.state_off[key] = \
                    self.state_off.get(key, 0) + self._timeout \
                    if val["state"] == STATE_UNKNOWN else 0
            elif val["state"] == STATE_ON:
                self.state_off[key] = 0

    def run(self):
        """Main data-processing loop."""
        while not self._stopped:
            self.read_queue()
            self.clear_metrics()
            self.make_metrics()
            time.sleep(self._timeout)
class PrometheusDB(BasePrometheusDB):
    """
    Database that expose received data as metric in order to be scrapped by a
    prometheus instance.

    Values are aggregated over a period and exposed as mean/std/min/max
    gauges. Could only be used with a pusher actor.
    """

    def __init__(self, report_type: Type[Report], port: int, address: str,
                 metric_name: str, metric_description: str,
                 aggregation_periode: int, tags: List[str]):
        """
        :param address: address that expose the metric
        :param port: exposition port
        :param metric_name: base name of the exposed gauges
        :param metric_description: short sentence that describe the metric
        :param aggregation_periode: number of second for the value must be
                                    aggregated before compute statistics on
                                    them
        :param tags: metadata used to tag metric
        """
        BasePrometheusDB.__init__(self, report_type, port, address,
                                  metric_name, metric_description, tags)
        self.aggregation_periode = aggregation_periode
        self.final_tags = ['sensor', 'target'] + tags

        self.mean_metric = None
        self.std_metric = None
        self.min_metric = None
        self.max_metric = None

        self.exposed_measure = {}
        self.measure_for_current_period = {}
        self.current_period_end = 0
        self.buffer = StatBuffer(aggregation_periode)

    def __iter__(self):
        raise NotImplementedError()

    def _init_metrics(self):
        # One gauge per statistic, all sharing the same label set.
        self.mean_metric = Gauge(self.metric_name + '_mean',
                                 self.metric_description + '(MEAN)',
                                 self.final_tags)
        self.std_metric = Gauge(self.metric_name + '_std',
                                self.metric_description + '(STD)',
                                self.final_tags)
        self.min_metric = Gauge(self.metric_name + '_min',
                                self.metric_description + '(MIN)',
                                self.final_tags)
        self.max_metric = Gauge(self.metric_name + '_max',
                                self.metric_description + '(MAX)',
                                self.final_tags)

    def _stat_gauges(self):
        # (gauge, statistic key) pairs, in a fixed order.
        return ((self.mean_metric, 'mean'), (self.std_metric, 'std'),
                (self.min_metric, 'min'), (self.max_metric, 'max'))

    def _expose_data(self, key):
        """Push the aggregated statistics for *key* to the four gauges."""
        stats = self.buffer.get_stats(key)
        if stats is None:
            return
        labels = {tag: stats['tags'][tag] for tag in self.final_tags}
        try:
            for gauge, stat in self._stat_gauges():
                gauge.labels(**labels).set(stats[stat])
        except TypeError:
            # Fallback for the older prometheus-client API where labels()
            # accepted a single dict argument.
            for gauge, stat in self._stat_gauges():
                gauge.labels(labels).set(stats[stat])

    def _report_to_measure_and_key(self, report):
        measure = self.report_type.to_prometheus(report, self.tags)
        key = ''.join(str(measure['tags'][tag]) for tag in self.final_tags)
        return key, measure

    def _update_exposed_measure(self):
        # Keep only label sets refreshed during the period that just ended;
        # unregister the others from every statistic gauge.
        still_exposed = {}
        for key, args in self.exposed_measure.items():
            if key in self.measure_for_current_period:
                still_exposed[key] = args
            else:
                for gauge, _ in self._stat_gauges():
                    gauge.remove(*args)
        self.exposed_measure = still_exposed

    def _append_measure_from_old_period_to_buffer_and_expose_data(self):
        for old_key, old_measures in self.measure_for_current_period.items():
            for old_measure in old_measures:
                self.buffer.append(old_measure, old_key)
            self._expose_data(old_key)

    def _reinit_persiod(self, new_measure_time):
        # (sic) method name kept as-is; starts a fresh aggregation period
        # ending aggregation_periode seconds after the given time.
        self.current_period_end = new_measure_time + self.aggregation_periode
        self.measure_for_current_period = {}

    def save(self, report: Report):
        """
        Override from BaseDB

        :param report: Report to save
        """
        key, measure = self._report_to_measure_and_key(report)
        if measure['time'] > self.current_period_end:
            self._append_measure_from_old_period_to_buffer_and_expose_data()
            self._update_exposed_measure()
            self._reinit_persiod(measure['time'])

        if key not in self.exposed_measure:
            self.exposed_measure[key] = [measure['tags'][label]
                                         for label in self.final_tags]
        self.measure_for_current_period.setdefault(key, []).append(measure)

    def save_many(self, reports: List[Report]):
        """
        Save a batch of data

        :param reports: Batch of data.
        """
        for report in reports:
            self.save(report)
class runnerExports:
    """Exports GitHub organisation runner state as prometheus gauges."""

    def __init__(self):
        # Define metrics to expose
        self.metric_runner_org_status = Gauge(
            "github_runner_org_status",
            "Runner status",
            ["name", "id", "os", "labels", "status"],
        )
        self.metric_runner_org_label_status = Gauge(
            "github_runner_org_label_status",
            "Runner label status",
            ["name", "id", "os", "label", "status"],
        )
        self.metric_runner_org_busy = Gauge(
            "github_runner_org_busy",
            "Runner busy status",
            ["name", "id", "os", "labels", "busy"],
        )

    def export_metrics(self, runner_list: list):
        """Publish status/busy gauges for every runner, then drop gauges
        belonging to runners that disappeared."""
        current_runners = []
        for runner in runner_list:
            agg_labels = self.aggregate_labels(runner["labels"])
            self.export_runner_status(runner, agg_labels)
            self.export_runner_busy(runner, agg_labels)
            current_runners.append(str(runner["id"]))
        self.ghostbuster(current_runners)

    def ghostbuster(self, current_runners):
        """
        Case some runner is deleted this function will remove from the metrics
        """
        # The runner id is the second label value of each metric below;
        # collect the stale label sets first, then remove them.
        for metric in (self.metric_runner_org_status,
                       self.metric_runner_org_label_status,
                       self.metric_runner_org_busy):
            stale = [labelset for labelset in metric._metrics
                     if labelset[1] not in current_runners]
            for labelset in stale:
                metric.remove(*labelset)

    def aggregate_labels(self, labels: dict):
        """
        Aggregate the runners labels in string
        """
        return ",".join(label["name"] for label in labels
                        if label["type"] == "custom")

    def export_runner_status(self, runner: dict, agg_labels: str):
        """Publish online/offline gauges for one runner and its labels."""
        is_online = runner.get("status") == "online"
        online = 1 if is_online else 0
        offline = 0 if is_online else 1
        self.metric_runner_org_status.labels(
            runner.get("name"), runner.get("id"), runner.get("os"),
            agg_labels, "online").set(online)
        self.metric_runner_org_status.labels(
            runner.get("name"), runner.get("id"), runner.get("os"),
            agg_labels, "offline").set(offline)
        for label in runner["labels"]:
            self.metric_runner_org_label_status.labels(
                runner.get("name"), runner.get("id"), runner.get("os"),
                label["name"], "online").set(online)
            self.metric_runner_org_label_status.labels(
                runner.get("name"), runner.get("id"), runner.get("os"),
                label["name"], "offline").set(offline)

    def export_runner_busy(self, runner: dict, agg_labels: str):
        """Publish busy/idle gauges for one runner."""
        busy = 1 if runner.get("busy") == True else 0
        idle = 1 - busy
        self.metric_runner_org_busy.labels(
            runner.get("name"), runner.get("id"), runner.get("os"),
            agg_labels, "true").set(busy)
        self.metric_runner_org_busy.labels(
            runner.get("name"), runner.get("id"), runner.get("os"),
            agg_labels, "false").set(idle)
class metric_labels:
    """A labelled prometheus Gauge backed by a nested dict of values.

    ``self.values`` maps label value -> (nested dicts ->) numeric value; one
    gauge child exists per complete label path. Because some data sources
    stop reporting an item instead of reporting zero, values absent from an
    update are zeroed, unregistered from the gauge and pruned from the dict.
    """

    def __init__(self, name, labels, values=None, description=None):
        """
        :param name: metric name (lower-cased for prometheus)
        :param labels: list of label names
        :param values: optional initial values (nested dict or list of rows)
        :param description: metric help text; derived from name if omitted
        """
        self.name = name
        self.values = dict()
        self.labels = labels
        if description is None:
            description = name.replace("_", " ")
        # Let prometheus_client validate the metric name.
        self.metric = Gauge(name.lower(), description, labels)
        # Label paths currently registered on the gauge.
        # BUG FIX: must be initialized *before* update() runs; the original
        # assigned it afterwards, so passing `values` to the constructor
        # raised AttributeError inside __update_metrics().
        self.label_sets = list()
        if values is not None:
            self.update(values)

    def __zero_missing_value(self, value):
        """Recursively replace every leaf of *value* with 0."""
        if isinstance(value, dict):
            for label in value:
                value[label] = self.__zero_missing_value(value[label])
        else:
            value = 0
        return value

    def __remove_empty_values(self, values):
        """Prune leaves < 1 and the empty sub-dicts this leaves behind."""
        removeable_keys = list()
        if isinstance(values, dict):
            for label in values:
                if not isinstance(values[label], dict):
                    if values[label] < 1:
                        removeable_keys.append(label)
                else:
                    values[label] = self.__remove_empty_values(values[label])
                    if not values[label]:
                        removeable_keys.append(label)
            # BUG FIX: the original did `del values[label]` (the stale loop
            # variable) for every collected key, deleting the wrong entry
            # and raising KeyError on the second iteration.
            for key in removeable_keys:
                del values[key]
        return values

    def __remove_empty_label_sets(self, values, labels=None):
        """Unregister gauge children whose value dropped below 1."""
        if not labels:
            labels = list()
        if isinstance(values, dict):
            for label in values:
                self.__remove_empty_label_sets(values[label],
                                               labels + [label])
        else:
            if values < 1 and labels in self.label_sets:
                self.metric.remove(*labels)
                # BUG FIX: `del self.label_sets[labels]` raised TypeError
                # (a list cannot be indexed by a list); remove by value.
                self.label_sets.remove(labels)

    def __update_old_values(self, old_values, values):
        """Zero every previously known leaf that is absent from *values*."""
        for label in old_values:
            if label not in values:
                old_values[label] = self.__zero_missing_value(
                    old_values[label])
            elif isinstance(old_values[label], dict):
                old_values[label] = self.__update_old_values(
                    old_values[label], values[label])
        return old_values

    def __add_new_values(self, old_values, values):
        """Merge *values* into *old_values*, recursing into sub-dicts."""
        for label in values:
            if not isinstance(values[label], dict):
                old_values[label] = values[label]
            elif label in old_values:
                old_values[label] = self.__add_new_values(
                    old_values[label], values[label])
            else:
                old_values[label] = values[label]
        return old_values

    def __update_metrics(self, values, labels=None):
        """Publish a gauge child for every complete label path in *values*."""
        for label in values:
            # BUG FIX: the original appended the working list to
            # self.label_sets and then pop()ed the same object, truncating
            # the stored path; build a fresh path list per entry instead.
            path = ([] if labels is None else list(labels)) + [label]
            if isinstance(values[label], dict):
                self.__update_metrics(values[label], path)
            else:
                if path not in self.label_sets:
                    self.label_sets.append(path)
                self.metric.labels(*path).set(values[label])

    def __add_value_dict(self, d, items, value):
        """Insert *value* into nested dict *d* along the path *items*.

        Note: consumes *items* (pops from the front), as in the original.
        """
        if len(items) > 1:
            if items[0] not in d:
                d[items[0]] = dict()
            current = items[0]
            items.pop(0)
            d[current] = self.__add_value_dict(d[current], items, value)
        else:
            d[items[0]] = value
        return d

    def get_value(self):
        """Return the current nested value dict."""
        return self.values

    def get_name(self):
        """Return the metric name."""
        return self.name

    def get_labels(self):
        """Return the label names."""
        return self.labels

    def update(self, values):
        """Update the gauge from *values* — a nested dict, or a list of rows
        where each row is the label values followed by the numeric value."""
        if isinstance(values, list):
            values_new = dict()
            for v in values:
                v_temp = v[:len(v) - 1]
                metric_value = v[len(v) - 1]
                values_new = self.__add_value_dict(values_new, v_temp,
                                                   metric_value)
            values = values_new
        self.values = self.__add_new_values(self.values, values)
        self.values = self.__update_old_values(self.values, values)
        self.__remove_empty_label_sets(self.values)
        self.values = self.__remove_empty_values(self.values)
        self.__update_metrics(self.values)
["user", "host", "db", "permission"]) try: while (True): MySQLStats = MySQLUserInformation(db) counter = 0 for users in MySQLStats.GetUsers(): for permission in users['Permission'].items(): gauge.labels(users['User'], users["Host"], users["DB"], permission[0]).set(permission[1]) counter += 1 sleep(int(config.GetWebServerConfiguration()['refresh'])) ''' Removes gauges Why do we need to remove them? I Discovered a problem when a MySQL user is removed or loses all permissions. each gauge representing that users permissions are no longer generated by the previous loop. However, the gauge holds on to the previous versions of the gauges and continues to display them when queried. This resulted in deleted users still showing up when queried with old values. ''' for users in MySQLStats.GetUsers(): for permission in users['Permission'].items(): gauge.remove(users['User'], users["Host"], users["DB"], permission[0]) # Cleanup del MySQLStats finally: db.close()
class DirectPrometheusDB(BasePrometheusDB):
    """
    Database that expose received data as metric in order to be scrapped by a
    prometheus instance.

    Each report is exposed directly (no aggregation). Could only be used
    with a pusher actor.
    """

    def __init__(self, report_type: Type[Report], port: int, address: str,
                 metric_name: str, metric_description: str, tags: List[str]):
        """
        :param address: address that expose the metric
        :param port: exposition port
        :param metric_name: name of the exposed gauge
        :param metric_description: short sentence that describe the metric
        :param tags: metadata used to tag metric
        """
        BasePrometheusDB.__init__(self, report_type, port, address,
                                  metric_name, metric_description, tags)
        self.energy_metric = None
        self.current_ts = 0
        self.exposed_measure = {}
        self.measure_for_current_period = {}

    def __iter__(self):
        raise NotImplementedError()

    def _init_metrics(self):
        # Single gauge; sensor/target plus user tags form the label set.
        self.energy_metric = Gauge(self.metric_name, self.metric_description,
                                   ['sensor', 'target'] + self.tags)

    def _expose_data(self, _, measure):
        labels = {name: measure['tags'][name] for name in measure['tags']}
        try:
            self.energy_metric.labels(**labels).set(measure['value'])
        except TypeError:
            # Fallback for the older prometheus-client API where labels()
            # accepted a single dict argument.
            self.energy_metric.labels(labels).set(measure['value'])

    def _report_to_measure_and_key(self, report):
        measure = self.report_type.to_prometheus(report, self.tags)
        key = ''.join(str(measure['tags'][tag]) for tag in measure['tags'])
        return key, measure

    def _update_exposed_measure(self):
        # Unregister label sets that were not refreshed during the period
        # that just ended, then roll the current period over.
        for key, args in self.exposed_measure.items():
            if key not in self.measure_for_current_period:
                self.energy_metric.remove(*args)
        self.exposed_measure = self.measure_for_current_period
        self.measure_for_current_period = {}

    def save(self, report: Report):
        """
        Override from BaseDB

        :param report: Report to save
        """
        key, measure = self._report_to_measure_and_key(report)
        if self.current_ts != measure['time']:
            self.current_ts = measure['time']
            self._update_exposed_measure()
        self._expose_data(key, measure)
        if key not in self.measure_for_current_period:
            self.measure_for_current_period[key] = [
                measure['tags'][label] for label in measure['tags']]

    def save_many(self, reports: List[Report]):
        """
        Save a batch of data

        :param reports: Batch of data.
        """
        for report in reports:
            self.save(report)
pprint(e) pprint(result) except Exception as e: print(f"Got a mystery error for {name}:") pprint(e) if __name__ == "__main__": logging.basicConfig(level=logging.DEBUG) start_http_server(9402) with open(sys.argv[1]) as channel_data: channels = json.load(channel_data) revisions = {} while True: for (channel, about) in channels.items(): measurement = measure_channel(channel) if measurement is not None: revision = measurement['revision'] CHANNEL_UPDATE_TIME.labels(channel=channel).set(measurement['timestamp']) CHANNEL_REVISION.labels(channel=channel, revision=measurement['revision']).set(1) CHANNEL_CURRENT.labels(channel=channel).set(int(about['current'])) print('updated {}'.format(channel)) previous_revision = revisions.pop(channel, None) revisions[channel] = revision if previous_revision and previous_revision != revision: CHANNEL_REVISION.remove(channel, previous_revision)
class sensor_server(object):
    """Reads rtl_433-style JSON readings from a subprocess and republishes
    acurite sensor data as prometheus gauges, expiring sensors that have
    not been seen for METRIC_TTL seconds."""

    def __init__(self, listen_port, sleep=LOOP_SLEEP_TIME, cmd=CMD):
        """
        :param listen_port: port for the prometheus HTTP exposition server
        :param sleep: seconds to wait between readings
        :param cmd: command line producing one JSON reading per stdout line
        """
        self.sleep = sleep
        self.last_seen = defaultdict(lambda: 0)
        # sensor id -> model label value; needed because Gauge.remove()
        # requires *all* label values, not just the id.
        self.sensor_models = {}
        start_http_server(listen_port)
        self.acurite_temp = Gauge('acurite_temp',
                                  'acurite temperature in DegF',
                                  ['id', 'model'])
        self.acurite_hum = Gauge('acurite_hum', 'acurite humidity in %RH',
                                 ['id', 'model'])
        self.acurite_battery_low = Gauge('acurite_battery_low',
                                         'acurite battery_low',
                                         ['id', 'model'])
        self.acurite_last_seen = Gauge('acurite_last_seen',
                                       'acurite last_seen', ['id', 'model'])
        # BUG FIX: the `cmd` parameter was ignored (module-level CMD was
        # always used); honor it, keeping CMD as the default.
        self.process = subprocess.Popen(shlex.split(cmd),
                                        stdout=subprocess.PIPE)

    def expire_sensors(self):
        """Drop all gauges for sensors not heard from within METRIC_TTL."""
        for sensor_id in list(self.last_seen.keys()):
            age = time.time() - self.last_seen[sensor_id]
            if age <= METRIC_TTL:
                continue
            logging.info('removing stale sensor: %s age: %s', sensor_id, age)
            # BUG FIX: Gauge.remove() must receive every label value; the
            # original passed only the id, which raises ValueError
            # ("incorrect label count") so stale sensors were never expired.
            model = self.sensor_models.get(sensor_id)
            for gauge in (self.acurite_temp, self.acurite_hum,
                          self.acurite_battery_low, self.acurite_last_seen):
                try:
                    gauge.remove(sensor_id, model)
                except KeyError:
                    # Label set never created (e.g. sensor without humidity).
                    pass
            del self.last_seen[sensor_id]
            self.sensor_models.pop(sensor_id, None)

    def serve_forever(self):
        """Consume readings from the subprocess forever."""
        # TODO: Redo with poll() so we can expire the last sensor
        while True:
            data = json.loads(self.process.stdout.readline())
            # Acurite 986 Sensor uses "battery=OK" instead of "battery_low=0"
            if data.get('battery'):
                battery = data.get('battery')
                if battery == "OK":
                    data['battery_low'] = 0
                else:
                    data['battery_low'] = 1
            logging.debug(data)
            sensor_id = data.get('id')
            model = MODEL_MAP.get(data.get('model'))
            self.acurite_temp.labels(id=sensor_id, model=model).set(
                data.get('temperature_F'))
            if data.get('humidity'):
                self.acurite_hum.labels(id=sensor_id, model=model).set(
                    data.get('humidity'))
            self.acurite_battery_low.labels(id=sensor_id, model=model).set(
                data.get('battery_low'))
            now = time.time()
            self.acurite_last_seen.labels(id=sensor_id, model=model).set(now)
            self.last_seen[sensor_id] = now
            self.sensor_models[sensor_id] = model
            self.expire_sensors()
            logging.debug("sleeping %s...", self.sleep)
            time.sleep(self.sleep)