def update_gauges(metrics):
    metric_dict = {}
    for (name_list, label_dict, value) in metrics:
        metric_name = format_metric_name(name_list)
        if metric_name not in metric_dict:
            metric_dict[metric_name] = (tuple(label_dict.keys()), {})

        label_keys = metric_dict[metric_name][0]
        label_values = tuple([
            format_label_value(label_dict[key])
            for key in label_keys
        ])

        metric_dict[metric_name][1][label_values] = value

    for metric_name, (label_keys, value_dict) in metric_dict.items():
        if metric_name in gauges:
            (old_label_values_set, gauge) = gauges[metric_name]
        else:
            old_label_values_set = set()
            gauge = Gauge(metric_name, '', label_keys)

        new_label_values_set = set(value_dict.keys())

        for label_values in old_label_values_set - new_label_values_set:
            gauge.remove(*label_values)

        for label_values, value in value_dict.items():
            if label_values:
                gauge.labels(*label_values).set(value)
            else:
                gauge.set(value)

        gauges[metric_name] = (new_label_values_set, gauge)
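This snippet relies on module-level state and helpers defined elsewhere in its source. A minimal sketch of what they plausibly look like (the exact formatting rules are assumptions):

# Hypothetical module-level context assumed by update_gauges() above.
gauges = {}  # metric name -> (set of exported label-value tuples, Gauge)

def format_metric_name(name_list):
    # Assumption: metric name parts are joined with underscores.
    return '_'.join(name_list)

def format_label_value(value):
    # Assumption: label values are exported as plain strings.
    return str(value)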
Example #2
class QueryMetric(object):
    def __init__(self, name, cfg, verbose):
        self.name = name
        description = cfg['description']
        self.query = cfg['query']
        labels = cfg['labels']
        # Let prometheus_client handle name validation
        self.prometheus_gauge = Gauge(self.name, description, labels)
        self.verbose = verbose
        # HQL count won't return 0 so need to explicitly delete labelsets
        self.labelsets = set()

    def update(self, queryservice):
        results = queryservice.projection(
            self.query, None, {'omero.group': '-1'})
        if not results:
            if self.verbose:
                print('%s NULL' % self.name)
        prev_labelsets = self.labelsets
        self.labelsets = set()
        for r in results:
            labelvalues = [lv for lv in unwrap(r[1:])]
            value = unwrap(r[0])
            self.prometheus_gauge.labels(*labelvalues).set(value)
            self.labelsets.add(tuple(labelvalues))
            if self.verbose:
                print('%s %s %s' % (self.name, labelvalues, value))
        # Now delete absent labelsets
        for rm in prev_labelsets.difference(self.labelsets):
            self.prometheus_gauge.remove(*rm)
            if self.verbose:
                print('Removed %s %s' % (self.name, rm))
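QueryMetric reads the description, HQL query, and label names from the cfg dict passed to __init__. A hypothetical configuration entry and construction (the query text and metric name are illustrative only):

# Hypothetical cfg entry; key names follow the cfg[...] lookups in __init__.
cfg = {
    'description': 'OMERO sessions per owner',
    'query': 'SELECT COUNT(s), s.owner.omeName FROM Session s GROUP BY s.owner.omeName',
    'labels': ['username'],
}
sessions_metric = QueryMetric('omero_sessions', cfg, verbose=True)
# sessions_metric.update(queryservice) would then be called on each poll cycle.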
class PrometheusEndpoint(BaseFlatliner):
    def __init__(self, pruning_interval: int = 300):
        super().__init__()
        self.pruning_interval = pruning_interval

        _LOGGER.info(
            "Prometheus Endpoint initialized. Metric pruning interval is {0} seconds"
            .format(self.pruning_interval))

        self.published_metric_timestamps = defaultdict(list)
        # This is the gauge metric where the metric data is published
        self.weirdness_score_gauge = Gauge(
            'weirdness_score',
            'Weirdness score for the given Cluster and Version',
            ['cluster', 'version'])

    def on_next(self, x):
        try:
            # update the published metrics
            self.weirdness_score_gauge.labels(cluster=str(x.cluster),
                                              version=str(x.version)).set(
                                                  x.weirdness_score)

            # Store timestamp when the metric was published and metric version info
            self.published_metric_timestamps[str(
                x.cluster)] = [int(time()), str(x.version)]
        except Exception as e:
            _LOGGER.error(
                "Couldn't process the following packet {0}. Reason: {1}".
                format(x, str(e)))
            raise e

    def _delete_stale_metrics(self):
        '''
        This function will remove any metric that was published $(pruning_interval) seconds ago or older
        '''
        timestamp_threshold = int(time()) - self.pruning_interval

        for cluster_id in list(self.published_metric_timestamps):
            if self.published_metric_timestamps[cluster_id][
                    0] < timestamp_threshold:
                # if metric is stale, stop publishing it
                self.weirdness_score_gauge.remove(
                    cluster_id,
                    self.published_metric_timestamps[cluster_id][1])
                del self.published_metric_timestamps[cluster_id]

    def start_server(self):
        # Start http server to expose metrics
        http_server_port = 8000
        start_http_server(http_server_port)
        _LOGGER.info(
            "http server started on port {0}".format(http_server_port))
        while True:
            # delete stale exposed metrics
            self._delete_stale_metrics()

            _LOGGER.debug("Next metric pruning will be in {} seconds".format(
                self.pruning_interval))
            sleep(self.pruning_interval)
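start_server() blocks in its pruning loop, so a caller would typically run it on its own thread while the stream pipeline keeps calling on_next(). A hypothetical wiring:

import threading

# Hypothetical wiring: serve and prune in the background, publish from the pipeline.
endpoint = PrometheusEndpoint(pruning_interval=300)
threading.Thread(target=endpoint.start_server, daemon=True).start()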
Example #4
def update_gauges(metrics):
    metric_dict = {}
    for (name_list, label_dict, value) in metrics:
        metric_name = format_metric_name(name_list)
        if metric_name not in metric_dict:
            metric_dict[metric_name] = (tuple(label_dict.keys()), {})

        label_keys = metric_dict[metric_name][0]
        label_values = tuple(
            [format_label_value(label_dict[key]) for key in label_keys])

        metric_dict[metric_name][1][label_values] = value

    for metric_name, (label_keys, value_dict) in metric_dict.items():
        if metric_name in gauges:
            (old_label_values_set, gauge) = gauges[metric_name]
        else:
            old_label_values_set = set()
            gauge = Gauge(metric_name, '', label_keys)

        new_label_values_set = set(value_dict.keys())

        for label_values in old_label_values_set - new_label_values_set:
            gauge.remove(*label_values)

        for label_values, value in value_dict.items():
            if label_values:
                gauge.labels(*label_values).set(value)
            else:
                gauge.set(value)

        gauges[metric_name] = (new_label_values_set, gauge)
class ManilaShareServerNanny(ManilaNanny):
    """ Manila Share Server """
    def __init__(self, config_file, interval, prom_port, http_port, handler):
        super(ManilaShareServerNanny, self).__init__(config_file,
                                                     interval,
                                                     prom_port=prom_port,
                                                     http_port=http_port,
                                                     handler=handler)
        self.orphan_snapshots_lock = Lock()
        self.orphan_snapshots: Dict[str, Dict[str, str]] = {}
        self.orphan_snapshots_gauge = Gauge(
            'manila_nanny_orphan_share_snapshots',
            'Orphan Manila Share Snapshots', ['share_id', 'snapshot_id'])

    def _run(self):
        s = self.query_orphan_snapshots()
        orphan_snapshots = {
            snapshot_id: {
                'snapshot_id': snapshot_id,
                'share_id': share_id
            }
            for snapshot_id, share_id in s
        }
        for snapshot_id in orphan_snapshots:
            share_id = orphan_snapshots[snapshot_id]['share_id']
            self.orphan_snapshots_gauge.labels(share_id=share_id,
                                               snapshot_id=snapshot_id).set(1)
        for snapshot_id in self.orphan_snapshots:
            if snapshot_id not in orphan_snapshots:
                share_id = self.orphan_snapshots[snapshot_id]['share_id']
                self.orphan_snapshots_gauge.remove(share_id, snapshot_id)
        with self.orphan_snapshots_lock:
            self.orphan_snapshots = update_records(self.orphan_snapshots,
                                                   orphan_snapshots)

    def query_orphan_snapshots(self):
        Snapshots = Table('share_snapshots', self.db_metadata, autoload=True)
        Shares = Table('shares', self.db_metadata, autoload=True)
        q = select([Snapshots.c.id, Snapshots.c.share_id])\
            .select_from(Snapshots.join(Shares, Snapshots.c.share_id == Shares.c.id))\
            .where(Snapshots.c.deleted == 'False')\
            .where(Shares.c.deleted != 'False')
        return list(q.execute())

    @response
    def get_orphan_snapshots(self):
        with self.orphan_snapshots_lock:
            return list(self.orphan_snapshots.values())
Example #6
class metric_label:
    def __init__(self, name, label, value=None, description=None):
        self.name = name
        self.values = dict()
        self.label_values = list()
        self.label = label
        if description is None:
            description = name.replace("_", " ")
        self.metric = Gauge(name.lower(), description, [label])
        if value is not None:
            self.metric.labels(label).set(value)

    def update(self, value, remove_labels=True):
        removable_labels = list()
        if len(self.values) < 1:
            remove_labels = False
        for label in value:
            self.values[label] = value[label]
            self.metric.labels(label).set(value[label])
            if label not in self.label_values:
                self.label_values.append(label)
        for label in self.label_values:
            if label not in value:
                self.metric.labels(label).set(0)
                self.values[label] = 0
            if self.values[label] == 0:
                removable_labels.append(label)
        if remove_labels:
            for l in removable_labels:
                if l in self.label_values:
                    self.metric.remove(l)
                    del self.values[l]
                    self.label_values.remove(l)

    def get_value(self):
        return self.values

    def get_label_values(self):
        return self.label_values

    def get_label(self):
        return self.label
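A hypothetical use of the metric_label wrapper above: update() takes a dict mapping label values to gauge values; label values missing from the dict are zeroed and, when remove_labels is true, dropped from the exposition in the same call:

# Hypothetical usage; metric and label names are illustrative.
queue_depth = metric_label('Queue_Depth', 'queue')
queue_depth.update({'orders': 12, 'emails': 3})
queue_depth.update({'orders': 9})  # 'emails' is set to 0 and then removed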
Example #7
def update_gauges(metrics):
    metric_dict = group_metrics(metrics)

    for metric_name, (label_keys, value_dict) in metric_dict.items():
        if metric_name in gauges:
            (old_label_values_set, gauge) = gauges[metric_name]
        else:
            old_label_values_set = set()
            gauge = Gauge(metric_name, '', label_keys)

        new_label_values_set = set(value_dict.keys())

        for label_values in old_label_values_set - new_label_values_set:
            gauge.remove(*label_values)

        for label_values, value in value_dict.items():
            if label_values:
                gauge.labels(*label_values).set(value)
            else:
                gauge.set(value)

        gauges[metric_name] = (new_label_values_set, gauge)
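This variant factors the grouping step out into group_metrics(); its body corresponds to the inline grouping loop of the earlier update_gauges examples:

def group_metrics(metrics):
    # Group raw samples into {metric name: (label keys, {label values: value})},
    # mirroring the loop shown in the earlier variants.
    metric_dict = {}
    for (name_list, label_dict, value) in metrics:
        metric_name = format_metric_name(name_list)
        if metric_name not in metric_dict:
            metric_dict[metric_name] = (tuple(label_dict.keys()), {})
        label_keys = metric_dict[metric_name][0]
        label_values = tuple(
            format_label_value(label_dict[key]) for key in label_keys)
        metric_dict[metric_name][1][label_values] = value
    return metric_dict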
Example #8
class MetricsContainer(object):
    def __init__(self, reader):
        super(MetricsContainer, self).__init__()

        self.logger = logging.getLogger(self.__class__.__name__)

        self.reader = reader
        self.registry = UpdatingRegistryCollector()
        self.registry.on_collect = self.update

        self.min_delay_time = datetime.timedelta(seconds=5)
        self.last_update = None
        self.last_job_names = set()

        self._job_metrics = []

        self._create_job_metric("schedule_time", "schedule_seconds",
                                "Job schedule time")
        self._create_job_metric("start_time", "start_seconds",
                                "Job start time")
        self._create_job_metric("end_time", "end_seconds", "Job end time")
        self._create_job_metric("real_end_time", "real_end_seconds",
                                "Job real end time")
        self._create_job_metric("files", "files_count",
                                "Number of files fetched in job")
        self._create_job_metric("bytes", "size_bytes", "Size of job data")
        self._create_job_metric("status", "status", "Job status")
        self._create_job_metric("level", "level", "Job level")
        self._create_job_metric("id", "id", "Job id")

        self.m_job_bytes_total = Gauge('bacula_job_bytes_total',
                                       'Total size of job',
                                       registry=self.registry,
                                       labelnames=["name"])

    def _create_job_metric(self, model_field, name, description):
        m = Gauge('bacula_finished_job_%s' % name,
                  description,
                  registry=self.registry,
                  labelnames=["name"])

        def update(model):
            v = model[model_field]
            v = 'nan' if v is None else v
            m.labels(model["name"]).set(v)

        def remove(name):
            m.remove(name)

        self._job_metrics.append((update, remove))

    def update(self):
        if self.last_update is not None and (
                datetime.datetime.now() -
                self.last_update) <= self.min_delay_time:
            return

        self.logger.info("Updating metrics")

        job_names = set()
        for job in self.reader.list_global_finished_jobs():
            job_names.add(job["name"])
            for updater, _remover in self._job_metrics:
                updater(job)

        stats = self.reader.get_global_stats()
        for k, v in stats['disk_used_per_job'].items():
            self.m_job_bytes_total.labels(k).set(v)

        for i in self.last_job_names.difference(job_names):
            self.logger.debug("Removing job %s from metrics", i)
            for _updater, remover in self._job_metrics:
                remover(i)

            self.m_job_bytes_total.remove(i)

        self.last_job_names = job_names
        self.last_update = datetime.datetime.now()
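UpdatingRegistryCollector is project-specific; from its use here it appears to be a registry that invokes the on_collect hook before each scrape so the metrics refresh lazily. A plausible minimal sketch (an assumption, not the project's actual implementation):

from prometheus_client import CollectorRegistry

class UpdatingRegistryCollector(CollectorRegistry):
    """Registry that calls an optional on_collect hook before every collection."""
    on_collect = None

    def collect(self):
        if self.on_collect is not None:
            self.on_collect()
        return super().collect()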
                                id=id,
                                version=version,
                                ip=ipAddress).set(nodeinfo[id]["cpuUsage"])
                memoryFreeKB.labels(name=name,
                                    id=id,
                                    version=version,
                                    ip=ipAddress).set(
                                        nodeinfo[id]["memoryFreeKB"])
                storageFreeKB.labels(name=name,
                                     id=id,
                                     version=version,
                                     ip=ipAddress).set(
                                         nodeinfo[id]["storageFreeKB"])
                edgeItems.append([name, id, version, ipAddress])
                try:
                    previousItems.remove([name, id, version, ipAddress])
                except ValueError:
                    # item was not in the previous label collection
                    pass
        logging.debug("label collection to remove: %s", previousItems)
        for item in previousItems:
            totalMemoryKB.remove(*item)
            totalStorageKB.remove(*item)
            cpuUsage.remove(*item)
            memoryFreeKB.remove(*item)
            storageFreeKB.remove(*item)
        previousItems = edgeItems
    else:
        logging.error("edges error: %s", response.content)

    time.sleep(interval)
Example #10
    except Exception as e:
        print(f"Got a mystery error for {name}:")
        pprint(e)


if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    start_http_server(9402)

    with open(sys.argv[1]) as channel_data:
        channels = json.load(channel_data)

    revisions = {}

    while True:
        for (channel, about) in channels.items():
            measurement = measure_channel(channel)
            if measurement is not None:
                revision = measurement['revision']
                status = about.get('status', '')
                variant = about.get('variant', '')
                current = int(status != 'unmaintained')
                CHANNEL_UPDATE_TIME.labels(channel=channel).set(measurement['timestamp'])
                CHANNEL_REVISION.labels(channel=channel, revision=revision, status=status, variant=variant, current=current).set(1)
                CHANNEL_CURRENT.labels(channel=channel).set(current)
                print('updated {}'.format(channel))
                previous_revision = revisions.pop(channel, None)
                revisions[channel] = revision
                if previous_revision and previous_revision != revision:
                    CHANNEL_REVISION.remove(channel, previous_revision, status, variant, current)
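The gauges referenced in this loop (CHANNEL_UPDATE_TIME, CHANNEL_REVISION, CHANNEL_CURRENT) are defined elsewhere in the source. A sketch of their label sets as inferred from the .labels(...) calls above (metric names and help strings are assumptions):

from prometheus_client import Gauge

# Assumed definitions, inferred from the labels() calls in the loop above.
CHANNEL_UPDATE_TIME = Gauge('channel_update_time',
                            'Timestamp of the last successful measurement',
                            ['channel'])
CHANNEL_REVISION = Gauge('channel_revision',
                         'Published revision, one series per observed revision',
                         ['channel', 'revision', 'status', 'variant', 'current'])
CHANNEL_CURRENT = Gauge('channel_current',
                        'Whether the channel is still maintained',
                        ['channel'])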
class Exporter():

    def __init__(self):
        self.basebackup_exception = False
        self.xlog_exception = False
        self.bbs = []
        self.last_archive_check = None
        self.archive_status = None

        # Declare metrics
        self.basebackup = Gauge('walg_basebackup',
                                'Remote Basebackups',
                                ['start_wal_segment', 'start_lsn'])
        self.basebackup_count = Gauge('walg_basebackup_count',
                                      'Remote Basebackups count')
        self.basebackup_count.set_function(lambda: len(self.bbs))

        self.last_upload = Gauge('walg_last_upload',
                                 'Last upload of incremental or full backup',
                                 ['type'])
        self.last_upload.labels('xlog').set_function(
            self.last_xlog_upload_callback)
        self.last_upload.labels('basebackup').set_function(
            lambda: self.bbs[len(self.bbs) - 1]['start_time'].timestamp()
            if self.bbs else 0
        )
        self.oldest_basebackup = Gauge('walg_oldest_basebackup',
                                       'oldest full backup')
        self.oldest_basebackup.set_function(
            lambda: self.bbs[0]['start_time'].timestamp() if self.bbs else 0
        )

        self.xlog_ready = Gauge('walg_missing_remote_wal_segment_at_end',
                                'Xlog ready for upload')
        self.xlog_ready.set_function(self.xlog_ready_callback)

        self.exception = Gauge('walg_exception',
                               'Wal-g exception: 2 for basebackup error, '
                               '3 for xlog error and '
                               '5 for remote error')
        self.exception.set_function(
            lambda: ((1 if self.basebackup_exception else 0) +
                     (2 if self.xlog_exception else 0)))

        self.xlog_since_last_bb = Gauge('walg_xlogs_since_basebackup',
                                        'Xlog uploaded since last base backup')
        self.xlog_since_last_bb.set_function(self.xlog_since_last_bb_callback)

        self.last_backup_duration = Gauge('walg_last_backup_duration',
                                          'Duration of the last full backup')
        self.last_backup_duration.set_function(
            lambda: ((self.bbs[len(self.bbs) - 1]['finish_time'] -
                      self.bbs[len(self.bbs) - 1]['start_time']).total_seconds()
                     if self.bbs else 0)
        )
        self.walg_backup_fuse = Gauge(
            'walg_backup_fuse',
            '0 backup fuse is OK, 1 backup fuse is burnt')
        self.walg_backup_fuse.set_function(self.backup_fuse_callback)
        # Fetch remote base backups
        self.update_basebackup()

    def update_basebackup(self, *unused):
        """
            When this script receive a SIGHUP signal, it will call backup-list
            and update metrics about basebackups
        """

        info('Updating basebackups metrics...')
        try:
            # Fetch remote backup list
            res = subprocess.run(["wal-g", "backup-list",
                                  "--detail", "--json"],
                                 capture_output=True, check=True)
            new_bbs = list(map(format_date, json.loads(res.stdout)))
            new_bbs.sort(key=lambda bb: bb['start_time'])
            new_bbs_name = [bb['backup_name'] for bb in new_bbs]
            old_bbs_name = [bb['backup_name'] for bb in self.bbs]
            bb_deleted = 0

            # Remove metrics for deleted backups
            for bb in self.bbs:
                if bb['backup_name'] not in new_bbs_name:
                    # Backup deleted
                    self.basebackup.remove(bb['wal_file_name'],
                                           bb['start_lsn'])
                    bb_deleted = bb_deleted + 1
            # Add metrics for new backups
            for bb in new_bbs:
                if bb['backup_name'] not in old_bbs_name:
                    (self.basebackup.labels(bb['wal_file_name'],
                                            bb['start_lsn'])
                     .set(bb['start_time'].timestamp()))
            # Update backup list
            self.bbs = new_bbs
            info("%s basebackups found (first: %s, last: %s), %s deleted",
                 len(self.bbs),
                 self.bbs[0]['start_time'],
                 self.bbs[len(self.bbs) - 1]['start_time'],
                 bb_deleted)

            self.basebackup_exception = False
        except subprocess.CalledProcessError as e:
            error(e)
            self.basebackup_exception = True

    def last_archive_status(self):
        if (self.last_archive_check is None or
                datetime.datetime.now().timestamp() -
                self.last_archive_check > 1):
            self.archive_status = self._last_archive_status()
            self.last_archive_check = datetime.datetime.now().timestamp()
        return self.archive_status

    def _last_archive_status(self):
        with psycopg2.connect(
            host=os.getenv('PGHOST', 'localhost'),
            port=os.getenv('PGPORT', '5432'),
            user=os.getenv('PGUSER', 'postgres'),
            password=os.getenv('PGPASSWORD'),
            dbname=os.getenv('PGDATABASE', 'postgres'),

        ) as db_connection:
            db_connection.autocommit = True
            with db_connection.cursor(cursor_factory=DictCursor) as c:
                c.execute('SELECT archived_count, failed_count, '
                          'last_archived_wal, '
                          'last_archived_time, '
                          'last_failed_wal, '
                          'last_failed_time '
                          'FROM pg_stat_archiver')
                res = c.fetchone()
                if not bool(res):
                    raise Exception("Cannot fetch archive status")
                return res

    def last_xlog_upload_callback(self):
        archive_status = self.last_archive_status()
        return archive_status['last_archived_time'].timestamp()

    def xlog_ready_callback(self):
        res = 0
        try:
            for f in os.listdir(archive_dir):
                # search for xlog waiting for upload
                if READY_WAL_RE.match(f):
                    res += 1
            self.xlog_exception = 0
        except FileNotFoundError:
            self.xlog_exception = 1
        return res

    def xlog_since_last_bb_callback(self):
        # Compute xlog_since_last_basebackup
        if self.bbs:
            archive_status = self.last_archive_status()
            return wal_diff(archive_status['last_archived_wal'],
                            self.bbs[len(self.bbs) - 1]['wal_file_name'])
        else:
            return 0

    def backup_fuse_callback(self):
        return int(os.path.exists('/tmp/failed_pg_archive'))
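The Exporter above leans heavily on Gauge.set_function(), which makes a gauge evaluate a callable at scrape time instead of storing an explicitly set value. A minimal standalone sketch of that pattern:

from prometheus_client import Gauge, start_http_server

# Minimal sketch of the set_function() pattern: the lambda runs on every scrape.
pending = []
pending_gauge = Gauge('demo_pending_items', 'Items currently pending')
pending_gauge.set_function(lambda: len(pending))

if __name__ == '__main__':
    start_http_server(8000)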
                      (gwid, devices[gwid]['name'], on, w, mA, V))

            label_values = [
                devices[gwid]['name'], gwid, data['ip'], data['version']
            ]
            gs.labels(*label_values).set(on)
            gp.labels(*label_values).set(w)
            ga.labels(*label_values).set(mA / 1000)
            gv.labels(*label_values).set(V)

            devices[gwid]['lastseen'] = time.time()
        else:
            print("Error: %s %s. Wrong device key?" % (err, gwid))

        # cleanup metrics if device offline for more than 30 sec
        for d in devices:
            if devices[d][
                    'lastseen'] and devices[d]['lastseen'] < time.time() - 30:
                print("device %s (%s) gone offline" %
                      (devices[d]['id'], devices[d]['name']))
                devices[d]['lastseen'] = False

                label_values = [
                    devices[d]['name'], d, devices[d]['ip'],
                    devices[d]['version']
                ]
                gs.remove(*label_values)
                gp.remove(*label_values)
                ga.remove(*label_values)
                gv.remove(*label_values)
Example #13
class ManilaShareSyncNanny(ManilaNanny):
    def __init__(self, config_file, prom_host, interval, tasks, dry_run_tasks,
                 prom_port, http_port, handler):
        super(ManilaShareSyncNanny, self).__init__(config_file,
                                                   interval,
                                                   prom_port=prom_port,
                                                   http_port=http_port,
                                                   handler=handler)
        self.prom_host = prom_host + "/api/v1/query"

        self.MANILA_NANNY_SHARE_SYNC_FAILURE = Counter(
            'manila_nanny_share_sync_failure', '')
        self.MANILA_SYNC_SHARE_SIZE_COUNTER = Counter(
            'manila_nanny_sync_share_size', 'manila nanny sync share size')
        self.MANILA_RESET_SHARE_ERROR_COUNTER = Counter(
            'manila_nanny_reset_share_error',
            'manila nanny reset share status to error')
        self.manila_missing_volume_shares_gauge = Gauge(
            'manila_nanny_share_missing_volume',
            'Manila Share missing backend volume',
            ['share_id', 'instance_id', 'share_name', 'share_status'])
        self.manila_orphan_volumes_gauge = Gauge(
            'manila_nanny_orphan_volumes',
            'Orphan backend volumes of Manila service',
            ['share_id', 'share_status', 'filer', 'vserver', 'volume'])
        self.manila_offline_volumes_gauge = Gauge(
            'manila_nanny_offline_volumes',
            'Offline volumes of Manila service',
            ['share_id', 'share_status', 'filer', 'vserver', 'volume'])

        self._tasks = tasks
        self._dry_run_tasks = dry_run_tasks
        if not any(tasks.values()):
            raise Exception('All tasks are disabled')

        self.orphan_volumes_lock = Lock()
        self.orphan_volumes = {}
        self.missing_volumes_lock = Lock()
        self.missing_volumes = {}
        self.offline_volumes_lock = Lock()
        self.offline_volumes = {}

    def _run(self):
        # Need to recreate manila client each run, because of session timeout
        # self.renew_manila_client()

        # fetch data
        try:
            if self._tasks[TASK_SHARE_SIZE] or self._tasks[TASK_MISSING_VOLUME]\
                    or self._tasks[TASK_ORPHAN_VOLUME]:
                _share_list = self._query_shares()
                _volume_list = self._get_netapp_volumes()
                _shares, _orphan_volumes = self._merge_share_and_volumes(
                    _share_list, _volume_list)

            if self._tasks[TASK_OFFLINE_VOLUME]:
                _offline_volume_list = self._get_netapp_volumes('offline')
        except Exception as e:
            log.warning(e)
            self.MANILA_NANNY_SHARE_SYNC_FAILURE.inc()
            return

        if self._tasks[TASK_SHARE_SIZE]:
            dry_run = self._dry_run_tasks[TASK_SHARE_SIZE]
            self.sync_share_size(_shares, dry_run)

        if self._tasks[TASK_MISSING_VOLUME]:
            dry_run = self._dry_run_tasks[TASK_MISSING_VOLUME]
            self.process_missing_volume(_shares, dry_run)

        if self._tasks[TASK_ORPHAN_VOLUME]:
            dry_run = self._dry_run_tasks[TASK_ORPHAN_VOLUME]
            self.process_orphan_volumes(_orphan_volumes, dry_run)

        if self._tasks[TASK_OFFLINE_VOLUME]:
            dry_run = self._dry_run_tasks[TASK_OFFLINE_VOLUME]
            self.process_offline_volumes(_offline_volume_list, dry_run)

    def sync_share_size(self, shares, dry_run=True):
        """ Backend volume exists, but share size does not match """
        msg = "share %s: share size != netapp volume size (%d != %d)"
        msg_dry_run = "Dry run: " + msg
        for (share_id, _), share in shares.items():
            if 'volume' not in share:
                continue
            size, vsize = share['size'], share['volume']['size']

            # skip volumes reporting zero size; they may be offline
            if vsize == 0:
                continue

            if share['updated_at'] is not None:
                if is_utcts_recent(share['updated_at'], 3600):
                    continue

            if size != vsize:
                if dry_run:
                    log.info(msg_dry_run, share_id, size, vsize)
                else:
                    log.info(msg, share_id, size, vsize)
                    self.set_share_size(share_id, vsize)
                    self.MANILA_SYNC_SHARE_SIZE_COUNTER.inc()

    def process_missing_volume(self, shares, dry_run=True):
        """ Set share state to error when backend volume is missing

        Ignore shares that are created/updated within 6 hours.
        """
        missing_volumes = {}

        for (share_id, instance_id), share in shares.items():
            if 'volume' not in share:
                # check if shares are created/updated recently
                if is_utcts_recent(share['updated_at'] or share['created_at'],
                                   6 * 3600):
                    continue

                share_name = share['name']
                share_status = share['status']
                msg = f'ManilaShareMissingVolume: share={share_id}, '\
                    f'instance={instance_id}, status={share_status}'

                if not dry_run:
                    if share_status == 'available':
                        self._reset_share_state(share_id, 'error')
                        share_status = 'error'
                        msg = f'ManilaShareMissingVolume: Set share {share_id} to error'
                else:
                    msg = 'Dry run: ' + msg

                log.info(msg)

                self.manila_missing_volume_shares_gauge.labels(
                    share_id=share_id,
                    instance_id=instance_id,
                    share_name=share_name,
                    share_status=share_status,
                ).set(1)

                missing_volumes[(share_id, instance_id)] = {
                    'share_id': share_id,
                    'instance_id': instance_id,
                    'share_name': share_name,
                    'share_status': share_status,
                }

        for (share_id, instance_id) in self.missing_volumes:
            s = self.missing_volumes[(share_id, instance_id)]
            share_name, share_status = s['share_name'], s['share_status']
            if (share_id, instance_id) not in shares:
                self.manila_missing_volume_shares_gauge.remove(
                    share_id, instance_id, share_name, share_status)

        with self.missing_volumes_lock:
            self.missing_volumes = update_records(self.missing_volumes,
                                                  missing_volumes)

    def process_offline_volumes(self, offline_volume_list, dry_run=True):
        """ offline volume

        @params offline_volumes:
            List[Volume]

        Volume: Dict[Keys['volume', 'vserver', 'filer'], Any]
        """

        _offline_volumes = {}
        for vol in offline_volume_list:
            if vol['volume'].startswith('share'):
                instance_id = vol['volume'][6:].replace('_', '-')
                _offline_volumes[instance_id] = vol

        # find associated share for offline volumes
        _shares = self._query_shares_by_instance_ids(
            list(_offline_volumes.keys()))
        for s in _shares:
            instance_id = s['instance_id']
            if instance_id in _offline_volumes:
                _offline_volumes[instance_id].update({'share': s})

        # ignore the shares that are updated/deleted recently
        _offline_volume_keys = list(_offline_volumes.keys())
        for vol_key, vol in _offline_volumes.items():
            share = vol.get('share')
            if share is not None:
                if share['deleted_at'] or share['updated_at']:
                    if is_utcts_recent(
                            share['deleted_at'] or share['updated_at'],
                            6 * 3600):
                        _offline_volume_keys.remove(vol_key)

        # process remaining volume
        offline_volumes = {}
        for vol_key in _offline_volume_keys:
            vol = _offline_volumes[vol_key]
            name, filer, vserver = vol['volume'], vol['filer'], vol['vserver']
            share = vol.get('share')
            if share is not None:
                share_id, status = share['share_id'], share['status']
            else:
                share_id, status = '', ''

            self.manila_offline_volumes_gauge.labels(
                share_id=share_id,
                share_status=status,
                volume=name,
                vserver=vserver,
                filer=filer,
            ).set(1)

            offline_volumes[name] = {
                'volume': name,
                'filer': filer,
                'vserver': vserver,
                'share_id': share_id,
                'status': status,
            }

        for volname, vol in self.offline_volumes.items():
            if volname not in offline_volumes:
                self.manila_offline_volumes_gauge.remove(
                    vol['share_id'], vol['status'], vol['filer'],
                    vol['vserver'], vol['volume'])

        with self.offline_volumes_lock:
            self.offline_volumes = update_records(self.offline_volumes,
                                                  offline_volumes)

    def process_orphan_volumes(self, volumes, dry_run=True):
        """ orphan volumes

        Check whether the corresponding manila shares were deleted recently (hard-coded as 6 hours).
        @params volumes: Dict[(FilerName, InstanceId), Volume]
        """
        # share instance id
        # volume key (extracted from volume name) is manila instance id
        vol_keys = list(volumes.keys())

        # Shares: List[Share])
        # Share.Keys: share_id, instance_id, deleted_at, status
        shares = self._query_shares_by_instance_ids(
            [instance_id for (_, instance_id) in vol_keys])

        # merge share into volume
        r = re.compile('^manila-share-netapp-(?P<filer>.+)@(?P=filer)#.*')
        for s in shares:
            m = r.match(s['host'])
            if m:
                filer = m.group('filer')
            else:
                continue
            if (filer, s['instance_id']) in volumes:
                volumes[(filer, s['instance_id'])].update({'share': s})

        # loop over vol
        for (filer, instance_id), vol in volumes.items():
            # double check if the manila shares are deleted recently
            if 'share' in vol:
                share = vol['share']
                deleted_at = share.get('deleted_at', None)
                if deleted_at is not None:
                    if (datetime.utcnow() -
                            deleted_at).total_seconds() < 6 * 3600:
                        vol_keys.remove((filer, instance_id))

        orphan_volumes = {}
        for vol_key in vol_keys:
            vol = volumes[vol_key]
            volume, vserver, filer = vol['volume'], vol['vserver'], vol[
                'filer']
            if 'share' in vol:
                share_id = vol['share']['share_id']
                share_deleted = vol['share']['deleted']
                share_deleted_at = vol['share']['deleted_at']
                instance_id = vol['share']['instance_id']
                instance_status = vol['share']['status']
            else:
                share_id, share_deleted, share_deleted_at, instance_id, instance_status = None, None, None, None, ''

            self.manila_orphan_volumes_gauge.labels(
                share_id=share_id,
                share_status=instance_status,
                filer=filer,
                vserver=vserver,
                volume=volume,
            ).set(1)

            orphan_volumes[vol_key] = {
                'filer': filer,
                'vserver': vserver,
                'volume': volume,
                'share_id': share_id,
                'share_deleted': share_deleted,
                'share_deleted_at': share_deleted_at,
                'instance_id': instance_id,
                'instance_status': instance_status,
            }

        for k, vol in self.orphan_volumes.items():
            if k not in orphan_volumes:
                self.manila_orphan_volumes_gauge.remove(
                    vol['share_id'], vol['instance_status'], vol['filer'],
                    vol['vserver'], vol['volume'])

        with self.orphan_volumes_lock:
            self.orphan_volumes = update_records(self.orphan_volumes,
                                                 orphan_volumes)

    def _get_netapp_volumes(self, status='online'):
        """ get netapp volumes from prometheus metrics
        return [<vol>, <vol>, ...]
        """
        def _merge_dicts(dict_a, dict_b):
            dict_a.update(dict_b)
            return dict_a

        def _filter_labels(vol):
            return {
                'volume': vol['volume'],
                'vserver': vol['vserver'],
                'filer': vol['filer'],
            }

        if status == 'online':
            query = "netapp_volume_total_bytes{app='netapp-capacity-exporter-manila'} + "\
                    "netapp_volume_snapshot_reserved_bytes"
            results = self._fetch_prom_metrics(query)
            return [
                _merge_dicts(_filter_labels(vol['metric']),
                             {'size': int(vol['value'][1]) / ONEGB})
                for vol in results
            ]

        if status == 'offline':
            query = "netapp_volume_state{app='netapp-capacity-exporter-manila'}==3"
            results = self._fetch_prom_metrics(query)
            return [_filter_labels(vol['metric']) for vol in results]

    def _fetch_prom_metrics(self, query):
        try:
            r = requests.get(self.prom_host,
                             params={
                                 'query': query,
                                 'time': time.time()
                             })
        except Exception as e:
            raise type(e)(f'_fetch_prom_metrics(query=\"{query}\"): {e}')
        if r.status_code != 200:
            return None
        return r.json()['data']['result']

    def _query_shares_by_instance_ids(self, instance_ids):
        """
        @return List[Share]

        Share: Dict[Keys['share_id', 'instance_id', 'created_at', 'updated_at', 'deleted_at',
                         'deleted', 'status', 'host'], Any]
        """
        shares_t = Table('shares', self.db_metadata, autoload=True)
        instances_t = Table('share_instances', self.db_metadata, autoload=True)
        q = select([shares_t.c.id.label('share_id'),
                    shares_t.c.created_at,
                    shares_t.c.updated_at,
                    shares_t.c.deleted_at,
                    shares_t.c.deleted,
                    instances_t.c.status,
                    instances_t.c.id.label('instance_id'),
                    instances_t.c.host,
                    ])\
            .where(shares_t.c.id == instances_t.c.share_id)\
            .where(instances_t.c.id.in_(instance_ids))
        r = q.execute()
        return [dict(zip(r.keys(), x)) for x in r.fetchall()]

    def _query_shares(self):
        """ Get shares that are not deleted """

        shares = Table('shares', self.db_metadata, autoload=True)
        instances = Table('share_instances', self.db_metadata, autoload=True)

        stmt = select([shares.c.id,
                       shares.c.display_name,
                       shares.c.size,
                       shares.c.created_at,
                       shares.c.updated_at,
                       instances.c.id,
                       instances.c.status,
                       instances.c.host,
                       ])\
            .select_from(
                shares.join(instances, shares.c.id == instances.c.share_id))\
            .where(shares.c.deleted == 'False')

        shares = []
        for (sid, name, size, ctime, utime, siid, status,
             host) in stmt.execute():
            shares.append({
                'id': sid,
                'name': name,
                'size': size,
                'created_at': ctime,
                'updated_at': utime,
                'instance_id': siid,
                'status': status,
                'host': host,
            })
        return shares

    def _merge_share_and_volumes(self, shares, volumes):
        """ Merge shares and volumes by share id and volume name

        Assuming the volume name is `share_[share_instance_id]`. Update the share object
        with the volume fields ("filer", "vserver", "volume", "volume_size").

        Args:
            shares: List[]
            volumes: List[]

        Return:
            (shares, volumes): merged shares and unmerged volumes

            shares: Dict[(ShareId, InstanceId): Share]
            volumes: Dict[VolumeName: Volume]
        """
        r = re.compile('^manila-share-netapp-(?P<filer>.+)@(?P=filer)#.*')
        _shares = {(s['id'], s['instance_id']): s for s in shares}
        _volumes = {(vol['filer'], vol['volume'][6:].replace('_', '-')): vol
                    for vol in volumes if vol['volume'].startswith('share_')}
        for (share_id, instance_id), share in _shares.items():
            m = r.match(share['host'])
            if m:
                filer = m.group('filer')
                vol = _volumes.pop((filer, instance_id), None)
            else:
                continue
            if vol:
                _shares[(share_id, instance_id)].update({'volume': vol})
        return _shares, _volumes

    def set_share_size(self, share_id, share_size):
        now = datetime.utcnow()
        shares_t = Table('shares', self.db_metadata, autoload=True)
        share_instances_t = Table('share_instances',
                                  self.db_metadata,
                                  autoload=True)
        update(shares_t) \
            .values(updated_at=now, size=share_size) \
            .where(shares_t.c.id == share_instances_t.c.share_id) \
            .where(and_(shares_t.c.id == share_id,
                        share_instances_t.c.status == 'available')) \
            .execute()

    def _reset_share_state(self, share_id, state):
        try:
            self.manilaclient.shares.reset_state(share_id, state)
        except Exception as e:
            log.exception("_reset_share_state(share_id=%s, state=%s): %s",
                          share_id, state, e)

    @response
    def get_orphan_volumes(self):
        with self.orphan_volumes_lock:
            orphan_volumes = list(self.orphan_volumes.values())
        return orphan_volumes

    @response
    def get_offline_volumes(self):
        with self.offline_volumes_lock:
            offline_volumes = list(self.offline_volumes.values())
        return offline_volumes

    @response
    def get_missing_volume_shares(self):
        with self.missing_volumes_lock:
            missing_volumes = list(self.missing_volumes.values())
        return sorted(missing_volumes, key=lambda v: v['share_id'])
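The host regex used in _merge_share_and_volumes() and process_orphan_volumes() extracts the filer name from Manila host strings of the form manila-share-netapp-<filer>@<filer>#<pool>. A quick illustration (the host string is made up):

import re

r = re.compile('^manila-share-netapp-(?P<filer>.+)@(?P=filer)#.*')
# Illustrative host value only; real values come from the share_instances table.
m = r.match('manila-share-netapp-stnpca1-st051@stnpca1-st051#aggr1')
print(m.group('filer'))  # -> 'stnpca1-st051'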
Example #14
    items = format['items']
    return items


while 1:
    config.load_incluster_config()
    k8s_api_obj = client.CoreV1Api()
    nss = get_items(k8s_api_obj.list_namespace())
    for i in nss:
        ns = i['metadata']['name']
        pods = get_items(k8s_api_obj.list_namespaced_pod(ns))
        pvcs = get_items(
            k8s_api_obj.list_namespaced_persistent_volume_claim(ns))
        for p in pods:
            for vc in p['spec']['volumes']:
                if vc['persistent_volume_claim']:
                    pvc = vc['persistent_volume_claim']['claim_name']
                    for v in pvcs:
                        if v['metadata']['name'] == pvc:
                            vol = v['spec']['volume_name']
                    pod = p['metadata']['name']
                    print("PVC: %s, VOLUME: %s, POD: %s" % (pvc, vol, pod))
                    if pvc in pool.keys():
                        g.remove(pvc, pool[pvc][0], pool[pvc][1])
                        g.labels(pvc, vol, pod)
                        pool[pvc] = [vol, pod]
                    else:
                        g.labels(pvc, vol, pod)
                        pool[pvc] = [vol, pod]
    sleep(15)
g = Gauge('pvc_mapping', 'fetching the mapping between pod and pvc',
          ['persistentvolumeclaim', 'mountedby'])
pool = {}
while 1:
    config.load_incluster_config()
    k8s_api_obj = client.CoreV1Api()
    ret = k8s_api_obj.list_namespace()
    ret = ret.to_dict()
    ret = ret['items']
    for i in ret:
        na = i['metadata']['name']
        print(na)
        pods = k8s_api_obj.list_namespaced_pod(na)
        pods = pods.to_dict()
        pods = pods['items']
        for p in pods:
            for v in p['spec']['volumes']:
                if v['persistent_volume_claim']:
                    pvc = v['persistent_volume_claim']['claim_name']
                    pod = p['metadata']['name']
                    print(pvc, pod)
                    #g.labels(pvc,pod).set(1)
                    if pvc in pool.keys():
                        g.remove(pvc, pool[pvc])
                        g.labels(pvc, pod)
                        pool[pvc] = pod
                    else:
                        g.labels(pvc, pod)
                        pool[pvc] = pod
    sleep(15)
Example #16
class HTTPRequestMetric(threading.Thread):

    def __init__(self, result_q, q_timeout, port, timeout, **kwargs):
        super().__init__(**kwargs)
        self.metrics = dict()
        self.m1 = Gauge(COMPUTER_STATE, "PC state", ["uid", "statename"])
        self.m2 = Gauge(HOST_UPTIME, "Host uptime", ["uid", "hostname", "ip", "domainname", "versionsystem"])
        self.m3 = Gauge(USER_UPTIME, "User uptime", ["uid", "hostname", "ip", "domainname", "username", "versionsystem"])
        self._port = port
        self._timeout = timeout
        self._stopped = False
        start_http_server(self._port)
        self.m1_old = []
        self.m2_old = []
        self.m3_old = []
        self.state_off = {}
        #
        self.result_q = result_q
        self.timeout = q_timeout
        self._seconds = 0

    def stop(self):
        self._stopped = True

    def clear_metrics(self):
        for el in self.m1_old:
            self.m1.remove(*el)
        self.m1_old = []
        for el in self.m2_old:
            self.m2.remove(*el)
        self.m2_old = []
        for el in self.m3_old:
            self.m3.remove(*el)
        self.m3_old = []

    #
    def check_state(self):
        for key in self.metrics:
            if self.metrics[key]["state"] == STATE_OFF:
                continue
            if self.metrics[key]["state"] == STATE_UNKNOWN:
                if datetime.datetime.now().timestamp() - self.metrics[key]["time_last_action"] > STATE_TIMEOUT_OFF:
                    self.metrics[key]["state"] = STATE_OFF
            else:
                if datetime.datetime.now().timestamp() - self.metrics[key]["time_last_action"] > STATE_TIMEOUT_UNKNOWN:
                    self.metrics[key]["state"] = STATE_UNKNOWN

    #
    def read_queue(self):
        self._seconds += self._timeout
        if self._seconds > 60:
            self._seconds = 0
            self.check_state()
        try:
            data = self.result_q.get(block=True, timeout=self.timeout)
            self.metrics[data["uid"]] = data.copy()
            self.result_q.task_done()
        except queue.Empty:
            pass
        except Exception:
            self.result_q.task_done()

    #
    def make_metrics(self):
        for key, val in self.metrics.items():
            self.m1.labels(key, STATE[val["state"]]).set(1)
            self.m1_old.append((key, STATE[val["state"]]))
            if val["state"] != STATE_OFF and self.state_off.get(key, 0) < STATE_TIMEOUT_UNKNOWN:
                self.m2.labels(key, val["hostname"], val["ip"], val["domainname"], val["versionsystem"]).set(val["host_uptime"])
                self.m2_old.append((key, val["hostname"], val["ip"], val["domainname"], val["versionsystem"]))
                if val["username"]:
                    self.m3.labels(key, val["hostname"], val["ip"], val["domainname"], val["username"], val["versionsystem"]).set(
                        val["user_uptime"])
                    self.m3_old.append((key, val["hostname"], val["ip"], val["domainname"], val["username"], val["versionsystem"]))
                    self.state_off[key] = \
                        self.state_off.get(key, 0) + self._timeout if val["state"] == STATE_UNKNOWN else 0
            elif val["state"] == STATE_ON:
                self.state_off[key] = 0

    def run(self):
        """Основной цикл обработки данных"""
        while not self._stopped:
            self.read_queue()
            self.clear_metrics()
            self.make_metrics()
            time.sleep(self._timeout)
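A hypothetical way to drive the thread above: a producer pushes dicts carrying the keys read in read_queue() and make_metrics() onto result_q (constants such as COMPUTER_STATE and STATE are defined elsewhere in the source):

import queue

# Hypothetical wiring; port and timeouts are illustrative.
result_q = queue.Queue()
exporter = HTTPRequestMetric(result_q, q_timeout=1, port=9100, timeout=5)
exporter.start()  # threading.Thread subclass: runs the read/clear/make loop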
Example #17
class PrometheusDB(BasePrometheusDB):
    """
    Database that exposes received data as metrics so they can be scraped by a Prometheus instance.
    Can only be used with a pusher actor.
    """
    def __init__(self, report_type: Type[Report], port: int, address: str,
                 metric_name: str, metric_description: str,
                 aggregation_periode: int, tags: List[str]):
        """
        :param address:             address that exposes the metric
        :param port:
        :param metric_name:
        :param metric_description:  short sentence that describes the metric
        :param aggregation_periode: number of seconds over which values are aggregated before statistics are computed on them
        :param tags:                metadata used to tag the metric
        """
        BasePrometheusDB.__init__(self, report_type, port, address,
                                  metric_name, metric_description, tags)
        self.aggregation_periode = aggregation_periode
        self.final_tags = ['sensor', 'target'] + tags

        self.mean_metric = None
        self.std_metric = None
        self.min_metric = None
        self.max_metric = None

        self.exposed_measure = {}
        self.measure_for_current_period = {}
        self.current_period_end = 0

        self.buffer = StatBuffer(aggregation_periode)

    def __iter__(self):
        raise NotImplementedError()

    def _init_metrics(self):
        self.mean_metric = Gauge(self.metric_name + '_mean',
                                 self.metric_description + '(MEAN)',
                                 self.final_tags)
        self.std_metric = Gauge(self.metric_name + '_std',
                                self.metric_description + '(STD)',
                                self.final_tags)
        self.min_metric = Gauge(self.metric_name + '_min',
                                self.metric_description + '(MIN)',
                                self.final_tags)
        self.max_metric = Gauge(self.metric_name + '_max',
                                self.metric_description + '(MAX)',
                                self.final_tags)

    def _expose_data(self, key):
        aggregated_value = self.buffer.get_stats(key)
        if aggregated_value is None:
            return

        kwargs = {
            label: aggregated_value['tags'][label]
            for label in self.final_tags
        }
        try:
            self.mean_metric.labels(**kwargs).set(aggregated_value['mean'])
            self.std_metric.labels(**kwargs).set(aggregated_value['std'])
            self.min_metric.labels(**kwargs).set(aggregated_value['min'])
            self.max_metric.labels(**kwargs).set(aggregated_value['max'])
        except TypeError:
            self.mean_metric.labels(kwargs).set(aggregated_value['mean'])
            self.std_metric.labels(kwargs).set(aggregated_value['std'])
            self.min_metric.labels(kwargs).set(aggregated_value['min'])
            self.max_metric.labels(kwargs).set(aggregated_value['max'])

    def _report_to_measure_and_key(self, report):
        value = self.report_type.to_prometheus(report, self.tags)
        key = ''.join([str(value['tags'][tag]) for tag in self.final_tags])
        return key, value

    def _update_exposed_measure(self):
        updated_exposed_measure = {}

        for key in self.exposed_measure:
            if key not in self.measure_for_current_period:
                args = self.exposed_measure[key]
                self.mean_metric.remove(*args)
                self.std_metric.remove(*args)
                self.min_metric.remove(*args)
                self.max_metric.remove(*args)
            else:
                updated_exposed_measure[key] = self.exposed_measure[key]
        self.exposed_measure = updated_exposed_measure

    def _append_measure_from_old_period_to_buffer_and_expose_data(self):
        for old_key, old_measure_list in \
                self.measure_for_current_period.items():
            for old_measure in old_measure_list:
                self.buffer.append(old_measure, old_key)
            self._expose_data(old_key)

    def _reinit_period(self, new_measure_time):
        self.current_period_end = new_measure_time + self.aggregation_periode
        self.measure_for_current_period = {}

    def save(self, report: Report):
        """
        Override from BaseDB

        :param report: Report to save
        """
        key, measure = self._report_to_measure_and_key(report)
        if measure['time'] > self.current_period_end:
            self._append_measure_from_old_period_to_buffer_and_expose_data()
            self._update_exposed_measure()
            self._reinit_period(measure['time'])

        if key not in self.exposed_measure:
            args = [measure['tags'][label] for label in self.final_tags]
            self.exposed_measure[key] = args

        if key not in self.measure_for_current_period:
            self.measure_for_current_period[key] = []

        self.measure_for_current_period[key].append(measure)

    def save_many(self, reports: List[Report]):
        """
        Save a batch of data

        :param reports: Batch of data.
        """
        for report in reports:
            self.save(report)
class runnerExports:
    def __init__(self):
        # Define metrics to expose
        self.metric_runner_org_status = Gauge(
            "github_runner_org_status",
            "Runner status",
            ["name", "id", "os", "labels", "status"],
        )
        self.metric_runner_org_label_status = Gauge(
            "github_runner_org_label_status",
            "Runner label status",
            ["name", "id", "os", "label", "status"],
        )

        self.metric_runner_org_busy = Gauge(
            "github_runner_org_busy",
            "Runner busy status",
            ["name", "id", "os", "labels", "busy"],
        )

    def export_metrics(self, runner_list: list):
        current_runners = []

        for runner in runner_list:
            agg_labels = self.aggregate_labels(runner["labels"])
            # Export metrics
            self.export_runner_status(runner, agg_labels)
            self.export_runner_busy(runner, agg_labels)
            # Updated active runners list
            current_runners.append(str(runner["id"]))

        self.ghostbuster(current_runners)

    def ghostbuster(self, current_runners):
        """
            If a runner has been deleted, this function removes its series from the metrics
        """
        # Remove ghosts from metric_runner_org_status metric
        runners_to_remove = []
        for (
                runner_name,
                runner_id,
                runner_os,
                labels,
                runner_status,
        ) in self.metric_runner_org_status._metrics:
            if runner_id not in current_runners:
                runners_to_remove.append(
                    (runner_name, runner_id, runner_os, labels, runner_status))
        for (
                runner_name,
                runner_id,
                runner_os,
                labels,
                runner_status,
        ) in runners_to_remove:
            self.metric_runner_org_status.remove(runner_name, runner_id,
                                                 runner_os, labels,
                                                 runner_status)
        # Remove ghosts from metric_runner_org_label_status metric
        runners_to_remove = []
        for (
                runner_name,
                runner_id,
                runner_os,
                runner_label,
                runner_status,
        ) in self.metric_runner_org_label_status._metrics:
            if runner_id not in current_runners:
                runners_to_remove.append((runner_name, runner_id, runner_os,
                                          runner_label, runner_status))
        for (
                runner_name,
                runner_id,
                runner_os,
                runner_label,
                runner_status,
        ) in runners_to_remove:
            self.metric_runner_org_label_status.remove(runner_name, runner_id,
                                                       runner_os, runner_label,
                                                       runner_status)
        # Remove ghosts from the metric_runner_org_busy metric
        runners_to_remove = []
        for (
                runner_name,
                runner_id,
                runner_os,
                labels,
                runner_busy,
        ) in self.metric_runner_org_busy._metrics:
            if runner_id not in current_runners:
                runners_to_remove.append(
                    (runner_name, runner_id, runner_os, labels, runner_busy))
        for runner_name, runner_id, runner_os, labels, runner_busy in runners_to_remove:
            self.metric_runner_org_busy.remove(runner_name, runner_id,
                                               runner_os, labels, runner_busy)

    def aggregate_labels(self, labels: list):
        """
            Aggregate the runner's custom labels into a comma-separated string
        """
        agg_labels = []
        for label in labels:
            if label["type"] == "custom":
                agg_labels.append(label["name"])

        return ",".join(agg_labels)

    def export_runner_status(self, runner: dict, agg_labels: str):
        online = 1
        offline = 0
        if runner.get("status") != "online":
            online = 0
            offline = 1

        self.metric_runner_org_status.labels(runner.get("name"),
                                             runner.get("id"),
                                             runner.get("os"), agg_labels,
                                             "online").set(online)
        self.metric_runner_org_status.labels(
            runner.get("name"),
            runner.get("id"),
            runner.get("os"),
            agg_labels,
            "offline",
        ).set(offline)

        for label in runner["labels"]:
            self.metric_runner_org_label_status.labels(
                runner.get("name"),
                runner.get("id"),
                runner.get("os"),
                label["name"],
                "online",
            ).set(online)

            self.metric_runner_org_label_status.labels(
                runner.get("name"),
                runner.get("id"),
                runner.get("os"),
                label["name"],
                "offline",
            ).set(offline)

    def export_runner_busy(self, runner: dict, agg_labels: str):
        idle = 1
        busy = 0

        if runner.get("busy") == True:
            idle = 0
            busy = 1

        self.metric_runner_org_busy.labels(runner.get("name"),
                                           runner.get("id"), runner.get("os"),
                                           agg_labels, "true").set(busy)

        self.metric_runner_org_busy.labels(runner.get("name"),
                                           runner.get("id"), runner.get("os"),
                                           agg_labels, "false").set(idle)
Example #19
class metric_labels:
    def __init__(self, name, labels, values=None, description=None):
        self.name = name
        self.values = dict()
        self.labels = labels
        # label_sets must exist before update() can be called below
        self.label_sets = list()
        if description is None:
            description = name.replace("_", " ")
        self.metric = Gauge(name.lower(), description, labels)
        if values is not None:
            self.update(values)

    def __zero_missing_value(self, value):
        if isinstance(value, dict):
            for label in value:
                value[label] = self.__zero_missing_value(value[label])
        else:
            value = 0
        return value

    def __remove_empty_values(self, values):
        removable_labels = list()
        if isinstance(values, dict):
            for label in values:
                if not isinstance(values[label], dict):
                    if values[label] < 1:
                        removable_labels.append(label)
                else:
                    values[label] = self.__remove_empty_values(values[label])
                    if not values[label]:
                        removable_labels.append(label)
        for removable_label in removable_labels:
            del values[removable_label]
        return values

    def __remove_empty_label_sets(self, values, labels=None):
        if not labels:
            labels = list()
        if isinstance(values, dict):
            for label in values:
                labels_new = labels.copy()
                labels_new.append(label)
                self.__remove_empty_label_sets(values[label], labels_new)
        else:
            if values < 1:
                if labels in self.label_sets:
                    self.metric.remove(*labels)
                    self.label_sets.remove(labels)

    def __update_old_values(self, old_values, values):
        for label in old_values:
            if not label in values:
                old_values[label] = self.__zero_missing_value(
                    old_values[label])
            else:
                if isinstance(old_values[label], dict):
                    old_values[label] = self.__update_old_values(
                        old_values[label], values[label])
        return old_values

    def __add_new_values(self, old_values, values):

        for label in values:
            if not isinstance(values[label], dict):
                old_values[label] = values[label]
            else:
                if label in old_values:
                    old_values[label] = self.__add_new_values(
                        old_values[label], values[label])
                else:
                    old_values[label] = values[label]

        return old_values

    def __update_metrics(self, values, labels=None):
        for label in values:
            # Build the label path for this branch of the nested value dict
            labels_tmp = list(labels) if labels is not None else list()
            labels_tmp.append(label)

            if not isinstance(values[label], dict):
                if labels_tmp not in self.label_sets:
                    self.label_sets.append(labels_tmp)
                self.metric.labels(*labels_tmp).set(values[label])
            else:
                self.__update_metrics(values[label], labels_tmp)

    def __add_value_dict(self, d, items, value):
        if len(items) > 1:
            if not items[0] in d:
                d[items[0]] = dict()
            current = items[0]
            items.pop(0)
            d[current] = self.__add_value_dict(d[current], items, value)
        else:
            d[items[0]] = value
        return d

    def get_value(self):
        return self.values

    def get_name(self):
        return self.name

    def get_labels(self):
        return self.labels

    def update(self, values):
        # Accept either a nested dict of values or a list of rows where the
        # last element of each row is the value and the preceding elements are
        # the label values.
        if isinstance(values, list):
            values_new = dict()
            for v in values:
                v_temp = v[:-1]
                metric_value = v[-1]
                values_new = self.__add_value_dict(values_new, v_temp,
                                                   metric_value)
            values = values_new
        self.values = self.__add_new_values(self.values, values)
        self.values = self.__update_old_values(self.values, values)
        self.__remove_empty_label_sets(self.values)
        self.values = self.__remove_empty_values(self.values)
        self.__update_metrics(self.values)
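
# A minimal usage sketch for metric_labels (hypothetical metric and label
# names, hypothetical port). update() accepts either a nested dict or a list
# of rows whose last element is the value:
if __name__ == "__main__":
    from prometheus_client import start_http_server

    start_http_server(8000)
    queue_depth = metric_labels("Queue_Depth", ["queue", "priority"])
    queue_depth.update([["orders", "high", 5], ["orders", "low", 2]])
    # A later update that no longer reports "orders"/"low" zeroes that entry
    # and removes its label set from the gauge.
    queue_depth.update([["orders", "high", 3]])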
                  ["user", "host", "db", "permission"])

    try:
        while True:
            MySQLStats = MySQLUserInformation(db)
            counter = 0

            for users in MySQLStats.GetUsers():
                for permission in users['Permission'].items():
                    gauge.labels(users['User'], users["Host"], users["DB"],
                                 permission[0]).set(permission[1])
                counter += 1
            sleep(int(config.GetWebServerConfiguration()['refresh']))
            # Remove the gauges after each refresh.
            # Why remove them? When a MySQL user is removed or loses all of its
            # permissions, the gauges for that user's permissions are no longer
            # set by the loop above, but prometheus_client keeps the old label
            # sets and keeps exposing their last values, so deleted users would
            # still show up with stale permissions.
            for users in MySQLStats.GetUsers():
                for permission in users['Permission'].items():
                    gauge.remove(users['User'], users["Host"], users["DB"],
                                 permission[0])

            # Cleanup
            del MySQLStats

    finally:
        db.close()
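
# A minimal sketch of the stale-label-set behaviour described above, using
# only prometheus_client (hypothetical metric name, independent of MySQL):
from prometheus_client import Gauge

demo_gauge = Gauge("demo_user_permission", "demo permission flag", ["user"])
demo_gauge.labels("alice").set(1)  # "alice" becomes an exposed label set
demo_gauge.remove("alice")         # without remove(), the "alice" series keeps
                                   # being exposed with its last value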
Example #21
class DirectPrometheusDB(BasePrometheusDB):
    """
    Database that expose received data as metric in order to be scrapped by a prometheus instance
    Could only be used with a pusher actor
    """
    def __init__(self, report_type: Type[Report], port: int, address: str,
                 metric_name: str, metric_description: str, tags: List[str]):
        """
        :param address:             address that expose the metric
        :param port:
        :param metric_name:
        :param metric_description:  short sentence that describe the metric
        :param tags: metadata used to tag metric
        """
        BasePrometheusDB.__init__(self, report_type, port, address,
                                  metric_name, metric_description, tags)

        self.energy_metric = None

        self.current_ts = 0
        self.exposed_measure = {}
        self.measure_for_current_period = {}

    def __iter__(self):
        raise NotImplementedError()

    def _init_metrics(self):
        self.energy_metric = Gauge(self.metric_name, self.metric_description,
                                   ['sensor', 'target'] + self.tags)

    def _expose_data(self, _, measure):
        kwargs = {label: measure['tags'][label] for label in measure['tags']}
        try:
            self.energy_metric.labels(**kwargs).set(measure['value'])
        except TypeError:
            self.energy_metric.labels(kwargs).set(measure['value'])

    def _report_to_measure_and_key(self, report):
        value = self.report_type.to_prometheus(report, self.tags)
        key = ''.join([str(value['tags'][tag]) for tag in value['tags']])
        return key, value

    def _update_exposed_measure(self):
        for key in self.exposed_measure:
            if key not in self.measure_for_current_period:
                args = self.exposed_measure[key]
                self.energy_metric.remove(*args)
        self.exposed_measure = self.measure_for_current_period
        self.measure_for_current_period = {}

    def save(self, report: Report):
        """
        Override from BaseDB

        :param report: Report to save
        """
        key, measure = self._report_to_measure_and_key(report)
        if self.current_ts != measure['time']:
            self.current_ts = measure['time']
            self._update_exposed_measure()

        self._expose_data(key, measure)
        if key not in self.measure_for_current_period:
            args = [measure['tags'][label] for label in measure['tags']]
            self.measure_for_current_period[key] = args

    def save_many(self, reports: List[Report]):
        """
        Save a batch of data

        :param reports: Batch of data.
        """
        for report in reports:
            self.save(report)
                pprint(e)
                pprint(result)

    except Exception as e:
        print(f"Got a mystery error for {name}:")
        pprint(e)


if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    start_http_server(9402)

    with open(sys.argv[1]) as channel_data:
        channels = json.load(channel_data)

    revisions = {}

    while True:
        for (channel, about) in channels.items():
            measurement = measure_channel(channel)
            if measurement is not None:
                revision = measurement['revision']
                CHANNEL_UPDATE_TIME.labels(channel=channel).set(measurement['timestamp'])
                CHANNEL_REVISION.labels(channel=channel, revision=measurement['revision']).set(1)
                CHANNEL_CURRENT.labels(channel=channel).set(int(about['current']))
                print('updated {}'.format(channel))
                # Keep only the current revision exposed: drop the label set of
                # the previous revision once it changes.
                previous_revision = revisions.pop(channel, None)
                revisions[channel] = revision
                if previous_revision and previous_revision != revision:
                    CHANNEL_REVISION.remove(channel, previous_revision)
class sensor_server(object):
    def __init__(self, listen_port, sleep=LOOP_SLEEP_TIME, cmd=CMD):
        self.sleep = sleep
        self.last_seen = defaultdict(lambda: 0)
        start_http_server(listen_port)
        self.acurite_temp = Gauge('acurite_temp',
                                  'acurite temperature in DegF',
                                  ['id', 'model'])
        # self.acurite_temp = Gauge(
        #     'acurite_temp', 'acurite temperature in DegF').set_function(lambda: self.show_temp())
        self.acurite_hum = Gauge('acurite_hum', 'acurite humidity in %RH',
                                 ['id', 'model'])
        self.acurite_battery_low = Gauge('acurite_battery_low',
                                         'acurite battery_low',
                                         ['id', 'model'])
        self.acurite_last_seen = Gauge('acurite_last_seen',
                                       'acurite last_seen', ['id', 'model'])
        self.process = subprocess.Popen(shlex.split(cmd),
                                        stdout=subprocess.PIPE)

    def expire_sensors(self):
        # last_seen is keyed by (id, model) so that the full label set can be
        # removed from every gauge once a sensor goes stale.
        for sensor_id, model in list(self.last_seen.keys()):
            age = time.time() - self.last_seen[(sensor_id, model)]
            if age > METRIC_TTL:
                logging.info('removing stale sensor: %s age: %s', sensor_id,
                             age)
                self.acurite_temp.remove(sensor_id, model)
                try:
                    self.acurite_hum.remove(sensor_id, model)
                except KeyError:
                    pass  # this sensor never reported humidity
                self.acurite_battery_low.remove(sensor_id, model)
                self.acurite_last_seen.remove(sensor_id, model)
                del self.last_seen[(sensor_id, model)]

    def serve_forever(self):
        # TODO: Redo with poll() so we can expire the last sensor
        while True:
            data = json.loads(self.process.stdout.readline())
            # Acurite 986 Sensor uses "battery=OK" instead of "battery_low=0"
            if data.get('battery'):
                battery = data.get('battery')
                if battery == "OK":
                    data['battery_low'] = 0
                else:
                    data['battery_low'] = 1
            logging.debug(data)
            # print(self.metrics)

            sensor_id = data.get('id')
            model = MODEL_MAP.get(data.get('model'))
            self.acurite_temp.labels(id=sensor_id, model=model).set(
                data.get('temperature_F'))
            if data.get('humidity'):
                self.acurite_hum.labels(id=sensor_id,
                                        model=model).set(data.get('humidity'))
            self.acurite_battery_low.labels(id=sensor_id, model=model).set(
                data.get('battery_low'))
            now = time.time()
            self.acurite_last_seen.labels(id=sensor_id, model=model).set(now)
            self.last_seen[(sensor_id, model)] = now
            self.expire_sensors()

        logging.debug("sleeping %s...", self.sleep)
        time.sleep(self.sleep)
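
# A minimal launch sketch (hypothetical port; LOOP_SLEEP_TIME, CMD, METRIC_TTL
# and MODEL_MAP are assumed to be defined at module level as used above):
if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    server = sensor_server(listen_port=9100)
    server.serve_forever()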