def check(self, instance):
    """Exercise the tagger: submit gauges tagged at both cardinalities plus one unknown entity."""
    # Low-cardinality tags for a known entity
    low_card_tags = get_tags("test_entity", False)
    self.gauge("metric.low_card", 1, tags=low_card_tags)

    # High-cardinality tags for the same entity
    high_card_tags = get_tags("test_entity", True)
    self.gauge("metric.high_card", 1, tags=high_card_tags)

    # Unknown entity: the tagger returns no tags
    unknown_tags = get_tags("404", True)
    self.gauge("metric.unknown", 1, tags=unknown_tags)
def _process_usage_metric(self, m_name, message, cache): """ Takes a metrics message, a metric name, and a cache dict where it will store container_name --> (value, tags) so that _process_limit_metric can compute usage_pct it also submit said value and tags as a gauge. """ # track containers that still exist in the cache seen_keys = {k: False for k in cache} for metric in message.metric: if self._is_container_metric(metric): c_id = self._get_container_id(metric.label) c_name = self._get_container_label(metric.label, 'name') if not c_name: continue pod_uid = self._get_pod_uid(metric.label) if self.container_filter.is_excluded(c_id, pod_uid): continue tags = get_tags('docker://%s' % c_id, True) tags += self.instance_tags # FIXME we are forced to do that because the Kubelet PodList isn't updated # for static pods, see https://github.com/kubernetes/kubernetes/pull/59948 pod = self._get_pod_by_metric_label(metric.label) if pod is not None and is_static_pending_pod(pod): tags += get_tags( 'kubernetes_pod://%s' % pod["metadata"]["uid"], True) tags += self._get_kube_container_name(metric.label) tags = list(set(tags)) val = getattr(metric, METRIC_TYPES[message.type]).value cache[c_name] = (val, tags) seen_keys[c_name] = True self.check.gauge(m_name, val, tags) # purge the cache for k, seen in seen_keys.iteritems(): if not seen: del cache[k]
def _update_container_metrics(self, instance, subcontainer, pod_list, pod_list_utils):
    """
    Publishes metrics for one cadvisor subcontainer: raw metrics always,
    filesystem usage for plain containers, network errors for pods.
    """
    is_pod = False
    in_static_pod = False
    subcontainer_id = subcontainer.get('id')
    pod_uid = subcontainer.get('labels', {}).get('io.kubernetes.pod.uid')
    k_container_name = subcontainer.get('labels', {}).get('io.kubernetes.container.name')

    # We want to collect network metrics at the pod level
    if k_container_name == "POD" and pod_uid:
        is_pod = True

    # FIXME we are forced to do that because the Kubelet PodList isn't updated
    # for static pods, see https://github.com/kubernetes/kubernetes/pull/59948
    pod = get_pod_by_uid(pod_uid, pod_list)
    if pod is not None and is_static_pending_pod(pod):
        in_static_pod = True

    # Let's see who we have here
    if is_pod:
        tags = tags_for_pod(pod_uid, True)
    elif in_static_pod and k_container_name:
        # FIXME static pods don't have container statuses so we can't
        # get the container id with the scheme, assuming docker here
        tags = tags_for_docker(subcontainer_id, True)
        tags += tags_for_pod(pod_uid, True)
        tags.append("kube_container_name:%s" % k_container_name)
    else:  # Standard container
        # Resolve the container id from (namespace, pod name, container name)
        cid = pod_list_utils.get_cid_by_name_tuple(
            (pod.get('metadata', {}).get('namespace', ""),
             pod.get('metadata', {}).get('name', ""), k_container_name))
        if pod_list_utils.is_excluded(cid):
            self.log.debug("Filtering out " + cid)
            return
        tags = get_tags(cid, True)

    # No tags means the tagger doesn't know this entity; don't submit untagged data
    if not tags:
        self.log.debug("Subcontainer {} doesn't have tags, skipping.".format(subcontainer_id))
        return
    # Merge in instance tags and deduplicate
    tags = list(set(tags + instance.get('tags', [])))

    stats = subcontainer['stats'][-1]  # take the latest
    self._publish_raw_metrics(NAMESPACE, stats, tags, is_pod)

    # Filesystem usage only makes sense for real containers, not pod sandboxes
    if is_pod is False and subcontainer.get("spec", {}).get("has_filesystem") and stats.get('filesystem'):
        fs = stats['filesystem'][-1]
        fs_utilization = float(fs['usage']) / float(fs['capacity'])
        self.gauge(NAMESPACE + '.filesystem.usage_pct', fs_utilization, tags=tags)

    # Network stats are collected at the pod level (see is_pod above)
    if is_pod and subcontainer.get("spec", {}).get("has_network"):
        net = stats['network']
        self.rate(NAMESPACE + '.network_errors',
                  sum(float(net[x]) for x in NET_ERRORS),
                  tags=tags)
def _report_container_spec_metrics(self, pod_list, instance_tags):
    """Reports pod requests & limits by looking at pod specs."""
    for pod in pod_list['items']:
        pod_name = pod.get('metadata', {}).get('name')
        pod_phase = pod.get('status', {}).get('phase')
        # Skip pods in phases we don't report on (e.g. completed/failed)
        if self._should_ignore_pod(pod_name, pod_phase):
            continue
        for ctr in pod['spec']['containers']:
            # No resources section means no requests/limits to report
            if not ctr.get('resources'):
                continue
            c_name = ctr.get('name', '')
            cid = None
            # Match the spec container to its runtime status to get the container id
            for ctr_status in pod['status'].get('containerStatuses', []):
                if ctr_status.get('name') == c_name:
                    # it is already prefixed with 'runtime://'
                    cid = ctr_status.get('containerID')
                    break
            # Container hasn't started yet (no id) — nothing to tag against
            if not cid:
                continue
            pod_uid = pod.get('metadata', {}).get('uid')
            if self.pod_list_utils.is_excluded(cid, pod_uid):
                continue
            tags = get_tags('%s' % cid, True) + instance_tags
            try:
                for resource, value_str in iteritems(
                        ctr.get('resources', {}).get('requests', {})):
                    value = self.parse_quantity(value_str)
                    self.gauge(
                        '{}.{}.requests'.format(self.NAMESPACE, resource), value, tags)
            except (KeyError, AttributeError) as e:
                # Best effort: a malformed quantity shouldn't abort the whole report
                self.log.debug(
                    "Unable to retrieve container requests for %s: %s", c_name, e)
            try:
                for resource, value_str in iteritems(
                        ctr.get('resources', {}).get('limits', {})):
                    value = self.parse_quantity(value_str)
                    self.gauge(
                        '{}.{}.limits'.format(self.NAMESPACE, resource), value, tags)
            except (KeyError, AttributeError) as e:
                self.log.debug(
                    "Unable to retrieve container limits for %s: %s", c_name, e)
def _process_usage_metric(self, m_name, metric, cache, scraper_config): """ Takes a metric object, a metric name, and a cache dict where it will store container_name --> (value, tags) so that _process_limit_metric can compute usage_pct it also submit said value and tags as a gauge. """ # track containers that still exist in the cache seen_keys = {k: False for k in cache} samples = self._sum_values_by_context(metric, self._get_container_id_if_container_metric) for c_id, sample in iteritems(samples): c_name = self._get_container_label(sample[self.SAMPLE_LABELS], 'name') if not c_name: continue pod_uid = self._get_pod_uid(sample[self.SAMPLE_LABELS]) if self.pod_list_utils.is_excluded(c_id, pod_uid): continue tags = get_tags(c_id, True) tags += scraper_config['custom_tags'] # FIXME we are forced to do that because the Kubelet PodList isn't updated # for static pods, see https://github.com/kubernetes/kubernetes/pull/59948 pod = self._get_pod_by_metric_label(sample[self.SAMPLE_LABELS]) if pod is not None and is_static_pending_pod(pod): tags += get_tags('kubernetes_pod://%s' % pod["metadata"]["uid"], True) tags += self._get_kube_container_name(sample[self.SAMPLE_LABELS]) tags = list(set(tags)) val = sample[self.SAMPLE_VALUE] cache[c_name] = (val, tags) seen_keys[c_name] = True self.gauge(m_name, val, tags) # purge the cache for k, seen in iteritems(seen_keys): if not seen: del cache[k]
def _process_pod_rate(self, metric_name, message):
    """Takes a simple metric about a pod, reports it as a rate."""
    if message.type >= len(METRIC_TYPES):
        self.log.error("Metric type %s unsupported for metric %s" % (message.type, message.name))
        return

    value_field = METRIC_TYPES[message.type]
    for series in message.metric:
        if not self._is_pod_metric(series):
            continue
        uid = self._get_pod_uid(series.label)
        # Skip network series for host-networked pods
        if '.network.' in metric_name and self._is_pod_host_networked(uid):
            continue
        pod_tags = get_tags('kubernetes_pod://%s' % uid, True)
        self.rate(metric_name, getattr(series, value_field).value, pod_tags)
def _process_container_rate(self, metric_name, message):
    """Takes a simple metric about a container, reports it as a rate."""
    if message.type >= len(METRIC_TYPES):
        self.log.error("Metric type %s unsupported for metric %s" % (message.type, message.name))
        return

    value_field = METRIC_TYPES[message.type]
    container_series = (m for m in message.metric if self._is_container_metric(m))
    for series in container_series:
        cid = self._get_container_id(series.label)
        tags = get_tags('docker://%s' % cid, True)
        # FIXME we are forced to do that because the Kubelet PodList isn't updated
        # for static pods, see https://github.com/kubernetes/kubernetes/pull/59948
        pod = self._get_pod_by_metric_label(series.label)
        if pod is not None and self._is_static_pending_pod(pod):
            tags += get_tags('kubernetes_pod://%s' % pod["metadata"]["uid"], True)
        tags += self._get_tags_from_labels(series.label)
        # Deduplicate before submitting
        tags = list(set(tags))
        self.rate(metric_name, getattr(series, value_field).value, tags)
def _process_container_metric(self, type, metric_name, metric, scraper_config):
    """
    Takes a simple metric about a container, reports it as a rate or gauge.
    If several series are found for a given container, values are summed before submission.
    """
    if metric.type not in METRIC_TYPES:
        self.log.error("Metric type %s unsupported for metric %s" % (metric.type, metric.name))
        return

    summed = self._sum_values_by_context(metric, self._get_entity_id_if_container_metric)
    for c_id, sample in iteritems(summed):
        labels = sample[self.SAMPLE_LABELS]
        pod_uid = self._get_pod_uid(labels)
        if self.pod_list_utils.is_excluded(c_id, pod_uid):
            continue

        tags = get_tags(c_id, True)
        tags += scraper_config['custom_tags']

        # FIXME we are forced to do that because the Kubelet PodList isn't updated
        # for static pods, see https://github.com/kubernetes/kubernetes/pull/59948
        pod = self._get_pod_by_metric_label(labels)
        if pod is not None and is_static_pending_pod(pod):
            tags += get_tags('kubernetes_pod://%s' % pod["metadata"]["uid"], True)

        tags += self._get_kube_container_name(labels)
        tags = list(set(tags))

        # Dispatch on the requested submission type; unknown types are ignored
        value = sample[self.SAMPLE_VALUE]
        if type == "rate":
            self.rate(metric_name, value, tags)
        elif type == "gauge":
            self.gauge(metric_name, value, tags)
def factcheck(self, txt): tokens = tagger.tokenize(txt) tags = tagger.get_tags(self.tgr, tokens) answers = [] for extractor in [ tagger.extract_player_relation, tagger.extract_age_relation ]: relation = extractor(tags) if relation: print 'found relation', relation ans = db.trueOrFalse( rb_sports.SportsClaim(relation[0], tuple(relation[1:]))) print ans if ans != None: answers.append(ans) return str(len(filter(lambda x: not x, answers)) == 0)
def _process_pod_rate(self, metric_name, metric, scraper_config):
    """
    Takes a simple metric about a pod, reports it as a rate.
    If several series are found for a given pod, values are summed before submission.
    """
    if metric.type not in METRIC_TYPES:
        self.log.error("Metric type %s unsupported for metric %s" % (metric.type, metric.name))
        return

    summed = self._sum_values_by_context(metric, self._get_pod_uid_if_pod_metric)
    for pod_uid, sample in iteritems(summed):
        # Skip network series for host-networked pods
        if '.network.' in metric_name and self._is_pod_host_networked(pod_uid):
            continue
        pod_tags = get_tags('kubernetes_pod://%s' % pod_uid, True)
        pod_tags += scraper_config['custom_tags']
        self.rate(metric_name, sample[self.SAMPLE_VALUE], pod_tags)
def _report_pods_running(self, pods, instance_tags):
    """
    Reports the number of running pods on this node
    tagged by service and creator.

    Pods sharing an identical tag set are counted together; one gauge
    point is emitted per distinct tag set.
    """
    tag_counter = {}
    for pod in pods['items']:
        pod_id = pod.get('metadata', {}).get('uid')
        tags = get_tags('kubernetes_pod://%s' % pod_id, False) or None
        # Untagged pods (unknown to the tagger) are not reported
        if not tags:
            continue
        # Sorted tuple so identical tag sets hash to the same bucket
        hash_tags = tuple(sorted(tags))
        # dict.get avoids the previous `in tag_counter.keys()` membership
        # test, which built/scanned a key list (O(n) per pod on py2)
        tag_counter[hash_tags] = tag_counter.get(hash_tags, 0) + 1
    # .items() works on both py2 and py3, unlike the former .iteritems()
    for tags, count in tag_counter.items():
        self.gauge(self.NAMESPACE + '.pods.running', count, list(tags))
def _process_pod_rate(self, metric_name, message):
    """
    Takes a simple metric about a pod, reports it as a rate.
    If several series are found for a given pod, values are summed before submission.
    """
    if message.type >= len(METRIC_TYPES):
        self.log.error("Metric type %s unsupported for metric %s" % (message.type, message.name))
        return

    value_field = METRIC_TYPES[message.type]
    summed = self._sum_values_by_context(message, self._get_pod_uid_if_pod_metric)
    for pod_uid, series in summed.iteritems():
        # Skip network series for host-networked pods
        if '.network.' in metric_name and self._is_pod_host_networked(pod_uid):
            continue
        tags = get_tags('kubernetes_pod://%s' % pod_uid, True)
        tags += self.instance_tags
        self.check.rate(metric_name, getattr(series, value_field).value, tags)
def _report_container_state_metrics(self, pod_list, instance_tags):
    """Reports container state & reasons by looking at container statuses"""
    # Expired (deleted but still listed) pods are reported as a single count
    if pod_list.get('expired_count'):
        self.gauge(self.NAMESPACE + '.pods.expired', pod_list.get('expired_count'), tags=instance_tags)
    for pod in pod_list['items']:
        pod_name = pod.get('metadata', {}).get('name')
        pod_uid = pod.get('metadata', {}).get('uid')
        # Both identifiers are needed for tagging/filtering below
        if not pod_name or not pod_uid:
            continue
        for ctr_status in pod['status'].get('containerStatuses', []):
            c_name = ctr_status.get('name')
            cid = ctr_status.get('containerID')
            if not c_name or not cid:
                continue
            if self.pod_list_utils.is_excluded(cid, pod_uid):
                continue
            tags = get_tags('%s' % cid, True) + instance_tags
            restart_count = ctr_status.get('restartCount', 0)
            self.gauge(self.NAMESPACE + '.containers.restarts', restart_count, tags)
            # Report both the current state and the last known state
            for (metric_name, field_name) in [('state', 'state'), ('last_state', 'lastState')]:
                c_state = ctr_status.get(field_name, {})
                # Only terminated/waiting carry whitelisted reasons worth reporting
                for state_name in ['terminated', 'waiting']:
                    state_reasons = WHITELISTED_CONTAINER_STATE_REASONS.get(
                        state_name, [])
                    self._submit_container_state_metric(
                        metric_name, state_name, c_state, state_reasons, tags)
def _process_limit_metric(self, m_name, metric, cache, scraper_config, pct_m_name=None):
    """
    Reports limit metrics if m_name is not an empty string,
    and optionally checks in the given cache if there's a usage
    for each sample in the metric and reports the usage_pct
    """
    # Sum series sharing the same container entity before submission
    samples = self._sum_values_by_context(
        metric, self._get_entity_id_if_container_metric)
    for c_id, sample in iteritems(samples):
        limit = sample[self.SAMPLE_VALUE]
        pod_uid = self._get_pod_uid(sample[self.SAMPLE_LABELS])
        if self.pod_list_utils.is_excluded(c_id, pod_uid):
            continue
        tags = get_tags(c_id, True)
        tags += scraper_config['custom_tags']
        # Empty m_name means the caller only wants the usage_pct metric
        if m_name:
            self.gauge(m_name, limit, tags)
        # usage_pct only makes sense with a positive limit
        if pct_m_name and limit > 0:
            c_name = self._get_container_label(sample[self.SAMPLE_LABELS], 'name')
            if not c_name:
                continue
            # Usage and its tags were stored by _process_usage_metric
            usage, tags = cache.get(c_name, (None, None))
            if usage:
                self.gauge(pct_m_name, float(usage / float(limit)), tags)
            else:
                # Usage may arrive in a later scrape; skip the ratio for now
                self.log.debug(
                    "No corresponding usage found for metric %s and "
                    "container %s, skipping usage_pct for now." % (pct_m_name, c_name))
def _process_usage_metric(self, m_name, message, cache):
    """
    Takes a metrics message, a metric name, and a cache dict where it will store
    container_name --> (value, tags) so that _process_limit_metric can compute usage_pct
    it also submit said value and tags as a gauge.
    """
    # Mark every cached container as unseen until this message proves otherwise
    seen_keys = dict.fromkeys(cache, False)

    for metric in message.metric:
        if not self._is_container_metric(metric):
            continue
        c_name = self._get_container_label(metric.label, 'name')
        if not c_name:
            continue
        c_id = self._get_container_id(metric.label)
        tags = get_tags('docker://%s' % c_id, True)
        val = getattr(metric, METRIC_TYPES[message.type]).value
        cache[c_name] = (val, tags)
        seen_keys[c_name] = True
        self.gauge(m_name, val, tags)

    # Evict containers that were cached but did not appear in this message
    stale = [k for k, seen in seen_keys.iteritems() if not seen]
    for k in stale:
        del cache[k]
else: continue articles.append({ 'Title': article['title'], 'Authors': article['author'], 'Date': article['publishedAt'], 'Text': condensed_text, 'Metrics': summarizer.condense_metrics(condensed_text), 'Image': image, 'Url': article['url'], 'Source': article['source']['name'], 'Tags': get_tags(article['title'], 4) }) except: pass # throw out any news article with a missing field news_db = shelve.open('news') news_db.clear() news_db['data'] = articles news_db.close() print("Successfully Scraped") time.sleep(3600)
# Unless explicitly stated otherwise all files in this repository are licensed # under the Apache License Version 2.0. # This product includes software developed at Datadog (https://www.datadoghq.com/). # Copyright 2019-present Datadog, Inc. from __future__ import print_function import aggregator import tagger if __name__ == "__main__": aggregator.submit_metric(None, 'id', aggregator.GAUGE, 'name', -99.0, ['foo', 'bar'], 'myhost') print("tags returned by tagger: %s\n" % tagger.get_tags("21", True))
def tags_for_pod(pod_id, cardinality):
    """
    Queries the tagger for a given pod uid
    :return: string array, empty if pod not found
    """
    entity_id = 'kubernetes_pod://%s' % pod_id
    return get_tags(entity_id, cardinality)
def check(self, instance):
    """
    Collects ECS Fargate container metadata and stats, submits CPU limit
    gauges plus per-container perf metrics, and reports the fargate_check
    service check reflecting endpoint health.
    """
    metadata_endpoint = API_ENDPOINT + METADATA_ROUTE
    stats_endpoint = API_ENDPOINT + STATS_ROUTE
    custom_tags = instance.get('tags', [])

    # --- Fetch task metadata; failures here are CRITICAL since nothing can be collected
    try:
        request = self.http.get(metadata_endpoint)
    except requests.exceptions.Timeout:
        msg = 'Fargate {} endpoint timed out after {} seconds'.format(
            metadata_endpoint, self.http.options['timeout'])
        self.service_check('fargate_check', AgentCheck.CRITICAL, message=msg, tags=custom_tags)
        self.log.exception(msg)
        return
    except requests.exceptions.RequestException:
        msg = 'Error fetching Fargate {} endpoint'.format(metadata_endpoint)
        self.service_check('fargate_check', AgentCheck.CRITICAL, message=msg, tags=custom_tags)
        self.log.exception(msg)
        return

    if request.status_code != 200:
        msg = 'Fargate {} endpoint responded with {} HTTP code'.format(
            metadata_endpoint, request.status_code)
        self.service_check('fargate_check', AgentCheck.CRITICAL, message=msg, tags=custom_tags)
        self.log.warning(msg)
        return

    # A 200 with an undecodable or incomplete body is only a WARNING
    metadata = {}
    try:
        metadata = request.json()
    except ValueError:
        msg = 'Cannot decode Fargate {} endpoint response'.format(metadata_endpoint)
        self.service_check('fargate_check', AgentCheck.WARNING, message=msg, tags=custom_tags)
        self.log.warning(msg, exc_info=True)
        return

    if not all(k in metadata for k in ['Cluster', 'Containers']):
        msg = 'Missing critical metadata in {} endpoint response'.format(metadata_endpoint)
        self.service_check('fargate_check', AgentCheck.WARNING, message=msg, tags=custom_tags)
        self.log.warning(msg)
        return

    # --- Build the per-container tag sets used for every metric below
    container_tags = {}
    for container in metadata['Containers']:
        c_id = container['DockerId']
        tagger_tags = get_tags('container_id://%s' % c_id, True) or []

        # Compatibility with previous versions of the check
        compat_tags = []
        for tag in tagger_tags:
            if tag.startswith(("task_family:", "task_version:")):
                compat_tags.append("ecs_" + tag)
            elif tag.startswith("cluster_name:"):
                compat_tags.append(tag.replace("cluster_name:", "ecs_cluster:"))
            elif tag.startswith("container_name:"):
                compat_tags.append(tag.replace("container_name:", "docker_name:"))

        container_tags[c_id] = tagger_tags + compat_tags + custom_tags

        # Only submit the CPU limit when one is actually set (> 0)
        if container.get('Limits', {}).get('CPU', 0) > 0:
            self.gauge('ecs.fargate.cpu.limit', container['Limits']['CPU'], container_tags[c_id])

    # --- Fetch runtime stats; metadata already succeeded, so failures are WARNING
    try:
        request = self.http.get(stats_endpoint)
    except requests.exceptions.Timeout:
        msg = 'Fargate {} endpoint timed out after {} seconds'.format(
            stats_endpoint, self.http.options['timeout'])
        self.service_check('fargate_check', AgentCheck.WARNING, message=msg, tags=custom_tags)
        self.log.warning(msg, exc_info=True)
        return
    except requests.exceptions.RequestException:
        msg = 'Error fetching Fargate {} endpoint'.format(stats_endpoint)
        self.service_check('fargate_check', AgentCheck.WARNING, message=msg, tags=custom_tags)
        self.log.warning(msg, exc_info=True)
        return

    if request.status_code != 200:
        msg = 'Fargate {} endpoint responded with {} HTTP code'.format(
            stats_endpoint, request.status_code)
        self.service_check('fargate_check', AgentCheck.WARNING, message=msg, tags=custom_tags)
        self.log.warning(msg)
        return

    stats = {}
    try:
        stats = request.json()
    except ValueError:
        msg = 'Cannot decode Fargate {} endpoint response'.format(stats_endpoint)
        self.service_check('fargate_check', AgentCheck.WARNING, message=msg, tags=custom_tags)
        self.log.warning(msg, exc_info=True)
        # NOTE: no return here — falls through with empty stats and still reports OK below

    for container_id, container_stats in iteritems(stats):
        self.submit_perf_metrics(instance, container_tags, container_id, container_stats)

    self.service_check('fargate_check', AgentCheck.OK, tags=custom_tags)
def tags_for_docker(cid, cardinality):
    """
    Queries the tagger for a given container id
    :return: string array, empty if container not found
    """
    entity_id = 'docker://%s' % cid
    return get_tags(entity_id, cardinality)
import sys

import tagger

# Build the tagger model once up front; it is reused for every line
tgr = tagger.initialize_tagger()
# Process the input file (first CLI argument) line by line
for line in open(sys.argv[1]):
    tokens = tagger.tokenize(line)
    tags = tagger.get_tags(tgr, tokens)
    # Extract a (player, ...) relation from the tagged tokens, if any
    relation = tagger.extract_player_relation(tags)
    print relation