示例#1
0
def get_latest_stat(node, resource):
    """Return the latest utilization value of ``resource`` on ``node``.

    The stats are requested from the time series db plugin with the
    'latest' interval, and the text found between ``Current:`` and ``Max``
    in the returned data is converted to a float.

    :param node: node identifier forwarded to ``get_metric_stats``.
    :param resource: resource/metric name forwarded to ``get_metric_stats``.
    :returns: the current value as a float.
    :raises TendrlPerformanceMonitoringException: when no stats are
        available, no ``Current:`` value is found, or the value is nan.
    :raises ValueError: when the extracted text is not a valid float.

    ``ValueError``, ``urllib3.exceptions.HTTPError`` and
    ``TendrlPerformanceMonitoringException`` are published as a debug
    event and then re-raised.
    """
    try:
        stats = NS.time_series_db_manager.get_plugin().get_metric_stats(
            node, resource, 'latest')
        if stats == "[]" or not stats:
            raise TendrlPerformanceMonitoringException(
                'Stats not yet available in time series db')
        # Search once and reuse the match instead of running the regex twice.
        match = re.search('Current:(.+?)Max', stats)
        if not match:
            raise TendrlPerformanceMonitoringException(
                'Failed to get latest stat of %s of node %s for summary. '
                'Error: Current utilization not found' % (resource, node))
        value = float(match.group(1))
        if math.isnan(value):
            raise TendrlPerformanceMonitoringException(
                'Received nan for utilization %s of %s' % (resource, node))
        return value
    except (ValueError, urllib3.exceptions.HTTPError,
            TendrlPerformanceMonitoringException) as ex:
        Event(
            ExceptionMessage(priority="debug",
                             publisher=NS.publisher_id,
                             payload={
                                 "message":
                                 'Failed to get latest stat of %s of '
                                 'node %s for node summary.' %
                                 (resource, node),
                                 "exception":
                                 ex
                             }))
        # A bare raise re-raises the active exception with its original
        # traceback; ``raise ex`` would reset it on Python 2.
        raise
示例#2
0
 def get_aggregated_stats(self,
                          aggregation_type,
                          entity_names,
                          metric_name,
                          time_interval=None,
                          start_time=None,
                          end_time=None):
     """Fetch one metric for several entities from the render endpoint.

     A comma separated target of ``<prefix>.<entity>.<metric>`` series is
     built (dots in entity names become underscores), optionally wrapped in
     ``averageSeries`` and/or ``cactiStyle``, and requested as JSON from
     ``http://<host>:<port>/render``.

     :param aggregation_type: when equal to ``pm_consts.AVERAGE`` the
         series are wrapped in ``averageSeries``.
     :param entity_names: iterable of entity names.
     :param metric_name: metric suffix appended to every entity.
     :param time_interval: ``'latest'`` wraps the target in ``cactiStyle``;
         any other truthy value is parsed into ``start_time``.
     :param start_time: optional lower bound, run through ``parse_time``.
     :param end_time: optional upper bound, run through ``parse_time``.
     :returns: response body with ``[null, <timestamp>]`` points removed.
     :raises TendrlPerformanceMonitoringException: on a non-200 status or
         any error while requesting/processing the data.
     """
     # Materialise once so the names are still available for the error
     # event, even for an iterator or an empty collection.
     entity_names = list(entity_names)
     target = ','.join(
         '%s.%s.%s' % (self.prefix, name.replace('.', '_'), metric_name)
         for name in entity_names
     )
     if aggregation_type == pm_consts.AVERAGE:
         target = 'averageSeries(%s)' % target
     if time_interval:
         if time_interval == 'latest':
             target = "cactiStyle(%s)" % target
         else:
             start_time = self.parse_time(time_interval)
     # NOTE(review): a start_time derived from time_interval above goes
     # through parse_time a second time here, exactly as before - confirm
     # that parse_time tolerates already parsed values.
     if start_time:
         start_time = self.parse_time(start_time)
     if end_time:
         end_time = self.parse_time(end_time)
     url = 'http://%s:%s/render?target=%s&format=json' % (
         self.host, str(self.port), target)
     if start_time:
         url = "%s&from=%s" % (url, start_time)
     if end_time:
         url = "%s&until=%s" % (url, end_time)
     try:
         stats = self.http.request('GET', url, timeout=5)
         if stats.status != 200:
             # Previously this exception was constructed but never raised,
             # so a failed request silently returned None.
             raise TendrlPerformanceMonitoringException(
                 'Request status code: %s' % str(stats.status))
         # TODO(Anmol): remove nulls from graphite data before returning
         # data. Explore the possibility of achieving this using some
         # tuning factor in graphite.
         data = re.sub(r'\[null, [0-9]+\], ', '', stats.data)
         data = re.sub(r', \[null, [0-9]+\]', '', data)
         return data
     except Exception as ex:
         Event(
             ExceptionMessage(priority="debug",
                              publisher=NS.publisher_id,
                              payload={
                                  "message":
                                  'Failed to fetch stats for metric %s'
                                  ' of %s using url. %s' %
                                  (metric_name, ','.join(entity_names),
                                   url),
                                  "exception":
                                  ex
                              }))
         raise TendrlPerformanceMonitoringException(str(ex))
示例#3
0
 def get_node_disk_iops_stats(self,
                              node_id,
                              time_interval=None,
                              start_time=None,
                              end_time=None):
     """Fetch the summed read and write disk operation stats of a node.

     The node name is resolved from ``node_id`` (dots replaced by
     underscores) and the sum of the averaged ``disk_ops.write`` and
     ``disk_ops.read`` series of all ``disk-*`` entries is requested as
     JSON from ``http://<host>:<port>/render``.

     :param node_id: id used to look up the node name.
     :param time_interval: ``'latest'`` wraps the target in ``cactiStyle``;
         any other truthy value is parsed into ``start_time``.
     :param start_time: optional lower bound, run through ``parse_time``.
     :param end_time: optional upper bound, run through ``parse_time``.
     :returns: response body with ``[null, <timestamp>]`` points removed.
     :raises TendrlPerformanceMonitoringException: on a non-200 status or
         any error while requesting/processing the data.
     """
     node_name = central_store_util.get_node_name_from_id(node_id)
     node_name = node_name.replace('.', '_')
     target = Template(
         'sumSeries(averageSeries($prefix.$node_name.disk-*.disk_ops.write'
         '), averageSeries($prefix.$node_name.disk-*.disk_ops.read))'
     ).substitute(
         prefix=self.prefix,
         node_name=node_name,
     )
     target = urllib.quote(target)
     if time_interval:
         if time_interval == 'latest':
             target = "cactiStyle(%s)" % target
         else:
             start_time = self.parse_time(time_interval)
     # NOTE(review): a start_time derived from time_interval above goes
     # through parse_time a second time here, exactly as before - confirm
     # that parse_time tolerates already parsed values.
     if start_time:
         start_time = self.parse_time(start_time)
     if end_time:
         end_time = self.parse_time(end_time)
     url = 'http://%s:%s/render?target=%s&format=json' % (
         self.host, str(self.port), target)
     if start_time:
         url = "%s&from=%s" % (url, start_time)
     if end_time:
         url = "%s&until=%s" % (url, end_time)
     try:
         stats = self.http.request('GET', url, timeout=5)
         if stats.status != 200:
             # Previously this exception was constructed but never raised,
             # so a failed request silently returned None.
             raise TendrlPerformanceMonitoringException(
                 'Request status code: %s' % str(stats.status))
         # TODO(Anmol): remove nulls from graphite data before returning
         # data. Explore the possibility of achieving this using some
         # tuning factor in graphite.
         data = re.sub(r'\[null, [0-9]+\], ', '', stats.data)
         data = re.sub(r', \[null, [0-9]+\]', '', data)
         return data
     except Exception as ex:
         Event(
             ExceptionMessage(priority="debug",
                              publisher=NS.publisher_id,
                              payload={
                                  "message":
                                  # Three placeholders need three values;
                                  # the error text used to be missing,
                                  # which raised TypeError in the handler.
                                  'Failed to fetch %s stats using url %s'
                                  '. Error %s' % (target, url, str(ex)),
                                  "exception":
                                  ex
                              }))
         raise TendrlPerformanceMonitoringException(str(ex))
def get_cluster_summary(cluster_id):
    """Return the stored summary of a cluster without bookkeeping keys.

    Keys starting with ``_`` and the keys ``hash``, ``updated_at``,
    ``value`` and ``list`` are removed from the loaded summary.

    :param cluster_id: id passed to ``ClusterSummary``.
    :returns: the summary mapping produced by ``load().to_json()`` minus
        the removed keys.
    :raises TendrlPerformanceMonitoringException: when no summary exists
        or any other error occurs while loading it.
    """
    try:
        summary = ClusterSummary(cluster_id=cluster_id)
        if not summary.exists():
            raise TendrlPerformanceMonitoringException(
                "No summary found for cluster %s" % cluster_id)
        summary = summary.load().to_json()
        # Iterate over a snapshot of the keys: deleting entries while
        # iterating the live items() view raises RuntimeError on Python 3.
        for key in list(summary):
            if (key.startswith("_")
                    or key in ['hash', 'updated_at', 'value', 'list']):
                del summary[key]
        return summary
    except Exception as ex:
        raise TendrlPerformanceMonitoringException(str(ex))
def get_system_summary(cluster_type):
    """Return the stored system summary of an sds type.

    Keys starting with ``_`` and the keys ``hash``, ``updated_at``,
    ``value`` and ``list`` are removed from the loaded summary.

    :param cluster_type: sds type passed to ``SystemSummary``.
    :returns: the summary mapping produced by ``load().to_json()`` minus
        the removed keys.
    :raises TendrlPerformanceMonitoringException: when no summary exists
        for the type or any other error occurs while loading it.
    """
    try:
        summary = SystemSummary(sds_type=cluster_type)
        if not summary.exists():
            raise TendrlPerformanceMonitoringException(
                "No clusters of type %s found" % cluster_type)
        summary = summary.load().to_json()
        # Iterate over a snapshot of the keys: deleting entries while
        # iterating the live items() view raises RuntimeError on Python 3.
        for key in list(summary):
            if (key.startswith("_")
                    or key in ['hash', 'updated_at', 'value', 'list']):
                del summary[key]
        return summary
    except Exception as ex:
        raise TendrlPerformanceMonitoringException(str(ex))
示例#6
0
 def get_most_used_bricks(self, bricks):
     """Return the utilization entries of the five most used bricks.

     :param bricks: mapping of brick path to brick details; details that
         carry ``utilization['used_percent']`` are considered, all others
         are reported through a debug event and skipped.
     :returns: at most five ``utilization`` mappings ordered by descending
         ``used_percent``; an empty list when ``bricks`` is empty or None.
     """
     if not bricks:
         return []
     brick_utilizations = []
     # items() works on Python 2 and Python 3 mappings alike, whereas
     # iteritems() only exists on Python 2 dicts.
     for brick_path, brick_det in bricks.items():
         if (
             'utilization' in brick_det and
             'used_percent' in brick_det['utilization']
         ):
             brick_utilizations.append(brick_det['utilization'])
             continue
         Event(
             ExceptionMessage(
                 priority="debug",
                 publisher=NS.publisher_id,
                 payload={
                     "message": "No utilization info for brick "
                     "%s" % brick_path,
                     "exception": TendrlPerformanceMonitoringException(
                         'No utilization info for brick %s' % brick_path
                     )
                 }
             )
         )
     # Ascending sort followed by a reversal (as before) so the relative
     # order of equally used bricks is unchanged.
     brick_utilizations = sorted(
         brick_utilizations,
         key=lambda k: k['used_percent']
     )
     brick_utilizations.reverse()
     return brick_utilizations[:5]
示例#7
0
 def get_metrics(self, entity_name):
     """List the metric names recorded for ``entity_name``.

     The metric index is fetched from
     ``http://<host>:<port>/metrics/index.json`` and every entry starting
     with ``<prefix>.<entity_name with dots replaced by underscores>.`` is
     returned with that leading part stripped.

     :param entity_name: entity whose metrics are requested.
     :returns: ``str()`` of the list of metric names, or ``None`` when the
         request or its processing fails (the failure is published as a
         debug event and not raised).
     """
     url = 'http://%s:%s/metrics/index.json' % (self.host, str(self.port))
     try:
         # NOTE(review): the reason for this 5 second pause is not evident
         # from this method - kept as is.
         gevent.sleep(5)
         resp = self.http.request('GET', url, timeout=5)
         if resp.status != 200:
             # Report the same attribute that was just tested; the former
             # ``status_code`` lookup failed and hid the real status.
             raise TendrlPerformanceMonitoringException(
                 'Request status code: %s' % str(resp.status))
         metrics = ast.literal_eval(resp.data)
         prefix = "%s.%s." % (self.prefix, entity_name.replace('.', '_'))
         result = [
             metric.split(prefix)[1]
             for metric in metrics
             if metric.startswith(prefix)
         ]
         return str(result)
     except Exception as ex:
         Event(
             ExceptionMessage(
                 priority="debug",
                 publisher=NS.publisher_id,
                 payload={
                     "message":
                     'Failed to get metrics for %s.' % entity_name,
                     "exception": ex
                 }))
         # Best effort: callers receive None on failure, as before.
         return None
def get_node_name_from_id(node_id):
    """Read the fqdn stored under ``/nodes/<node_id>/NodeContext/fqdn``.

    :param node_id: id of the node whose fqdn is requested.
    :returns: the ``value`` of the entry read from the central store.
    :raises TendrlPerformanceMonitoringException: wrapping any of the
        handled etcd, value, syntax or type errors.
    """
    try:
        fqdn_entry = NS._int.client.read(
            '/nodes/%s/NodeContext/fqdn' % node_id)
        return fqdn_entry.value
    except (EtcdKeyNotFound, EtcdConnectionFailed, ValueError, SyntaxError,
            EtcdException, TypeError) as err:
        raise TendrlPerformanceMonitoringException(str(err))
示例#9
0
def initiate_config_generation(node_det):
    """Save a new job that configures a collectd plugin on a node.

    :param node_det: mapping with the keys ``node_id``, ``plugin``,
        ``plugin_conf`` and ``fqdn`` describing the target node and the
        plugin configuration.
    :raises TendrlPerformanceMonitoringException: when building or saving
        the job fails for any reason.
    """
    try:
        job_params = {
            'node_ids': [node_det.get('node_id')],
            "run": 'node_monitoring.flows.ConfigureCollectd',
            'type': 'monitoring',
            "parameters": {
                'plugin_name':
                node_det['plugin'],
                'plugin_conf_params':
                json.dumps(node_det['plugin_conf']).encode('utf-8'),
                'Node.fqdn':
                node_det['fqdn'],
                'Service.name':
                'collectd',
            },
        }
        Job(
            job_id=str(uuid.uuid4()),
            status='new',
            payload=job_params,
        ).save()
    except (EtcdException, EtcdConnectionFailed, Exception) as ex:
        # Build the message defensively: the failure may be a missing key
        # or an unserialisable plugin_conf, and repeating that lookup or
        # json.dumps here would raise again and mask the wrapped error.
        plugin_conf = node_det.get('plugin_conf')
        try:
            conf_text = json.dumps(plugin_conf)
        except (TypeError, ValueError):
            conf_text = repr(plugin_conf)
        # Adjacent literals instead of a backslash continuation, which
        # embedded the source indentation in the message.
        raise TendrlPerformanceMonitoringException(
            'Failed to intiate monitoring configuration for plugin '
            '%s on %s with parameters %s.Error %s' %
            (node_det.get('plugin'), node_det.get('fqdn'),
             conf_text, str(ex)))
示例#10
0
def get_latest_stats(node, resource):
    """Return every latest value reported for ``resource`` on a node.

    The node name is resolved from ``node`` and the stats are requested
    from the time series db plugin with the 'latest' interval.

    :param node: node id used to look up the node name.
    :param resource: resource/metric name forwarded to
        ``get_metric_stats``.
    :returns: list of the strings found between ``Current:`` and ``Max``
        in the returned stats.
    :raises TendrlPerformanceMonitoringException: when no stats are
        available yet, or when raised by the calls made here; it is
        published as a debug event before being re-raised.
    """
    try:
        node_name = central_store_util.get_node_name_from_id(node)
        stats = NS.time_series_db_manager.get_plugin().get_metric_stats(
            node_name,
            resource,
            'latest'
        )
        if stats == "[]" or not stats:
            raise TendrlPerformanceMonitoringException(
                'Stats not yet available in time series db'
            )
        return re.findall('Current:(.+?)Max', stats)
    except TendrlPerformanceMonitoringException as ex:
        Event(
            ExceptionMessage(
                priority="debug",
                publisher=NS.publisher_id,
                payload={"message": 'Failed to get latest stats of %s of '
                                    'node %s for node summary.'
                                    % (resource, node),
                         "exception": ex
                         }
            )
        )
        # A bare raise re-raises the active exception with its original
        # traceback; ``raise ex`` would reset it on Python 2.
        raise
示例#11
0
def get_node_summary():
    """Answer a node summary request, optionally filtered by node ids.

    A comma separated ``node_ids`` value is honoured only when it is the
    single query argument of the request; each id must be a version 4
    uuid. Any other combination of arguments is ignored and the
    unfiltered summary is returned.

    :returns: a JSON ``Response`` with the summary and the status code
        reported by ``central_store_util.get_node_summary``, or a 500
        ``Response`` carrying the error text for the handled exceptions.
    """
    try:
        # The node list is the one supported filter; everything else in
        # the query is simply ignored.
        summary, ret_code, errors = [], 200, ''
        only_node_filter = (len(request.args) == 1
                            and request.args.items()[0][0] == 'node_ids')
        if not only_node_filter:
            summary, ret_code, errors = \
                central_store_util.get_node_summary()
        else:
            node_list = (request.args.items()[0][1]).split(",")
            for position, raw_id in enumerate(node_list):
                candidate = raw_id.strip()
                if UUID(candidate,
                        version=4).hex != candidate.replace('-', ''):
                    raise TendrlPerformanceMonitoringException(
                        'Node id %s in the parameter is not a valid uuid' %
                        (candidate))
                # Store the whitespace-free id back into the list.
                node_list[position] = candidate
            summary, ret_code, errors = \
                central_store_util.get_node_summary(node_list)
        body = json.dumps(summary)
        return Response(body,
                        status=ret_code,
                        mimetype='application/json')
    except (etcd.EtcdKeyNotFound, etcd.EtcdConnectionFailed, ValueError,
            SyntaxError, etcd.EtcdException,
            TendrlPerformanceMonitoringException, TypeError) as err:
        return Response(str(err), status=500, mimetype='application/json')
def get_node_role(node_id):
    """Read the value stored under ``/nodes/<node_id>/NodeContext/tags``.

    :param node_id: id of the node.
    :returns: the ``value`` of the entry read from the central store.
    :raises TendrlPerformanceMonitoringException: when the read fails for
        any reason.
    """
    try:
        tags_entry = NS._int.client.read(
            '/nodes/%s/NodeContext/tags' % node_id)
        return tags_entry.value
    except Exception as err:
        failure = "Failed to fetch the role of node %s. Error %s" % (
            node_id, str(err))
        raise TendrlPerformanceMonitoringException(failure)
def get_node_cluster_name(node_id):
    """Read ``/nodes/<node_id>/TendrlContext/cluster_name``.

    :param node_id: id of the node.
    :returns: the ``value`` of the entry read from the central store.
    :raises TendrlPerformanceMonitoringException: when the read fails for
        any reason.
    """
    try:
        name_entry = NS._int.client.read(
            '/nodes/%s/TendrlContext/cluster_name' % node_id)
        return name_entry.value
    except Exception as err:
        failure = "Failed to fetch cluster name for node %s. Error: %s" % (
            node_id, str(err))
        raise TendrlPerformanceMonitoringException(failure)
示例#14
0
 def get_node_ids(self):
     """Return the ids found directly under ``/nodes``.

     Each id is the key of a child entry with the leading ``/nodes/``
     removed.

     :returns: list of node ids; empty when ``/nodes`` does not exist.
     :raises TendrlPerformanceMonitoringException: wrapping connection,
         value, syntax and type errors.
     """
     skip = len('/nodes/')
     try:
         nodes_entry = tendrl_ns.etcd_orm.client.read('/nodes')
         return [child['key'][skip:] for child in nodes_entry._children]
     except EtcdKeyNotFound:
         return []
     except (EtcdConnectionFailed, ValueError, SyntaxError,
             TypeError) as err:
         raise TendrlPerformanceMonitoringException(str(err))
示例#15
0
 def get_metric_stats(self, entity_name, metric_name, time_interval=None):
     """Fetch the stats of one metric of an entity from the render endpoint.

     The target is ``<prefix>.<entity_name with dots replaced by
     underscores>.<metric_name>`` and is requested as JSON from
     ``http://<host>:<port>/render``.

     :param entity_name: entity owning the metric.
     :param metric_name: name of the metric below the entity.
     :param time_interval: ``'latest'`` wraps the target in ``cactiStyle``;
         other values are not used by this method.
     :returns: the body of the response.
     :raises TendrlPerformanceMonitoringException: on a non-200 status or
         any error raised while requesting the data.
     """
     metric_name = '%s.%s' % (entity_name.replace('.', '_'), metric_name)
     target = '%s.%s' % (self.prefix, metric_name)
     if time_interval == 'latest':
         target = "cactiStyle(%s)" % target
     url = 'http://%s:%s/render?target=%s&format=json' % (
         self.host, str(self.port), target)
     try:
         stats = self.http.request('GET', url, timeout=5)
         if stats.status != 200:
             # The former branch referenced an undefined ``data`` name and
             # never raised its exception, so the status was never reported.
             raise TendrlPerformanceMonitoringException(
                 'Request status code: %s' % str(stats.status)
             )
         return stats.data
     except Exception as ex:
         LOG.error(
             'Failed to fetch stats for metric %s of %s using url %s.'
             'Error %s ' % (metric_name, entity_name, url, str(ex)),
             exc_info=True)
         raise TendrlPerformanceMonitoringException(str(ex))
def get_cluster_ids():
    """Return the cluster ids found under ``/clusters``.

    Only leaves whose key splits on ``/`` into exactly three parts are
    considered; the third part is taken as the id.

    :returns: list of cluster ids; empty when ``/clusters`` is missing.
    :raises TendrlPerformanceMonitoringException: wrapping connection,
        value, syntax and type errors.
    """
    try:
        clusters_entry = NS._int.client.read('/clusters')
        split_keys = (leaf.key.split('/') for leaf in clusters_entry.leaves)
        return [parts[2] for parts in split_keys if len(parts) == 3]
    except EtcdKeyNotFound:
        return []
    except (EtcdConnectionFailed, ValueError, SyntaxError, TypeError) as err:
        raise TendrlPerformanceMonitoringException(str(err))
def get_node_alert_ids(node_id=None):
    """Return the alert ids stored under ``/alerting/nodes/<node_id>``.

    Only leaves whose key splits on ``/`` into exactly five parts are
    considered; the fifth part is taken as the alert id.

    :param node_id: id of the node; formatted into the path as given.
    :returns: list of alert ids collected so far; a missing key ends the
        collection without raising.
    :raises TendrlPerformanceMonitoringException: wrapping connection and
        other etcd errors.
    """
    collected = []
    try:
        alerts_entry = NS._int.client.read('/alerting/nodes/%s' % node_id)
        for leaf in alerts_entry.leaves:
            parts = leaf.key.split('/')
            if len(parts) != 5:
                continue
            collected.append(parts[4])
    except EtcdKeyNotFound:
        # Whatever was gathered before the key went missing is returned.
        return collected
    except (EtcdConnectionFailed, EtcdException) as err:
        raise TendrlPerformanceMonitoringException(str(err))
    return collected
def get_node_ids():
    """Return the node ids found under ``/nodes``.

    Only leaves whose key splits on ``/`` into exactly three parts are
    considered; the third part is taken as the id.

    :returns: list of node ids; empty when ``/nodes`` is missing.
    :raises TendrlPerformanceMonitoringException: wrapping connection,
        value, syntax and type errors.
    """
    try:
        nodes_entry = NS._int.client.read('/nodes')
        split_keys = (leaf.key.split('/') for leaf in nodes_entry.leaves)
        return [parts[2] for parts in split_keys if len(parts) == 3]
    except EtcdKeyNotFound:
        return []
    except (EtcdConnectionFailed, ValueError, SyntaxError, TypeError) as err:
        raise TendrlPerformanceMonitoringException(str(err))
示例#19
0
def get_sdsthroughput(sds_type, network_type):
    """Answer a system throughput request for an sds type.

    The optional query arguments ``start_time``, ``end_time`` and
    ``interval`` of the current request bound the returned stats.

    :param sds_type: sds type; must be one of the supported sds.
    :param network_type: network type used to build the series name.
    :returns: a JSON ``Response`` with the metric stats, or a 500
        ``Response`` carrying the error text for the handled exceptions.
    """
    try:
        # Collect the recognised query arguments; others are ignored.
        window = {'start_time': None, 'end_time': None, 'interval': None}
        for arg_name, arg_value in request.args.items():
            if arg_name in window:
                window[arg_name] = arg_value
        # validate sds-type
        if sds_type not in NS.sds_monitoring_manager.supported_sds:
            raise TendrlPerformanceMonitoringException(
                'Unsupported sds %s' % sds_type
            )
        series_name = NS.time_series_db_manager.\
            get_timeseriesnamefromresource(
                sds_type=sds_type,
                network_type=network_type,
                resource_name=pm_consts.SYSTEM_THROUGHPUT,
                utilization_type=pm_consts.USED
            )
        delimiter = NS.time_series_db_manager.get_plugin().get_delimeter()
        entity_name, metric_name = series_name.split(delimiter, 1)
        stats = NS.time_series_db_manager.get_plugin().get_metric_stats(
            entity_name,
            metric_name,
            time_interval=window['interval'],
            start_time=window['start_time'],
            end_time=window['end_time']
        )
        return Response(stats, status=200, mimetype='application/json')
    except (
        ValueError,
        etcd.EtcdKeyNotFound,
        etcd.EtcdConnectionFailed,
        SyntaxError,
        etcd.EtcdException,
        TypeError,
        TendrlPerformanceMonitoringException
    ) as err:
        return Response(str(err), status=500, mimetype='application/json')
示例#20
0
def get_sdsutilization(sds_type, utiliation_type):
    """Answer a system utilization request for an sds type.

    The optional query arguments ``start_time``, ``end_time`` and
    ``interval`` of the current request bound the returned stats.

    :param sds_type: sds type; must be one of the supported sds.
    :param utiliation_type: utilization type used to build the series
        name.
    :returns: a JSON ``Response`` with the metric stats, or a 500
        ``Response`` carrying the error text for the handled exceptions.
    """
    try:
        # Collect the recognised query arguments; others are ignored.
        window = {'start_time': None, 'end_time': None, 'interval': None}
        for arg_name, arg_value in request.args.items():
            if arg_name in window:
                window[arg_name] = arg_value
        # validate sds-type
        if sds_type not in NS.sds_monitoring_manager.supported_sds:
            raise TendrlPerformanceMonitoringException(
                'Unsupported sds %s' % sds_type
            )
        series_name = NS.time_series_db_manager.\
            get_timeseriesnamefromresource(
                resource_name=pm_consts.SYSTEM_UTILIZATION,
                utilization_type=utiliation_type,
                sds_type=sds_type
            )
        delimiter = NS.time_series_db_manager.get_plugin().get_delimeter()
        entity_name, metric_name = series_name.split(delimiter, 1)
        stats = NS.time_series_db_manager.get_plugin().get_metric_stats(
            entity_name,
            metric_name,
            time_interval=window['interval'],
            start_time=window['start_time'],
            end_time=window['end_time']
        )
        return Response(stats, status=200, mimetype='application/json')
    except (
        AttributeError,
        ValueError,
        etcd.EtcdException,
        SyntaxError,
        urllib3.exceptions.HTTPError,
        TypeError,
        TendrlPerformanceMonitoringException
    ) as err:
        return Response(str(err), status=500, mimetype='application/json')
示例#21
0
def get_system_summary(cluster_type):
    """Answer a system summary request for an sds type.

    :param cluster_type: sds type; must be one of the supported sds.
    :returns: a JSON ``Response`` with the summary from
        ``central_store_util.get_system_summary``, or a 500 ``Response``
        describing the failure when a
        ``TendrlPerformanceMonitoringException`` is raised.
    """
    try:
        supported = NS.sds_monitoring_manager.supported_sds
        if cluster_type not in supported:
            raise TendrlPerformanceMonitoringException(
                'Unsupported sds %s' % cluster_type)
        body = json.dumps(
            central_store_util.get_system_summary(cluster_type))
        return Response(body, status=200, mimetype='application/json')
    except TendrlPerformanceMonitoringException as err:
        failure = 'Failed to fetch %s system summary.Error %s' % (
            cluster_type, str(err))
        return Response(failure, status=500, mimetype='application/json')
示例#22
0
def get_clusters_iops():
    """Answer a cluster iops request, optionally filtered by cluster ids.

    Recognised query arguments of the current request: ``start_time``,
    ``end_time``, ``interval`` and ``cluster_ids`` (comma separated
    version 4 uuids). Other arguments are ignored.

    :returns: a JSON ``Response`` with the iops data and the status code
        reported by ``central_store_util.get_cluster_iops``, or a 500
        ``Response`` carrying the error text for the handled exceptions.
    """
    try:
        cluster_list = None
        start_time = None
        end_time = None
        time_interval = None
        for arg_name, arg_value in request.args.items():
            if arg_name == "start_time":
                start_time = arg_value
            elif arg_name == "end_time":
                end_time = arg_value
            elif arg_name == "interval":
                time_interval = arg_value
            elif arg_name == "cluster_ids":
                # Use this argument's own value; the former code read the
                # value of whichever argument happened to come first.
                cluster_list = arg_value.split(",")
        iops = []
        ret_code = 200
        if cluster_list:
            for index, raw_id in enumerate(cluster_list):
                uuid_string = raw_id.strip()
                if UUID(uuid_string,
                        version=4).hex != uuid_string.replace('-', ''):
                    raise TendrlPerformanceMonitoringException(
                        'Cluster id %s in the parameter is not a valid '
                        'uuid' % (uuid_string))
                # Store the whitespace-free id back into the list.
                cluster_list[index] = uuid_string
            iops, ret_code, exs = \
                central_store_util.get_cluster_iops(
                    cluster_list,
                    time_interval=time_interval,
                    start_time=start_time,
                    end_time=end_time
                )
        else:
            iops, ret_code, exs = \
                central_store_util.get_cluster_iops(
                    time_interval=time_interval,
                    start_time=start_time,
                    end_time=end_time
                )
        return Response(json.dumps(iops),
                        status=ret_code,
                        mimetype='application/json')
    except (etcd.EtcdKeyNotFound, etcd.EtcdConnectionFailed, ValueError,
            SyntaxError, etcd.EtcdException,
            TendrlPerformanceMonitoringException, TypeError) as ex:
        return Response(str(ex), status=500, mimetype='application/json')
示例#23
0
 def get_configs(self):
     """Load the monitoring configuration stored in the central store.

     :returns: the result of ``yaml.safe_load`` on the value read from
         ``/_tendrl/config/performance_monitoring``.
     :raises TendrlPerformanceMonitoringException: wrapping the handled
         etcd, value and syntax errors after logging them.
     """
     # TODO(Anmol) : Attempt reading:
     # /_tendrl/config/performance_monitoring/clusters/{cluster-id} and if
     # not already present, default back to defaults in:
     #  /_tendrl/config/performance_monitoring
     try:
         config_entry = tendrl_ns.etcd_orm.client.read(
             '/_tendrl/config/performance_monitoring')
         raw_config = config_entry.value
         return yaml.safe_load(raw_config)
     except (EtcdKeyNotFound, EtcdConnectionFailed, ValueError, SyntaxError,
             EtcdException) as err:
         LOG.error(
             'Fetching monitoring configurations failed. Error %s' % err)
         raise TendrlPerformanceMonitoringException(str(err))
def get_nodes_details():
    """Collect the id and fqdn of every node found under ``/nodes/``.

    :returns: list of ``{'node_id': ..., 'fqdn': ...}`` mappings with
        ascii-encoded values; when a key is not found the entries gathered
        up to that point are returned.
    :raises TendrlPerformanceMonitoringException: when the connection to
        the central store fails.
    """
    details = []
    try:
        listing = NS._int.client.read('/nodes/')
        for leaf in listing.leaves:
            if not leaf.key.startswith('/nodes/'):
                continue
            # The id is the path component right after '/nodes/'.
            node_id = leaf.key.split('/')[2].encode('ascii', 'ignore')
            fqdn_entry = NS._int.client.read(
                '/nodes/%s/NodeContext/fqdn' % (node_id))
            fqdn = fqdn_entry.value.encode('ascii', 'ignore')
            details.append({'node_id': node_id, 'fqdn': fqdn})
        return details
    except EtcdKeyNotFound:
        return details
    except EtcdConnectionFailed as err:
        raise TendrlPerformanceMonitoringException(str(err))
示例#25
0
 def get_nodes_details(self):
     """Collect the id and fqdn of every node found under ``/nodes/``.

     :returns: list of ``{'node_id': ..., 'fqdn': ...}`` mappings with
         ascii-encoded values; when a key is not found the entries
         gathered up to that point are returned.
     :raises TendrlPerformanceMonitoringException: when the connection to
         the central store fails.
     """
     details = []
     root = '/nodes/'
     try:
         listing = tendrl_ns.etcd_orm.client.read('/nodes/', recursive=True)
         for child in listing._children:
             child_key = child['key']
             if not child_key.startswith(root):
                 continue
             # The id is whatever follows the '/nodes/' prefix of the key.
             node_id = child_key[len(root):].encode('ascii', 'ignore')
             fqdn_entry = tendrl_ns.etcd_orm.client.read(
                 '%s/NodeContext/fqdn' % (child['key']),
                 recursive=True)
             fqdn = fqdn_entry.value.encode('ascii', 'ignore')
             details.append({'node_id': node_id, 'fqdn': fqdn})
         return details
     except EtcdKeyNotFound:
         return details
     except EtcdConnectionFailed as err:
         raise TendrlPerformanceMonitoringException(str(err))
示例#26
0
def initiate_config_generation(node_det):
    """Save a collectd plugin configuration job for a node, once.

    Nothing is done when a ``NodeMonitoringPlugin`` entry already exists
    for the plugin/node pair; otherwise a new job is saved and the
    plugin entry is recorded together with the job id.

    :param node_det: mapping with the keys ``node_id``, ``plugin``,
        ``plugin_conf`` and ``fqdn`` describing the target node and the
        plugin configuration.
    :raises TendrlPerformanceMonitoringException: when any step fails.
    """
    try:
        plugin = NodeMonitoringPlugin(
            plugin_name=node_det['plugin'],
            node_id=node_det.get('node_id')
        )
        if plugin.exists():
            # More powers like fixed retrials can be added here.This is common
            # point through which all monitoring plugin configuration jobs land
            # into etcd and hence any action here is reflected to all of them.
            return
        job_params = {
            'node_ids': [node_det.get('node_id')],
            "run": 'node_monitoring.flows.ConfigureCollectd',
            'type': 'monitoring',
            "parameters": {
                'plugin_name': node_det['plugin'],
                'plugin_conf_params': json.dumps(
                    node_det['plugin_conf']
                ).encode('utf-8'),
                'Node.fqdn': node_det['fqdn'],
                'Service.name': 'collectd',
            },
        }
        job_id = str(uuid.uuid4())
        Job(
            job_id=job_id,
            status='new',
            payload=job_params,
        ).save()
        NodeMonitoringPlugin(
            plugin_name=node_det['plugin'],
            node_id=node_det.get('node_id'),
            job_id=job_id
        ).save(update=False)
    except (EtcdException, EtcdConnectionFailed, Exception) as ex:
        # Build the message defensively: the failure may be a missing key
        # or an unserialisable plugin_conf, and repeating that lookup or
        # json.dumps here would raise again and mask the wrapped error.
        plugin_conf = node_det.get('plugin_conf')
        try:
            conf_text = json.dumps(plugin_conf)
        except (TypeError, ValueError):
            conf_text = repr(plugin_conf)
        # Adjacent literals instead of a backslash continuation, which
        # embedded the source indentation in the message.
        raise TendrlPerformanceMonitoringException(
            'Failed to intiate monitoring configuration for plugin '
            '%s on %s with parameters %s.Error %s' % (
                node_det.get('plugin'),
                node_det.get('fqdn'),
                conf_text,
                str(ex)
            )
        )
def get_configs():
    """Load the monitoring configuration stored in the central store.

    :returns: the result of ``yaml.safe_load`` on the value read from
        ``_NS/performance_monitoring/config``.
    :raises TendrlPerformanceMonitoringException: wrapping the handled
        etcd, value and syntax errors after publishing a debug event.
    """
    # TODO(Anmol) : Attempt reading:
    # /_tendrl/config/performance_monitoring/clusters/{cluster-id} and if
    # not already present, default back to defaults in:
    #  /_tendrl/config/performance_monitoring
    try:
        config_entry = NS._int.client.read(
            '_NS/performance_monitoring/config')
        raw_config = config_entry.value
        return yaml.safe_load(raw_config)
    except (EtcdKeyNotFound, EtcdConnectionFailed, ValueError, SyntaxError,
            EtcdException) as err:
        failure = ExceptionMessage(
            priority="debug",
            publisher=NS.publisher_id,
            payload={
                "message": 'Fetching monitoring configurations failed.',
                "exception": err
            })
        Event(failure)
        raise TendrlPerformanceMonitoringException(str(err))
示例#28
0
 def get_timeseriesnamefromresource(self, **kwargs):
     """Build the time series name of a resource from a name pattern.

     The pattern registered for ``kwargs['resource_name']`` is completed
     with the plugin's delimiter and then filled from ``kwargs`` through
     ``string.Template`` substitution.

     :param kwargs: must contain ``resource_name`` plus every placeholder
         used by the selected pattern (e.g. ``sds_type``, ``cluster_id``,
         ``network_type``, ``utilization_type``). A given
         ``utilization_type`` is first translated by the plugin's
         ``get_utilizationtype``.
     :returns: the fully substituted series name.
     :raises TendrlPerformanceMonitoringException: when no pattern exists
         for the requested resource.
     :raises KeyError: when ``resource_name`` or a placeholder required by
         the pattern is missing from ``kwargs``.
     """
     # If in future this function starts to appear more plugin
     # specific move it from here to respecive TimeSeriesDBPlugin
     delimeter = self.get_plugin().get_delimeter()
     resource_name = kwargs['resource_name']
     if 'utilization_type' in kwargs:
         kwargs['utilization_type'] = self.get_plugin().get_utilizationtype(
             resource_name,
             kwargs['utilization_type']
         )
     # ``{0}`` marks where the delimiter goes; ``$name`` entries are
     # Template placeholders filled from kwargs.
     pattern = {
         pm_consts.SYSTEM_UTILIZATION: '$sds_type{0}utilization{0}'
         '$utilization_type',
         pm_consts.CLUSTER_UTILIZATION: 'cluster_$cluster_id{0}'
         'cluster_utilization{0}$utilization_type',
         pm_consts.CLUSTER_THROUGHPUT: 'cluster_$cluster_id{0}'
         'throughput{0}$network_type{0}$utilization_type',
         pm_consts.SYSTEM_THROUGHPUT: '$sds_type{0}'
         'throughput{0}$network_type{0}$utilization_type',
         pm_consts.NODE_THROUGHPUT: '$node_name{0}'
         'network_throughput-$network_type{0}$utilization_type',
         pm_consts.LATENCY: 'ping{0}ping-$underscored_monitoring_node_name',
         pm_consts.IOPS: 'cluster_$cluster_id{0}cluster_iops_read_write{0}'
         '$utilization_type',
         pm_consts.SWAP: 'swap{0}$utilization_type',
         pm_consts.SWAP_TOTAL: '$utilization_type',
         pm_consts.CPU: '$underscored_node_name{0}cpu{0}cpu_system_user{0}'
         '$utilization_type',
         pm_consts.STORAGE: '$underscored_node_name{0}storage{0}'
         '$utilization_type',
         pm_consts.CLUSTER_IOPS: 'cluster_$cluster_id{0}'
         'cluster_iops_read_write{0}gauge-total'
     }
     resource_pattern = pattern.get(resource_name)
     if not resource_pattern:
         # The resource name used to be left out, so the message showed a
         # literal '%s'.
         raise TendrlPerformanceMonitoringException(
             'No pattern found for the requested resource %s.' %
             resource_name
         )
     return Template(resource_pattern.format(delimeter)).substitute(kwargs)
示例#29
0
 def get_metrics(self, entity_name):
     """List the metric names recorded for ``entity_name``.

     The metric index is fetched from
     ``http://<host>:<port>/metrics/index.json`` and every entry starting
     with ``<prefix>.<entity_name with dots replaced by underscores>.`` is
     returned with that leading part stripped.

     :param entity_name: entity whose metrics are requested.
     :returns: ``str()`` of the list of metric names, or ``None`` when the
         request or its processing fails (the failure is logged and not
         raised).
     """
     url = 'http://%s:%s/metrics/index.json' % (self.host, str(self.port))
     try:
         # NOTE(review): the reason for this 5 second pause is not evident
         # from this method - kept as is.
         time.sleep(5)
         resp = self.http.request('GET', url, timeout=5)
         if resp.status != 200:
             # Report the same attribute that was just tested; the former
             # ``status_code`` lookup failed and hid the real status.
             raise TendrlPerformanceMonitoringException(
                 'Request status code: %s' % str(resp.status)
             )
         metrics = ast.literal_eval(resp.data)
         prefix = "%s.%s." % (self.prefix, entity_name.replace('.', '_'))
         result = [
             metric.split(prefix)[1]
             for metric in metrics
             if metric.startswith(prefix)
         ]
         return str(result)
     except Exception as ex:
         LOG.error('Failed to get metrics for %s.Error %s ' %
                   (entity_name, ex), exc_info=True)
         # Best effort: callers receive None on failure, as before.
         return None
 def initiate_config_generation(self, conf_name, data, node_det):
     """Write a collectd plugin configuration job to the queue.

     The job is serialised as JSON and written to ``/queue/<uuid4>``.

     :param conf_name: name of the plugin to configure.
     :param data: plugin configuration parameters; serialised with
         ``json.dumps``.
     :param node_det: mapping providing ``node_id`` and ``fqdn`` of the
         target node.
     :raises TendrlPerformanceMonitoringException: when writing the job
         fails with an etcd error (logged before being wrapped).
     """
     try:
         job = {
             'node_ids': [node_det.get('node_id')],
             "run":
             'tendrl.node_monitoring.flows.configure_collectd.ConfigureCollectd',
             'status': 'new',
             'type': 'monitoring',
             'integration_id': tendrl_ns.tendrl_context.integration_id,
             "parameters": {
                 'plugin_name': conf_name,
                 'plugin_conf_params': json.dumps(data),
                 'Node.fqdn': node_det['fqdn'],
                 'Service.name': 'collectd',
             },
         }
         tendrl_ns.etcd_orm.client.write("/queue/%s" % str(uuid.uuid4()),
                                         json.dumps(job))
     except (EtcdException, EtcdConnectionFailed) as ex:
         # Adjacent literals instead of a backslash continuation, which
         # embedded the source indentation in the logged message.
         LOG.error('Failed to intiate monitoring configuration for plugin '
                   '%s on %s with parameters %s.Error %s' %
                   (conf_name, node_det['fqdn'], data, ex))
         raise TendrlPerformanceMonitoringException(str(ex))