示例#1
0
 def get_brick_status_wise_counts(self, cluster_id, bricks):
     """Summarise brick states and brick alert counts for a cluster.

     ``bricks`` is walked with ``iteritems()``. Every entry adds one to
     the total; an entry whose ``'status'`` value equals ``'Stopped'``
     also adds one to the stopped count. Alert counts are the lengths of
     the two lists returned by ``parse_resource_alerts`` for ``'brick'``.

     Returns a dict keyed by ``'stopped'``, ``'total'``,
     ``pm_consts.WARNING_ALERTS`` and ``pm_consts.CRITICAL_ALERTS``.
     """
     stopped = 0
     total = 0
     for _brick_path, details in bricks.iteritems():
         # A brick without a 'status' entry is never counted as stopped.
         if details.get('status') == 'Stopped':
             stopped += 1
         total += 1
     critical, warning = parse_resource_alerts(
         'brick', pm_consts.CLUSTER, cluster_id=cluster_id)
     return {
         'stopped': stopped,
         'total': total,
         pm_consts.WARNING_ALERTS: len(warning),
         pm_consts.CRITICAL_ALERTS: len(critical)
     }
示例#2
0
 def get_osd_status_wise_counts(self, cluster_id, osds):
     """Summarise OSD states and OSD alert counts for a cluster.

     Each item of ``osds`` adds one to the total; it is counted as down
     when ``'up'`` does not occur in its ``'state'`` value (a missing
     state is treated as the empty string, hence down). ``near_full`` is
     the number of critical alerts whose severity is
     ``pm_consts.CRITICAL`` and whose resource is ``'osd_utilization'``.

     Returns a dict keyed by ``'total'``, ``'down'``, ``'near_full'``,
     ``pm_consts.CRITICAL_ALERTS`` and ``pm_consts.WARNING_ALERTS``.
     """
     total = 0
     down = 0
     for osd in osds:
         if 'up' not in osd.get('state', ''):
             down += 1
         total += 1
     critical, warning = parse_resource_alerts(
         'osd', pm_consts.CLUSTER, cluster_id=cluster_id)
     near_full = 0
     for alert in critical:
         # The resource is only inspected once the severity has matched.
         if alert['severity'] != pm_consts.CRITICAL:
             continue
         if alert['resource'] == 'osd_utilization':
             near_full += 1
     return {
         'total': total,
         'down': down,
         pm_consts.CRITICAL_ALERTS: len(critical),
         pm_consts.WARNING_ALERTS: len(warning),
         'near_full': near_full
     }
示例#3
0
 def get_brick_status_wise_counts(self, cluster_id, volumes_det):
     """Summarise brick states and brick alert counts for a cluster.

     Walks the ``'Bricks'`` mapping of every volume in ``volumes_det``.
     Every brick adds one to the total; a brick whose ``'status'`` equals
     ``'Stopped'`` also adds one to the stopped count. A brick that has no
     ``'status'`` entry is counted in the total but not as stopped.

     Any exception raised while walking the volumes is reported as a
     debug ``ExceptionMessage`` event and the counts gathered so far are
     kept; the alert counts are filled in regardless.

     Returns a dict keyed by ``'stopped'``, ``'total'``,
     ``pm_consts.WARNING_ALERTS`` and ``pm_consts.CRITICAL_ALERTS``.
     """
     brick_status_wise_counts = {
         'stopped': 0,
         'total': 0,
         pm_consts.WARNING_ALERTS: 0,
         pm_consts.CRITICAL_ALERTS: 0
     }
     try:
         for volume_id, volume_det in volumes_det.iteritems():
             bricks = volume_det.get('Bricks', {})
             for brick_path, brick_det in bricks.iteritems():
                 # Use .get(): indexing a brick without 'status' raised
                 # KeyError, which the handler below swallowed, silently
                 # dropping this brick and every brick after it.
                 if brick_det.get('status') == 'Stopped':
                     brick_status_wise_counts['stopped'] += 1
                 brick_status_wise_counts['total'] += 1
     except Exception as ex:
         # Best effort: keep whatever was tallied and still report alerts.
         Event(
             ExceptionMessage(priority="debug",
                              publisher=NS.publisher_id,
                              payload={
                                  "message":
                                  "Exception caught computing brick "
                                  "status wise counts",
                                  "exception":
                                  ex
                              }))
     crit_alerts, warn_alerts = parse_resource_alerts('brick',
                                                      pm_consts.CLUSTER,
                                                      cluster_id=cluster_id)
     brick_status_wise_counts[pm_consts.CRITICAL_ALERTS] = len(crit_alerts)
     brick_status_wise_counts[pm_consts.WARNING_ALERTS] = len(warn_alerts)
     return brick_status_wise_counts
示例#4
0
 def get_volume_status_wise_counts(self, cluster_id, volumes):
     """Summarise volume states and volume alert counts for a cluster.

     Every entry of ``volumes`` adds one to the total; it is counted as
     down when ``'Started'`` does not occur in its ``'status'`` value. A
     volume whose status is missing or ``None`` is counted as down.

     The degraded count is read from the central store key
     ``/clusters/<cluster_id>/GlobalDetails/volume_up_degraded`` and is 0
     when that key does not exist or holds an empty value.

     Returns a dict keyed by ``'down'``, ``'total'``, ``'degraded'``,
     ``pm_consts.CRITICAL_ALERTS`` and ``pm_consts.WARNING_ALERTS``.
     """
     volume_status_wise_counts = {
         'down': 0,
         'total': 0,
         'degraded': 0,
         pm_consts.CRITICAL_ALERTS: 0,
         pm_consts.WARNING_ALERTS: 0
     }
     for vol_id, vol_det in volumes.iteritems():
         # ``or ''`` guards against a missing/None status: the membership
         # test against None raised TypeError and aborted the summary.
         if 'Started' not in (vol_det.get('status') or ''):
             volume_status_wise_counts['down'] += 1
         volume_status_wise_counts['total'] += 1
     volumes_up_degraded = 0
     try:
         volumes_up_degraded = NS._int.client.read(
             '/clusters/%s/GlobalDetails/volume_up_degraded' %
             cluster_id).value
     except EtcdKeyNotFound:
         # Key not published yet: leave the degraded count at 0.
         pass
     volume_status_wise_counts['degraded'] = \
         int(volumes_up_degraded or 0)
     crit_alerts, warn_alerts = parse_resource_alerts('volume',
                                                      pm_consts.CLUSTER,
                                                      cluster_id=cluster_id)
     volume_status_wise_counts[pm_consts.CRITICAL_ALERTS] = len(crit_alerts)
     volume_status_wise_counts[pm_consts.WARNING_ALERTS] = len(warn_alerts)
     return volume_status_wise_counts
示例#5
0
 def get_clusters_status_wise_counts(self, cluster_summaries):
     """Aggregate status and alert counts over the given clusters.

     Only clusters whose ``TendrlContext`` ``sds_name`` contains
     ``self.name`` are counted. For each such cluster:

     * a non-empty ``GlobalDetails`` status bumps both the per-status
       counter and ``'total'`` under the ``'status'`` sub-dict;
     * its critical and warning alerts (from ``parse_resource_alerts``)
       are added to the running alert counts.

     ``'near_full'`` is the number of collected alerts whose severity is
     ``pm_consts.CRITICAL`` and whose resource is
     ``'cluster_utilization'``.

     A cluster whose ``TendrlContext`` or ``GlobalDetails`` key is missing
     is skipped; the remaining clusters are still processed.

     Returns a dict keyed by ``'status'``, ``'near_full'``,
     ``pm_consts.CRITICAL_ALERTS`` and ``pm_consts.WARNING_ALERTS``.
     """
     clusters_status_wise_counts = {
         'status': {
             'total': 0
         },
         'near_full': 0,
         pm_consts.CRITICAL_ALERTS: 0,
         pm_consts.WARNING_ALERTS: 0
     }
     status_counts = clusters_status_wise_counts['status']
     cluster_alerts = []
     for cluster_summary in cluster_summaries:
         cluster_id = cluster_summary.cluster_id
         # NOTE(review): the returned name is never used below; the call
         # is kept only so any error it raises still reaches the caller.
         # Confirm whether it can be dropped.
         sds_name = central_store_util.get_cluster_sds_name(cluster_id)
         try:
             cluster_tendrl_context = central_store_util.read(
                 '/clusters/%s/TendrlContext' % cluster_id)
             cluster_status = central_store_util.read(
                 '/clusters/%s/GlobalDetails' % cluster_id)
             cluster_status = cluster_status.get('status')
         except EtcdKeyNotFound:
             # Skip just this cluster. Returning here used to drop every
             # remaining cluster and leave 'near_full' uncomputed.
             continue
         # ``or ''`` guards against a context without 'sds_name': the
         # membership test against None raised TypeError.
         if self.name not in (cluster_tendrl_context.get('sds_name') or ''):
             continue
         if cluster_status:
             status_counts[cluster_status] = \
                 status_counts.get(cluster_status, 0) + 1
             status_counts['total'] += 1
         cluster_critical_alerts, cluster_warning_alerts = \
             parse_resource_alerts(
                 None,
                 pm_consts.CLUSTER,
                 cluster_id=cluster_id
             )
         cluster_alerts.extend(cluster_critical_alerts)
         cluster_alerts.extend(cluster_warning_alerts)
         clusters_status_wise_counts[pm_consts.CRITICAL_ALERTS] += len(
             cluster_critical_alerts)
         clusters_status_wise_counts[pm_consts.WARNING_ALERTS] += len(
             cluster_warning_alerts)
     for cluster_alert in cluster_alerts:
         if (cluster_alert['severity'] == pm_consts.CRITICAL
                 and cluster_alert['resource'] == 'cluster_utilization'):
             clusters_status_wise_counts['near_full'] += 1
     return clusters_status_wise_counts
 def get_node_osd_status_wise_counts(self, node_id):
     """Summarise the states and alert counts of the OSDs on one node.

     The node's IP is looked up in ``/indexes/ip``. An OSD from the
     cluster's OSD map belongs to the node when that IP occurs in the
     OSD's ``cluster_addr`` or ``public_addr``. Each such OSD adds one to
     the total and, when ``'up'`` does not occur in its ``'state'`` (a
     missing state counts as down), one to the down count.

     Alert counts cover only the ``'osd'`` alerts whose
     ``tags['plugin_instance']`` (of the form ``osd_<id>``) names one of
     the node's OSDs.

     When the node has no entry in ``/indexes/ip`` all counts are 0. Any
     exception raised while reading the OSD map or the alerts is reported
     as a debug ``ExceptionMessage`` event and the counts gathered so far
     are returned.

     Returns a dict keyed by ``'total'``, ``'down'``,
     ``pm_consts.CRITICAL_ALERTS`` and ``pm_consts.WARNING_ALERTS``.
     """
     osd_status_wise_counts = {
         'total': 0,
         'down': 0,
         pm_consts.CRITICAL_ALERTS: 0,
         pm_consts.WARNING_ALERTS: 0
     }
     cluster_id = central_store_util.get_node_cluster_id(node_id)
     node_ip = ''
     ip_indexes = etcd_read_key('/indexes/ip')
     for ip, indexed_node_id in ip_indexes.iteritems():
         if node_id == indexed_node_id:
             node_ip = ip
     if not node_ip:
         # '' is a substring of every address, so without this guard every
         # OSD in the cluster would be attributed to an unindexed node.
         return osd_status_wise_counts
     osds_in_node = []

     def _alerts_on_node(alerts):
         # Count the alerts whose 'osd_<id>' instance is one of this
         # node's OSDs.
         count = 0
         for alert in alerts:
             plugin_instance = alert['tags'].get('plugin_instance', '')
             if int(plugin_instance[len('osd_'):]) in osds_in_node:
                 count += 1
         return count

     try:
         osds = etcd_read_key('/clusters/%s/maps/osd_map/data/osds' %
                              cluster_id)
         osds = ast.literal_eval(osds.get('osds', '[]'))
         for osd in osds:
             # NOTE(review): this is a substring match, so e.g. '10.0.0.1'
             # also matches an address starting '10.0.0.10' - confirm the
             # address format makes that acceptable.
             if (node_ip in osd.get('cluster_addr', '')
                     or node_ip in osd.get('public_addr', '')):
                 osds_in_node.append(osd.get('osd'))
                 # ``or ''`` guards against a missing state: the
                 # membership test against None raised TypeError and
                 # aborted the tally.
                 if 'up' not in (osd.get('state') or ''):
                     osd_status_wise_counts['down'] += 1
                 osd_status_wise_counts['total'] += 1
         crit_alerts, warn_alerts = parse_resource_alerts(
             'osd', pm_consts.CLUSTER, cluster_id=cluster_id)
         osd_status_wise_counts[pm_consts.CRITICAL_ALERTS] = \
             _alerts_on_node(crit_alerts)
         osd_status_wise_counts[pm_consts.WARNING_ALERTS] = \
             _alerts_on_node(warn_alerts)
     except Exception as ex:
         # Best effort: report the failure and return what was tallied.
         Event(
             ExceptionMessage(priority="debug",
                              publisher=NS.publisher_id,
                              payload={
                                  "message":
                                  "Exception caught computing node osd "
                                  "counts",
                                  "exception":
                                  ex
                              }))
     return osd_status_wise_counts
 def get_rbd_status_wise_counts(self, cluster_id, rbds):
     """Return the RBD total and RBD alert counts for a cluster.

     RBDs carry no status, so the result holds only ``len(rbds)`` under
     ``pm_consts.TOTAL`` plus the number of critical and warning ``'rbd'``
     alerts returned by ``parse_resource_alerts``.
     """
     total = len(rbds)
     critical, warning = parse_resource_alerts(
         'rbd', pm_consts.CLUSTER, cluster_id=cluster_id)
     return {
         pm_consts.TOTAL: total,
         pm_consts.CRITICAL_ALERTS: len(critical),
         pm_consts.WARNING_ALERTS: len(warning)
     }
示例#8
0
 def get_node_brick_status_counts(self, node_id):
     """Summarise the states and alert counts of the bricks on one node.

     A brick belongs to the node when its ``'hostname'`` equals either the
     node's name or the node's IP from ``/indexes/ip``. Each such brick
     adds one to the total and, when its ``'status'`` equals
     ``'Stopped'``, one to the stopped count. Alert counts cover only the
     ``'brick'`` alerts whose ``'node_id'`` equals ``node_id``.

     Any exception raised while tallying is reported as an info
     ``Message`` event and the counts gathered so far are returned.

     Returns a dict keyed by ``'stopped'``, ``'total'``,
     ``pm_consts.WARNING_ALERTS`` and ``pm_consts.CRITICAL_ALERTS``.
     """
     node_name = central_store_util.get_node_name_from_id(node_id)
     address_index = etcd_read_key('/indexes/ip')
     node_ip = ''
     for address, owner_id in address_index.iteritems():
         # No break: the last matching address in iteration order wins.
         if node_id == owner_id:
             node_ip = address
     counts = {
         'stopped': 0,
         'total': 0,
         pm_consts.WARNING_ALERTS: 0,
         pm_consts.CRITICAL_ALERTS: 0
     }
     try:
         cluster_id = central_store_util.get_node_cluster_id(node_id)
         if cluster_id:
             volumes = self.get_cluster_volumes(cluster_id)
             for _volume_id, volume in volumes.iteritems():
                 volume_bricks = volume.get('Bricks', {})
                 for _brick_path, brick in volume_bricks.iteritems():
                     if not (brick['hostname'] == node_name
                             or brick['hostname'] == node_ip):
                         continue
                     if brick['status'] == 'Stopped':
                         counts['stopped'] += 1
                     counts['total'] += 1
         critical, warning = parse_resource_alerts(
             'brick', pm_consts.CLUSTER, cluster_id=cluster_id)
         # The critical count is stored before the warning alerts are
         # scanned, so it survives a failure in the second scan.
         counts[pm_consts.CRITICAL_ALERTS] = sum(
             1 for alert in critical if alert['node_id'] == node_id)
         counts[pm_consts.WARNING_ALERTS] = sum(
             1 for alert in warning if alert['node_id'] == node_id)
     except Exception as ex:
         Event(
             Message(
                 priority="info",
                 publisher=NS.publisher_id,
                 payload={
                     "message": "Exception caught fetching node brick"
                     " status wise counts",
                     "exception": ex
                 }
             )
         )
     return counts
 def get_pool_status_wise_counts(self, cluster_id, pools):
     """Return the pool total and pool alert counts for a cluster.

     Pools carry no status, so the result holds only the number of keys in
     ``pools`` under ``pm_consts.TOTAL`` plus the number of critical and
     warning ``'pool'`` alerts returned by ``parse_resource_alerts``.
     """
     total = len(pools.keys())
     critical, warning = parse_resource_alerts(
         'pool', pm_consts.CLUSTER, cluster_id=cluster_id)
     return {
         pm_consts.TOTAL: total,
         pm_consts.CRITICAL_ALERTS: len(critical),
         pm_consts.WARNING_ALERTS: len(warning)
     }
示例#10
0
 def get_node_brick_status_counts(self, node_id):
     """Summarise the states and alert counts of the bricks on one node.

     A brick belongs to the node when its ``'hostname'`` equals either the
     node's name or the node's IP from ``/indexes/ip``. Each such brick
     adds one to the total and, when it has a ``'status'`` equal to
     ``'Stopped'``, one to the stopped count. Alert counts cover only the
     ``'brick'`` alerts whose ``'node_id'`` equals ``node_id``.

     If the node name or the IP index cannot be read
     (``EtcdKeyNotFound``), an error ``ExceptionMessage`` event is emitted
     and all-zero counts are returned. The listed exception types raised
     while tallying are reported as an info ``Message`` event and the
     counts gathered so far are returned.

     Returns a dict keyed by ``'stopped'``, ``'total'``,
     ``pm_consts.WARNING_ALERTS`` and ``pm_consts.CRITICAL_ALERTS``.
     """
     counts = {
         'stopped': 0,
         'total': 0,
         pm_consts.WARNING_ALERTS: 0,
         pm_consts.CRITICAL_ALERTS: 0
     }
     try:
         node_name = central_store_util.get_node_name_from_id(node_id)
     except EtcdKeyNotFound as ex:
         Event(ExceptionMessage(
             priority="error",
             publisher=NS.publisher_id,
             payload={
                 "message": "Error fetching node name for node "
                 "%s" % node_id,
                 "exception": ex
             }
         ))
         return counts
     try:
         address_index = etcd_read_key('/indexes/ip')
     except EtcdKeyNotFound as ex:
         Event(ExceptionMessage(
             priority="error",
             publisher=NS.publisher_id,
             payload={
                 "message": "Error fetching ip indexes",
                 "exception": ex
             }
         ))
         return counts
     # First address indexed to this node, or '' when there is none.
     node_ip = next(
         (address
          for address, owner_id in address_index.iteritems()
          if node_id == owner_id),
         ''
     )
     try:
         cluster_id = central_store_util.get_node_cluster_id(node_id)
         if cluster_id:
             cluster_bricks = self.get_cluster_bricks(cluster_id)
             for _brick_path, brick in cluster_bricks.iteritems():
                 if not (brick['hostname'] == node_name
                         or brick['hostname'] == node_ip):
                     continue
                 # A brick without a 'status' entry is not counted as
                 # stopped.
                 if brick.get('status') == 'Stopped':
                     counts['stopped'] += 1
                 counts['total'] += 1
         critical, warning = parse_resource_alerts(
             'brick', pm_consts.CLUSTER, cluster_id=cluster_id)
         # The critical count is stored before the warning alerts are
         # scanned, so it survives a failure in the second scan.
         counts[pm_consts.CRITICAL_ALERTS] = sum(
             1 for alert in critical if alert['node_id'] == node_id)
         counts[pm_consts.WARNING_ALERTS] = sum(
             1 for alert in warning if alert['node_id'] == node_id)
     except (TendrlPerformanceMonitoringException,
             AttributeError,
             ValueError,
             KeyError) as ex:
         Event(Message(
             priority="info",
             publisher=NS.publisher_id,
             payload={
                 "message": "Exception caught fetching node brick"
                 " status wise counts",
                 "exception": ex
             }
         ))
     return counts