Exemplo n.º 1
0
 def get_clusters_status_wise_counts(self, cluster_summaries):
     """Aggregate status and alert counts over this plugin's clusters.

     For every summary in ``cluster_summaries`` the cluster's
     ``TendrlContext`` and ``GlobalDetails`` are read from the central
     store. Clusters whose context ``sds_name`` contains ``self.name``
     contribute to the result.

     :param cluster_summaries: iterable of objects exposing ``cluster_id``.
     :returns: dict with
         ``'status'`` - per-status counters plus ``'total'`` (only clusters
         reporting a non-empty status are counted),
         ``'near_full'`` - number of critical ``cluster_utilization``
         alerts,
         ``pm_consts.CRITICAL_ALERTS`` / ``pm_consts.WARNING_ALERTS`` -
         total number of critical / warning cluster alerts.
     """
     clusters_status_wise_counts = {
         'status': {
             'total': 0
         },
         'near_full': 0,
         pm_consts.CRITICAL_ALERTS: 0,
         pm_consts.WARNING_ALERTS: 0
     }
     status_counts = clusters_status_wise_counts['status']
     cluster_alerts = []
     for cluster_summary in cluster_summaries:
         cluster_id = cluster_summary.cluster_id
         try:
             cluster_tendrl_context = central_store_util.read(
                 '/clusters/%s/TendrlContext' % cluster_id)
             global_details = central_store_util.read(
                 '/clusters/%s/GlobalDetails' % cluster_id)
         except EtcdKeyNotFound:
             # Skip only the cluster whose keys are missing. Returning here
             # would drop every remaining cluster and also bypass the
             # near_full pass for alerts that were already collected.
             continue
         cluster_status = global_details.get('status')
         # A context without 'sds_name' cannot match this plugin; fall back
         # to '' so the membership test does not raise TypeError on None.
         if self.name not in (cluster_tendrl_context.get('sds_name') or ''):
             continue
         if cluster_status:
             status_counts[cluster_status] = \
                 status_counts.get(cluster_status, 0) + 1
             status_counts['total'] += 1
         cluster_critical_alerts, cluster_warning_alerts = \
             parse_resource_alerts(
                 None,
                 pm_consts.CLUSTER,
                 cluster_id=cluster_id
             )
         cluster_alerts.extend(cluster_critical_alerts)
         cluster_alerts.extend(cluster_warning_alerts)
         clusters_status_wise_counts[pm_consts.CRITICAL_ALERTS] += len(
             cluster_critical_alerts)
         clusters_status_wise_counts[pm_consts.WARNING_ALERTS] += len(
             cluster_warning_alerts)
     # A critical utilization alert marks the cluster as nearly full.
     for cluster_alert in cluster_alerts:
         if (cluster_alert['severity'] == pm_consts.CRITICAL
                 and cluster_alert['resource'] == 'cluster_utilization'):
             clusters_status_wise_counts['near_full'] += 1
     return clusters_status_wise_counts
Exemplo n.º 2
0
 def configure_monitoring(self, integration_id):
     """Delegate monitoring configuration to the matching SDS plugin.

     Reads ``clusters/<integration_id>/TendrlContext`` from the central
     store and hands it to the first plugin in ``SDSPlugin.plugins`` whose
     ``name`` equals the context's ``sds_name``.

     :param integration_id: identifier used to build the context key.
     :returns: whatever the plugin's ``configure_monitoring`` returns, or
         ``None`` when the context is missing, cannot be fetched, or no
         plugin matches.
     """
     context_key = 'clusters/%s/TendrlContext' % integration_id
     try:
         sds_tendrl_context = central_store_util.read(context_key)
     except EtcdKeyNotFound:
         # Nothing to configure when the cluster has no context.
         return None
     except EtcdException as ex:
         failure = ExceptionMessage(
             priority="debug",
             publisher=NS.publisher_id,
             payload={
                 "message":
                 'Failed to configure monitoring for '
                 'cluster %s as tendrl context could '
                 'not be fetched.' % integration_id,
                 "exception":
                 ex
             })
         Event(failure)
         return None

     wanted_sds = sds_tendrl_context['sds_name']
     for candidate in SDSPlugin.plugins:
         if candidate.name == wanted_sds:
             return candidate.configure_monitoring(sds_tendrl_context)

     # No plugin claimed this sds type; record it and give up.
     Event(
         Message(priority="debug",
                 publisher=NS.publisher_id,
                 payload={
                     "message":
                     'No plugin defined for %s. Hence cannot '
                     'configure it' % wanted_sds
                 }))
     return None
 def parse_cluster(self, cluster_id):
     """Build the ``ClusterSummary`` for ``cluster_id``.

     Utilization figures come from ``/clusters/<cluster_id>/Utilization``
     in the central store; the remaining fields are gathered through the
     helper methods and ``central_store_util`` calls below.

     :param cluster_id: identifier of the cluster to summarise.
     :returns: a ``ClusterSummary`` instance.
     """
     stats = central_store_util.read(
         '/clusters/%s/Utilization' % cluster_id
     )
     # Each figure prefers its first key, falls back to the alternative
     # key, and finally to zero when neither holds a truthy value.
     used_value = stats.get('used_capacity') or stats.get('used') or 0
     total_value = stats.get('raw_capacity') or stats.get('total') or 0
     used_pct = stats.get('pcnt_used') or 0

     utilization_summary = {
         'total': int(total_value),
         'used': int(used_value),
         'percent_used': float(used_pct)
     }
     return ClusterSummary(
         utilization=utilization_summary,
         iops=str(self.get_cluster_iops(cluster_id)),
         hosts_count=self.parse_host_count(cluster_id),
         sds_type=central_store_util.get_cluster_sds_name(cluster_id),
         node_summaries=self.cluster_nodes_summary(cluster_id),
         sds_det=NS.sds_monitoring_manager.get_cluster_summary(
             cluster_id,
             central_store_util.get_cluster_name(cluster_id)
         ),
         cluster_id=cluster_id,
     )
 def parse_cluster(self, cluster_id):
     """Build the ``ClusterSummary`` for ``cluster_id``.

     Utilization is read from ``/clusters/<cluster_id>/Utilization``; when
     that read fails the failure is logged as a debug event and all
     utilization figures default to zero. A failed sds-name lookup is
     likewise logged, and ``sds_type`` falls back to an empty string.

     :param cluster_id: identifier of the cluster to summarise.
     :returns: a ``ClusterSummary`` instance.
     """
     utilization = {}
     try:
         utilization = central_store_util.read('/clusters/%s/Utilization' %
                                               cluster_id)
     except (EtcdKeyNotFound, AttributeError, EtcdException) as ex:
         Event(
             ExceptionMessage(priority="debug",
                              publisher=NS.publisher_id,
                              payload={
                                  "message":
                                  'Utilization not available for cluster'
                                  ' %s.' % cluster_id,
                                  "exception":
                                  ex
                              }))
     # Each figure prefers its first key, falls back to the alternative
     # key, and finally to zero when neither holds a truthy value.
     used = utilization.get('used_capacity') or utilization.get('used') or 0
     total = utilization.get('raw_capacity') or utilization.get('total') or 0
     percent_used = utilization.get('pcnt_used') or 0
     # Bind a default first: without it a failed lookup below is logged
     # and then crashes with UnboundLocalError when sds_name is used.
     sds_name = ''
     try:
         sds_name = central_store_util.get_cluster_sds_name(cluster_id)
     except (EtcdKeyNotFound, EtcdException, AttributeError) as ex:
         Event(
             ExceptionMessage(priority="debug",
                              publisher=NS.publisher_id,
                              payload={
                                  "message":
                                  'Error caught fetching sds name of'
                                  ' cluster %s.' % cluster_id,
                                  "exception":
                                  ex
                              }))
     return ClusterSummary(
         utilization={
             'total': int(total),
             'used': int(used),
             'percent_used': float(percent_used)
         },
         iops=str(self.get_cluster_iops(cluster_id)),
         hosts_count=self.parse_host_count(cluster_id),
         sds_type=sds_name,
         node_summaries=self.cluster_nodes_summary(cluster_id),
         sds_det=NS.sds_monitoring_manager.get_cluster_summary(
             cluster_id, central_store_util.get_cluster_name(cluster_id)),
         cluster_id=cluster_id,
     )
 def parse_host_count(self, cluster_id):
     """Count the nodes of a cluster by status and alert severity.

     Nodes whose ``NodeContext`` cannot be read are logged as a debug
     event and left out of every counter.

     :param cluster_id: identifier of the cluster whose nodes are counted.
     :returns: dict with the keys ``'total'``, ``'down'`` (status present
         and not ``'UP'``), ``'crit_alert_count'`` and
         ``'warn_alert_count'``.
     """
     counts = dict.fromkeys(
         ('total', 'down', 'crit_alert_count', 'warn_alert_count'), 0)
     for node_id in central_store_util.get_cluster_node_ids(cluster_id):
         try:
             node_context = central_store_util.read(
                 '/clusters/%s/nodes/%s/NodeContext' %
                 (cluster_id, node_id))
         except (EtcdKeyNotFound, AttributeError, EtcdException) as ex:
             Event(
                 ExceptionMessage(priority="debug",
                                  publisher=NS.publisher_id,
                                  payload={
                                      "message":
                                      'Failed to fetch node-context from'
                                      ' /clusters/%s/nodes/%s/NodeContext' %
                                      (cluster_id, node_id),
                                      "exception":
                                      ex
                                  }))
             continue

         # A node without a status still counts towards the total.
         node_status = node_context.get('status')
         if node_status and node_status != 'UP':
             counts['down'] += 1
         counts['total'] += 1

         try:
             node_alerts = central_store_util.get_node_alerts(node_id)
         except EtcdKeyNotFound:
             # No alerts stored for this node.
             node_alerts = []
         except (AttributeError, EtcdException) as ex:
             node_alerts = []
             Event(
                 ExceptionMessage(priority="debug",
                                  publisher=NS.publisher_id,
                                  payload={
                                      "message":
                                      'Error fetching alerts for node %s' %
                                      (node_id),
                                      "exception":
                                      ex
                                  }))

         for alert in node_alerts:
             severity = alert.get('severity')
             if severity == 'CRITICAL':
                 counts['crit_alert_count'] += 1
             elif severity == 'WARNING':
                 counts['warn_alert_count'] += 1
     return counts
Exemplo n.º 6
0
 def get_node_services_count(self, node_id):
     """Return the ``nodes/<node_id>/Services`` entry of the central store.

     Despite the name, the value read from the store is returned as is; no
     counting happens here.

     :param node_id: identifier used to build the store key.
     :returns: the stored services data, or an empty dict when the key
         does not exist (the failure is logged as a debug event).
     """
     try:
         return central_store_util.read('nodes/%s/Services' % node_id)
     except EtcdKeyNotFound as ex:
         not_found = ExceptionMessage(
             priority="debug",
             publisher=NS.publisher_id,
             payload={
                 "message":
                 'Failed to fetch services of '
                 'node %s' % node_id,
                 "exception":
                 ex
             })
         Event(not_found)
     return {}
Exemplo n.º 7
0
 def get_services_count(self, cluster_node_ids):
     """Tally running / not-running supported services across nodes.

     Nodes without a ``nodes/<node_id>/Services`` key are logged and
     skipped. A service entry that fails to parse is logged and skipped
     without affecting the other entries.

     :param cluster_node_ids: iterable of node identifiers.
     :returns: dict mapping each service name found in
         ``self.supported_services`` to
         ``{'running': int, 'not_running': int}``; services that exist on
         no node are absent.
     """
     tallies = {}
     for node_id in cluster_node_ids:
         try:
             node_services = central_store_util.read(
                 'nodes/%s/Services' % node_id)
         except EtcdKeyNotFound as ex:
             Event(
                 ExceptionMessage(priority="debug",
                                  publisher=NS.publisher_id,
                                  payload={
                                      "message":
                                      'Failed to fetch services of '
                                      'node %s' % node_id,
                                      "exception":
                                      ex
                                  }))
             continue
         for service_name, service_det in node_services.iteritems():
             try:
                 if service_name not in self.supported_services:
                     continue
                 service_counter = tallies.get(
                     service_name, {'running': 0, 'not_running': 0})
                 # Only services that exist on the node are recorded.
                 if service_det['exists'] == 'True':
                     if service_det['running'] == 'True':
                         bucket = 'running'
                     else:
                         bucket = 'not_running'
                     service_counter[bucket] += 1
                     tallies[service_name] = service_counter
             except (ValueError, AttributeError, KeyError) as ex:
                 Event(
                     ExceptionMessage(priority="debug",
                                      publisher=NS.publisher_id,
                                      payload={
                                          "message":
                                          'Failed to parse services of '
                                          'node %s' % node_id,
                                          "exception":
                                          ex
                                      }))
     return tallies
Exemplo n.º 8
0
 def get_services_count(self, cluster_node_ids):
     """Tally running / not-running supported services across nodes.

     Errors raised while reading or parsing a node's services are not
     handled here and propagate to the caller.

     :param cluster_node_ids: iterable of node identifiers.
     :returns: dict mapping each service name found in
         ``self.supported_services`` to
         ``{'running': int, 'not_running': int}``; services that exist on
         no node are absent.
     """
     tallies = {}
     for node_id in cluster_node_ids:
         services_key = 'nodes/%s/Services' % node_id
         node_services = central_store_util.read(services_key)
         for service_name, service_det in node_services.iteritems():
             if service_name not in self.supported_services:
                 continue
             service_counter = tallies.get(
                 service_name, {'running': 0, 'not_running': 0})
             # Only services that exist on the node are recorded.
             if service_det['exists'] == 'True':
                 if service_det['running'] == 'True':
                     bucket = 'running'
                 else:
                     bucket = 'not_running'
                 service_counter[bucket] += 1
                 tallies[service_name] = service_counter
     return tallies
 def cluster_nodes_summary(self, cluster_id):
     """Collect the stored monitoring summaries of a cluster's nodes.

     Each node's summary is read from ``/monitoring/summary/nodes/<id>``.
     Nodes whose summary cannot be read are logged as a debug event and
     omitted from the result.

     :param cluster_id: identifier of the cluster whose nodes are read.
     :returns: list of the summaries that could be read.
     """
     collected = []
     for node_id in central_store_util.get_cluster_node_ids(cluster_id):
         try:
             summary = central_store_util.read(
                 '/monitoring/summary/nodes/%s' % node_id)
         except (EtcdKeyNotFound, AttributeError, EtcdException) as ex:
             Event(
                 ExceptionMessage(
                     priority="debug",
                     publisher=NS.publisher_id,
                     payload={
                         "message":
                         'Error caught fetching node summary of'
                         ' node %s.' % node_id,
                         "exception":
                         ex
                     }))
         else:
             collected.append(summary)
     return collected
Exemplo n.º 10
0
 def get_node_services_count(self, node_id):
     """Return the ``nodes/<node_id>/Services`` entry of the central store.

     Despite the name, the value read from the store is returned as is; no
     counting happens here. Read errors propagate to the caller.

     :param node_id: identifier used to build the store key.
     """
     services_key = 'nodes/%s/Services' % node_id
     return central_store_util.read(services_key)