def send_email(current_ip, result):
    reporter = Reporter()
    email_title = u'Shanghai ES cluster alert'
    email_detail = 'ip: ' + current_ip + '\n\n' + result
    reporter.send_email(email_title, email_detail)
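# Hedged usage sketch (not part of the original module): the IP and message below
# are placeholder values chosen purely for illustration.
#
#   send_email('10.10.10.1', 'es process is down')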
class IndicesParser(object):

    def __init__(self):
        self.reporter = Reporter()

    def parse_data(self, indices_data, latest_indices_data, es_config_info):
        esindex_prefix = es_config_info['esindex_prefix']
        data_type = es_config_info['data_type']
        store_size_unit = es_config_info['store_size_unit']
        values = []
        timestamp = time.strftime('%Y-%m-%dT%H:%M:%S+08:00')
        store_size = 'store_size_in_' + store_size_unit
        pri_store_size = 'pri_store_size_in_' + store_size_unit
        final_data_template = {
            '@timestamp': timestamp,
            'appname': 'monitor',
            'type': data_type,
            'indices_status': {
                'health': {},
                'status': {},
                'index_name': {},
                'uuid': {},
                'pri': {},
                'rep': {},
                'docs_count': {},
                'docs_deleted': {},
                store_size: {},
                pri_store_size: {},
                'added_docs_count': {}  # optional attr
            }
        }
        temp_container = {}  # uuid -> docs_count
        for index_data in indices_data:
            index_attr = index_data.strip().split()
            if not index_attr or len(index_attr) != 10:
                continue
            temp_container[index_attr[3]] = index_attr[6]
        for latest_index_data in latest_indices_data:
            latest_index_attr = latest_index_data.strip().split()
            if not latest_index_attr or len(latest_index_attr) != 10:
                continue
            final_data = copy.deepcopy(final_data_template)
            final_data['indices_status']['health'] = latest_index_attr[0]
            final_data['indices_status']['status'] = latest_index_attr[1]
            final_data['indices_status']['index_name'] = latest_index_attr[2]
            final_data['indices_status']['uuid'] = latest_index_attr[3]
            final_data['indices_status']['pri'] = latest_index_attr[4]
            final_data['indices_status']['rep'] = latest_index_attr[5]
            final_data['indices_status']['docs_count'] = int(latest_index_attr[6])
            final_data['indices_status']['docs_deleted'] = int(latest_index_attr[7])
            store_size_value = self.store_size_unit_transition(
                store_size_unit, latest_index_attr[8], latest_index_attr[2])
            pri_store_size_value = self.store_size_unit_transition(
                store_size_unit, latest_index_attr[9], latest_index_attr[2])
            if store_size_value == -1 or pri_store_size_value == -1:
                continue
            final_data['indices_status'][store_size] = store_size_value
            final_data['indices_status'][pri_store_size] = pri_store_size_value
            if len(latest_indices_data) != len(indices_data):  # extra defined field
                final_data['indices_status']['added_docs_count'] = 0
            elif final_data['indices_status']['uuid'] not in temp_container:
                final_data['indices_status']['added_docs_count'] = 0
            else:
                docs_count = temp_container[final_data['indices_status']['uuid']]
                added_docs_count = int(latest_index_attr[6]) - int(docs_count)
                final_data['indices_status']['added_docs_count'] = added_docs_count
            esindex = "%s-%s" % (esindex_prefix, time.strftime('%Y.%m.%d'))
            values.append({
                "_index": esindex,
                "_type": data_type,
                "_source": final_data
            })
        return values
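    # A hedged illustration (not from the original source): parse_data expects each
    # `_cat/indices` row to contain exactly 10 whitespace-separated columns and
    # skips rows with any other arity. The sample row below is fabricated.
    #
    #   green open logstash-2017.01.01 XyZ123AbCdEfGhIjKlMnOp 5 1 1200 0 1.2gb 600mb
    #   [0] health  [1] status  [2] index_name  [3] uuid  [4] pri  [5] rep
    #   [6] docs_count  [7] docs_deleted  [8] store.size  [9] pri.store.size
    #
    # Each accepted row becomes one bulk action of the form
    #   {"_index": "<esindex_prefix>-YYYY.MM.DD", "_type": data_type, "_source": final_data}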
    # unit transition (only bytes through gb are considered; keeping tb-level data
    # in a single index is an ugly design)
    # todo: in terms of time complexity, is there a smarter algorithm for this logic?
    def store_size_unit_transition(self, store_size_unit, store_size_value, index_name):
        if 'tb' in store_size_value:
            email_title = u'Shanghai ES cluster alert'
            email_detail = (u'The following index has reached tb-level disk usage, please adjust '
                            u'the related settings; statistics for this index will stop from now on: ') + index_name
            self.reporter.send_email(email_title, email_detail)
            return -1
        if store_size_unit == 'bytes':
            if store_size_value.endswith('gb'):
                store_size_value = float(store_size_value.strip('gb')) * 1024 * 1024 * 1024
            elif store_size_value.endswith('mb'):
                store_size_value = float(store_size_value.strip('mb')) * 1024 * 1024
            elif store_size_value.endswith('kb'):
                store_size_value = float(store_size_value.strip('kb')) * 1024
            else:
                store_size_value = float(store_size_value.strip('b'))
            return store_size_value
        elif store_size_unit == 'kb':
            if store_size_value.endswith('gb'):
                store_size_value = float(store_size_value.strip('gb')) * 1024 * 1024
            elif store_size_value.endswith('mb'):
                store_size_value = float(store_size_value.strip('mb')) * 1024
            elif store_size_value.endswith('kb'):
                store_size_value = float(store_size_value.strip('kb'))
            else:
                store_size_value = float(store_size_value.strip('b')) / 1024
            return store_size_value
        elif store_size_unit == 'mb':
            if store_size_value.endswith('gb'):
                store_size_value = float(store_size_value.strip('gb')) * 1024
            elif store_size_value.endswith('mb'):
                store_size_value = float(store_size_value.strip('mb'))
            elif store_size_value.endswith('kb'):
                store_size_value = float(store_size_value.strip('kb')) / 1024
            else:
                store_size_value = float(store_size_value.strip('b')) / 1024 / 1024
            return store_size_value
        else:
            if store_size_value.endswith('gb'):
                store_size_value = float(store_size_value.strip('gb'))
            elif store_size_value.endswith('mb'):
                store_size_value = float(store_size_value.strip('mb')) / 1024
            elif store_size_value.endswith('kb'):
                store_size_value = float(store_size_value.strip('kb')) / 1024 / 1024
            else:
                store_size_value = float(store_size_value.strip('b')) / 1024 / 1024 / 1024
            return store_size_value
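    # Hedged examples of the conversion above (values chosen for illustration):
    #   store_size_unit='bytes', value='2kb'   -> 2048.0
    #   store_size_unit='mb',    value='1.5gb' -> 1536.0
    #   store_size_unit='gb',    value='512mb' -> 0.5
    #   any value containing 'tb'              -> -1 (alert e-mail sent, index skipped)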
class IndicesChecker(object):

    def __init__(self):
        conf = ConfigParser.ConfigParser()
        conf.read(CONFIG_PATH)
        self.es_url = conf.get("ES", "es_url")
        self.esindex_prefix = conf.get("ES", "esindex_prefix")
        self.data_type = conf.get("indices_module", "data_type")
        self.sampling_speed = int(conf.get("indices_module", "sampling_speed"))
        self.store_size_unit = conf.get("indices_module", "store_size_unit")
        self.indices_parser = IndicesParser()
        self.reporter = Reporter()

    def start_indices_task(self):
        if_send_email = True
        while True:
            logger.info('Start indices_checker')
            try:
                es = Elasticsearch(self.es_url)
                indices_data = self.get_indices_status(self.es_url)
                time.sleep(self.sampling_speed)
                values = self.make_indices_data(indices_data)
                self.send_data(es, values)
                if_send_email = True
                for conn in es.transport.connection_pool.connections:
                    conn.pool.close()
                logger.info('Indices_checker is running normally')
            except pycurl.error:
                if if_send_email:
                    email_title = u'Shanghai ES cluster alert'
                    email_detail = u'The following node or the whole cluster is abnormal, please check: ' + self.es_url
                    self.reporter.send_email(email_title, email_detail)
                    if_send_email = False
                    logger.warn('Indices_checker fails to connect to es, we will send an alert e-mail')
                else:
                    logger.warn('Indices_checker fails to connect to es, we have already sent an alert e-mail')
            except Exception as e:
                logger.error('Some exception happened to indices_checker, details are as follows:')
                logger.error(e)
            finally:
                logger.info('Finish indices_checker\n')

    def get_indices_status(self, es_url):
        b = StringIO.StringIO()
        c = pycurl.Curl()
        c.setopt(pycurl.URL, 'http://' + es_url + ':9200/_cat/indices')
        c.setopt(pycurl.WRITEFUNCTION, b.write)
        c.perform()
        body = b.getvalue()
        c.close()
        return body.split('\n')

    def make_indices_data(self, indices_data):
        latest_indices_data = self.get_indices_status(self.es_url)
        es_config_info = {
            'esindex_prefix': self.esindex_prefix,
            'data_type': self.data_type,
            'store_size_unit': self.store_size_unit
        }
        return self.indices_parser.parse_data(indices_data, latest_indices_data, es_config_info)

    def send_data(self, es, values):
        helpers.bulk(es, values)
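# Hedged illustration of the configuration sections and keys that IndicesChecker
# reads from CONFIG_PATH; the values shown are placeholders, not the repository's
# real configuration.
#
#   [ES]
#   es_url = 10.10.10.1
#   esindex_prefix = es-monitor
#
#   [indices_module]
#   data_type = indices_status
#   sampling_speed = 60
#   store_size_unit = gb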
class NodesChecker(object):

    def __init__(self):
        conf = ConfigParser.ConfigParser()
        conf.read(CONFIG_PATH)
        self.es_url = conf.get("ES", "es_url")
        self.esindex_prefix = conf.get("ES", "esindex_prefix")
        self.data_type = conf.get("nodes_module", "data_type")
        self.sampling_speed = int(conf.get("nodes_module", "sampling_speed"))
        self.data_structure = conf.get("nodes_module", "data_structure")
        self.nodes_total_count = conf.get("nodes_module", "nodes_total_count")
        self.nodes_parser = NodesParser()
        self.es_template = EsTemplate()
        self.reporter = Reporter()

    def start_nodes_task(self):
        self.es_template.make_nodes_template(self.es_url, self.data_structure)
        if_send_email = True
        latest_check_time = time.strftime('%Y-%m-%dT%H')
        while True:
            logger.info('Start nodes_checker')
            try:
                es = Elasticsearch(self.es_url)
                nodes_count, nodes_data = self.get_nodes_status(self.es_url)
                if nodes_count < int(self.nodes_total_count) and time.strftime('%Y-%m-%dT%H') != latest_check_time:
                    email_title = u'Shanghai ES cluster alert'
                    email_detail = (u'The cluster has ' + str(self.nodes_total_count) +
                                    u' nodes in total, but only ' + str(nodes_count) +
                                    u' of them are currently healthy, please check!')
                    self.reporter.send_email(email_title, email_detail)
                    latest_check_time = time.strftime('%Y-%m-%dT%H')
                time.sleep(self.sampling_speed)
                values = self.make_nodes_data(nodes_count, nodes_data)
                self.send_data(es, values)
                if_send_email = True
                for conn in es.transport.connection_pool.connections:
                    conn.pool.close()
                logger.info('Nodes_checker is running normally')
            except pycurl.error:
                if if_send_email:
                    email_title = u'Shanghai ES cluster alert'
                    email_detail = u'The following node or the whole cluster is abnormal, please check: ' + self.es_url
                    self.reporter.send_email(email_title, email_detail)
                    if_send_email = False
                    logger.warn('Nodes_checker fails to connect to es, we will send an alert e-mail')
                else:
                    logger.warn('Nodes_checker fails to connect to es, we have already sent an alert e-mail')
            except Exception as e:
                logger.error('Some exception happened to nodes_checker, details are as follows:')
                logger.error(e)
            finally:
                logger.info('Finish nodes_checker\n')

    def get_nodes_status(self, es_url):
        b = StringIO.StringIO()
        c = pycurl.Curl()
        c.setopt(pycurl.URL, 'http://' + es_url + ':9200/_nodes/stats/indices,os,process,jvm,fs,transport/search')
        c.setopt(pycurl.WRITEFUNCTION, b.write)
        c.perform()
        body = b.getvalue()
        c.close()
        data = json.loads(body)
        nodes_data = data['nodes']
        nodes_count = len(nodes_data.keys())
        return nodes_count, nodes_data

    def make_nodes_data(self, nodes_count, nodes_data):
        latest_nodes_count, latest_nodes_data = self.get_nodes_status(self.es_url)
        if latest_nodes_count < nodes_count:
            node_ip_list = [nodes_data[node]['ip'].split(':')[0] for node in nodes_data.keys()]
            latest_node_ip_list = [latest_nodes_data[node]['ip'].split(':')[0] for node in latest_nodes_data.keys()]
            lost_node_ip = [node_ip for node_ip in node_ip_list if node_ip not in latest_node_ip_list]
            email_title = u'Shanghai ES cluster alert'
            email_detail = u'The ES process on the following nodes has left the cluster, please check: ' + str(lost_node_ip)
            self.reporter.send_email(email_title, email_detail)
            return []
        elif latest_nodes_count > nodes_count:
            node_ip_list = [nodes_data[node]['ip'].split(':')[0] for node in nodes_data.keys()]
            latest_node_ip_list = [latest_nodes_data[node]['ip'].split(':')[0] for node in latest_nodes_data.keys()]
            added_node_ip = [latest_node_ip for latest_node_ip in latest_node_ip_list if latest_node_ip not in node_ip_list]
            email_title = u'Shanghai ES cluster alert'
            email_detail = u'The following nodes have newly joined the cluster, please confirm: ' + str(added_node_ip)
            self.reporter.send_email(email_title, email_detail)
            return []
        else:
            es_config_info = {'esindex_prefix': self.esindex_prefix, 'data_type': self.data_type}
            if self.data_structure == 'flat':
                return self.nodes_parser.parse2flat_data(nodes_data, latest_nodes_data, es_config_info)
            else:
                return self.nodes_parser.parse2nested_data(nodes_data, latest_nodes_data, es_config_info)

    def send_data(self, es, values):
        helpers.bulk(es, values)
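# Hedged entry-point sketch: this section does not show how the checkers are
# launched; one plausible way is to run each task in its own thread, as below.
# The use of `threading` and the thread names are assumptions for illustration,
# not the repository's confirmed entry point.
if __name__ == '__main__':
    import threading

    indices_thread = threading.Thread(target=IndicesChecker().start_indices_task, name='indices_checker')
    nodes_thread = threading.Thread(target=NodesChecker().start_nodes_task, name='nodes_checker')
    indices_thread.start()
    nodes_thread.start()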