def check_instance_state(self):
    """Check every instance, then run the per-service cluster checks.

    Each row of the ``Instance`` query is handed to ``self.check_instance``
    and the resulting ``(result, msg)`` pair is forwarded to
    ``self.update_result``.  The instances are also grouped by their
    ``service`` attribute so that the "zookeeper" and "hbase" groups can be
    checked as a whole with ``self.check_zk_leader`` and
    ``self.check_hbase_master``.

    Returns:
        The tuple ``(self.state, self.msg)``.  Presumably both attributes
        are maintained by ``update_result`` -- confirm against that method.
    """
    service_role_map = {}
    session = database.getSession()
    try:
        for instance in session.query(Instance):
            result, msg = self.check_instance(instance)
            self.update_result(result, msg)
            # setdefault() replaces dict.has_key(), which no longer exists
            # in Python 3, and does the "create list then append" in one go.
            service_role_map.setdefault(instance.service, []).append(instance)

        for service, roles in service_role_map.items():
            if service == "zookeeper":
                zk_leader_port = database.get_service_conf(
                    session, "zookeeper", "zookeeper_leader_port")
                result, msg = self.check_zk_leader(zk_leader_port, roles)
                self.update_result(result, msg)
            elif service == "hbase":
                # TODO (marker carried over from the original code)
                hbase_master_info_port = database.get_service_conf(
                    session, "hbase", "hbase_master_info_port")
                result, msg = self.check_hbase_master(
                    hbase_master_info_port, roles)
                self.update_result(result, msg)
    finally:
        # Release the session even when one of the checks raises.
        session.close()
    return (self.state, self.msg)
def check_instance_state(self):
    """Check every instance, then run the per-service cluster checks.

    Each row of the ``Instance`` query is handed to ``self.check_instance``,
    which returns ``(key_word, msg)``; the pair is forwarded to
    ``self.update_result`` only when ``key_word`` is not ``None``.  The
    instances are also grouped by their ``service`` attribute so that the
    "zookeeper" and "hbase" groups can be checked as a whole; a falsy result
    from ``self.check_zk_leader`` / ``self.check_hbase_master`` is reported
    through ``self.update_result`` under a fixed cluster-level key word.

    Returns:
        ``self.alarm_list``.  Presumably populated by ``update_result`` --
        confirm against that method.
    """
    service_role_map = {}
    session = database.getSession()
    try:
        for instance in session.query(Instance):
            # Per-instance check.
            key_word, msg = self.check_instance(instance)
            if key_word is not None:
                self.update_result(key_word, msg)
            # Group instances by service.  setdefault() replaces
            # dict.has_key(), which no longer exists in Python 3.
            service_role_map.setdefault(instance.service, []).append(instance)

        for service, roles in service_role_map.items():
            if service == "zookeeper":
                zk_leader_port = database.get_service_conf(
                    session, "zookeeper", "zookeeper_leader_port")
                result, msg = self.check_zk_leader(zk_leader_port, roles)
                if not result:
                    self.update_result("cluster(zk no leader)", msg)
            elif service == "hbase":
                # TODO (marker carried over from the original code)
                hbase_master_info_port = database.get_service_conf(
                    session, "hbase", "hbase_master_info_port")
                result, msg = self.check_hbase_master(
                    hbase_master_info_port, roles)
                if not result:
                    self.update_result("cluster(hbase no leader)", msg)
    finally:
        # Release the session even when one of the checks raises.
        session.close()
    return self.alarm_list
def get_cluster_name():
    """Return the ``cluster_name`` setting of the ``ganglia`` service.

    The value is whatever ``database.get_service_conf`` yields for that
    service/key pair, looked up through a short-lived session.
    """
    session = database.getSession()
    try:
        return database.get_service_conf(session, "ganglia", "cluster_name")
    finally:
        # Close the session even if the lookup raises.
        session.close()
def namenode_web(self):
    """Run the namenode web check against every registered namenode host.

    Reads the ``dfs_namenode_http_address_port`` setting of the ``hdfs``
    service and the ``host`` of every ``Instance`` whose role is
    "namenode", then delegates to ``namenode_web.namenode_web``.

    Returns:
        Whatever ``namenode_web.namenode_web(hosts, port)`` returns.
    """
    session = database.getSession()
    try:
        nm_web_port = database.get_service_conf(
            session, "hdfs", "dfs_namenode_http_address_port")
        namenode_rows = session.query(Instance).filter(
            Instance.role == "namenode")
        nms = [inst.host for inst in namenode_rows]
    finally:
        # Close the session even if a query raises.
        session.close()
    return namenode_web.namenode_web(nms, nm_web_port)
def init_config(self):
    """Initialise the host/port settings used by this object.

    By default the values are read from the database.  Because the
    monitoring part may be used on its own, this function can be edited to
    assign fixed values instead.

    Sets ``self.rmhost`` / ``self.rmport`` (resourcemanager) and
    ``self.hshost`` / ``self.hsport`` (historyserver).
    """
    session = database.getSession()
    try:
        rm_rows = session.query(Instance).filter(
            Instance.role == "resourcemanager")
        # If several rows match, the last one wins; if none match,
        # self.rmhost is left untouched.
        for inst in rm_rows:
            self.rmhost = inst.host
        # The port lookup does not depend on the instance rows.
        self.rmport = database.get_service_conf(
            session, "yarn", "yarn_rm_webapp_port")

        hs_rows = session.query(Instance).filter(
            Instance.role == "historyserver")
        # Same "last row wins / untouched when empty" rule as above.
        for inst in hs_rows:
            self.hshost = inst.host
        self.hsport = database.get_service_conf(
            session, "yarn", "mapreduce_jobhistory_webapp_port")
    finally:
        # Close the session even if a query or lookup raises.
        session.close()
def resourcemanager_web(self):
    """Run the resourcemanager web check; exactly one RM host is expected.

    Reads the ``yarn_rm_webapp_port`` setting of the ``yarn`` service and
    the ``host`` of every ``Instance`` whose role is "resourcemanager".

    Returns:
        ``(contants.ALARM_ERROR, message)`` when zero or more than one
        resourcemanager is registered; otherwise whatever
        ``resourcemanager_web.resourcemanager_web(host, port)`` returns.
    """
    session = database.getSession()
    try:
        rm_port = database.get_service_conf(
            session, "yarn", "yarn_rm_webapp_port")
        rm_rows = session.query(Instance).filter(
            Instance.role == "resourcemanager")
        rms = [inst.host for inst in rm_rows]
    finally:
        # Close the session even if a query raises.
        session.close()

    # NOTE(review): `contants` looks like a misspelling of `constants`; it is
    # left unchanged because the name is resolved elsewhere in the project.
    if not rms:
        # Message: "no resourcemanager could be found".
        return (contants.ALARM_ERROR,
                u"%s 检查不到有resourcemanager" % self.rule.name)
    if len(rms) != 1:
        # Message: "more than one resourcemanager found", plus the hosts.
        return (contants.ALARM_ERROR,
                u"%s 检查到有多个resourcemanager %s" % (self.rule.name, ",".join(rms)))
    return resourcemanager_web.resourcemanager_web(rms[0], rm_port)
def init_config(self):
    """Initialise the host/port settings used by this object.

    By default the values are read from the database.  Because the
    monitoring part may be used on its own, this function can be edited to
    assign fixed values instead.

    Sets ``self.rmhost`` / ``self.rmport`` (resourcemanager) and
    ``self.hshost`` / ``self.hsport`` (historyserver).
    """
    session = database.getSession()
    try:
        rm_rows = session.query(Instance).filter(
            Instance.role == "resourcemanager")
        # If several rows match, the last one wins; if none match,
        # self.rmhost is left untouched.
        for inst in rm_rows:
            self.rmhost = inst.host
        # The port lookup does not depend on the instance rows.
        self.rmport = database.get_service_conf(
            session, "yarn", "yarn_rm_webapp_port")

        hs_rows = session.query(Instance).filter(
            Instance.role == "historyserver")
        # Same "last row wins / untouched when empty" rule as above.
        for inst in hs_rows:
            self.hshost = inst.host
        self.hsport = database.get_service_conf(
            session, "yarn", "mapreduce_jobhistory_webapp_port")
    finally:
        # Close the session even if a query or lookup raises.
        session.close()
def check_instance_state(self):
    """Check every instance, then run the per-service cluster checks.

    Each row of the ``Instance`` query is handed to ``self.check_instance``,
    which returns ``(key_word, msg)``; the pair is forwarded to
    ``self.update_result`` only when ``key_word`` is not ``None``.  The
    instances are also grouped by their ``service`` attribute so that the
    "zookeeper" and "hbase" groups can be checked as a whole; a falsy result
    from ``self.check_zk_leader`` / ``self.check_hbase_master`` is reported
    through ``self.update_result`` under a fixed cluster-level key word.

    Returns:
        ``self.alarm_list``.  Presumably populated by ``update_result`` --
        confirm against that method.
    """
    service_role_map = {}
    session = database.getSession()
    try:
        for instance in session.query(Instance):
            # Per-instance check.
            key_word, msg = self.check_instance(instance)
            if key_word is not None:
                self.update_result(key_word, msg)
            # Group instances by service.  setdefault() replaces
            # dict.has_key(), which no longer exists in Python 3.
            service_role_map.setdefault(instance.service, []).append(instance)

        for service, roles in service_role_map.items():
            if service == "zookeeper":
                zk_leader_port = database.get_service_conf(
                    session, "zookeeper", "zookeeper_leader_port")
                result, msg = self.check_zk_leader(zk_leader_port, roles)
                if not result:
                    self.update_result("cluster(zk no leader)", msg)
            elif service == "hbase":
                # TODO (marker carried over from the original code)
                hbase_master_info_port = database.get_service_conf(
                    session, "hbase", "hbase_master_info_port")
                result, msg = self.check_hbase_master(
                    hbase_master_info_port, roles)
                if not result:
                    self.update_result("cluster(hbase no leader)", msg)
    finally:
        # Release the session even when one of the checks raises.
        session.close()
    return self.alarm_list
def resourcemanager_web(self):
    """Run the resourcemanager web check; exactly one RM host is expected.

    Reads the ``yarn_rm_webapp_port`` setting of the ``yarn`` service and
    the ``host`` of every ``Instance`` whose role is "resourcemanager".

    Returns:
        A one-element list ``[{"key_word": ..., "msg": ...}]`` when zero or
        more than one resourcemanager is registered; otherwise whatever
        ``resourcemanager_web.resourcemanager_web(host, port)`` returns.
    """
    session = database.getSession()
    try:
        rm_port = database.get_service_conf(
            session, "yarn", "yarn_rm_webapp_port")
        rm_rows = session.query(Instance).filter(
            Instance.role == "resourcemanager")
        rms = [inst.host for inst in rm_rows]
    finally:
        # Close the session even if a query raises.
        session.close()

    if not rms:
        # Message: "no resourcemanager could be found".
        msg = u"%s 检查不到有resourcemanager" % self.rule.name
        return [{"key_word": "cluster(no rm)", "msg": msg}]
    if len(rms) != 1:
        # Message: "more than one resourcemanager found", plus the hosts.
        msg = u"%s 检查到有多个resourcemanager %s" % (self.rule.name, ",".join(rms))
        return [{"key_word": "cluster(too much rm)", "msg": msg}]
    return resourcemanager_web.resourcemanager_web(rms[0], rm_port)
def get_cluster_name():
    """Return the ``cluster_name`` setting of the ``ganglia`` service.

    The value is whatever ``database.get_service_conf`` yields for that
    service/key pair, looked up through a short-lived session.
    """
    session = database.getSession()
    try:
        return database.get_service_conf(session, "ganglia", "cluster_name")
    finally:
        # Close the session even if the lookup raises.
        session.close()