class Controller(object):

    @staticmethod
    def fail_cb(manager, entrypoint, exception):
        sandesh_global._logger.info("Load failed for %s with exception %s" %
                                    (str(entrypoint), str(exception)))

    def __init__(self, conf):
        self._conf = conf
        module = Module.ALARM_GENERATOR
        self._moduleid = ModuleNames[module]
        node_type = Module2NodeType[module]
        self._node_type_name = NodeTypeNames[node_type]
        self._hostname = socket.gethostname()
        self._instance_id = self._conf.worker_id()
        sandesh_global.init_generator(self._moduleid, self._hostname,
                                      self._node_type_name, self._instance_id,
                                      self._conf.collectors(),
                                      self._node_type_name,
                                      self._conf.http_port(),
                                      ['opserver.sandesh', 'sandesh'])
        sandesh_global.set_logging_params(
            enable_local_log=self._conf.log_local(),
            category=self._conf.log_category(),
            level=self._conf.log_level(),
            file=self._conf.log_file(),
            enable_syslog=self._conf.use_syslog(),
            syslog_facility=self._conf.syslog_facility())
        self._logger = sandesh_global._logger

        # Trace buffer list
        self.trace_buf = [
            {'name': 'DiscoveryMsg', 'size': 1000}
        ]
        # Create trace buffers
        for buf in self.trace_buf:
            sandesh_global.trace_buffer_create(name=buf['name'],
                                               size=buf['size'])

        tables = ["ObjectCollectorInfo",
                  "ObjectDatabaseInfo",
                  "ObjectVRouter",
                  "ObjectBgpRouter",
                  "ObjectConfigNode"]
        self.mgrs = {}
        self.tab_alarms = {}
        for table in tables:
            self.mgrs[table] = hook.HookManager(
                namespace='contrail.analytics.alarms',
                name=table,
                invoke_on_load=True,
                invoke_args=(),
                on_load_failure_callback=Controller.fail_cb
            )

            for extn in self.mgrs[table][table]:
                self._logger.info('Loaded extensions for %s: %s,%s' %
                                  (table, extn.name, extn.entry_point_target))

            self.tab_alarms[table] = {}

        ConnectionState.init(sandesh_global, self._hostname, self._moduleid,
                             self._instance_id,
                             staticmethod(ConnectionState.get_process_state_cb),
                             NodeStatusUVE, NodeStatus)

        self._us = UVEServer(None, self._logger, self._conf.redis_password())

        self._workers = {}

        self.disc = None
        self._libpart_name = self._hostname + ":" + self._instance_id
        self._libpart = None
        self._partset = set()
        if self._conf.discovery()['server']:
            import discoveryclient.client as client
            data = {
                'ip-address': self._hostname,
                'port': self._instance_id
            }
            self.disc = client.DiscoveryClient(
                self._conf.discovery()['server'],
                self._conf.discovery()['port'],
                ModuleNames[Module.ALARM_GENERATOR])
            self._logger.info("Disc Publish to %s : %s" %
                              (str(self._conf.discovery()), str(data)))
            self.disc.publish(ALARM_GENERATOR_SERVICE_NAME, data)
        else:
            # If there is no discovery service, use the fixed redis_uve list
            redis_uve_list = []
            try:
                for redis_uve in self._conf.redis_uve_list():
                    redis_ip_port = redis_uve.split(':')
                    redis_ip_port = (redis_ip_port[0], int(redis_ip_port[1]))
                    redis_uve_list.append(redis_ip_port)
            except Exception as e:
                self._logger.error('Failed to parse redis_uve_list: %s' % e)
            else:
                self._us.update_redis_uve_list(redis_uve_list)

            # If there is no discovery service, use the fixed alarmgen list
            self._libpart = self.start_libpart(self._conf.alarmgen_list())

        PartitionOwnershipReq.handle_request = self.handle_PartitionOwnershipReq
        PartitionStatusReq.handle_request = self.handle_PartitionStatusReq
        UVETableAlarmReq.handle_request = self.handle_UVETableAlarmReq

    def libpart_cb(self, part_list):
        newset = set(part_list)
        oldset = self._partset
        self._partset = newset

        self._logger.info('Partition List : new %s old %s' %
                          (str(newset), str(oldset)))

        for addpart in (newset - oldset):
            self._logger.info('Partition Add : %s' % addpart)
            self.partition_change(addpart, True)

        for delpart in (oldset - newset):
            self._logger.info('Partition Del : %s' % delpart)
            # Releasing ownership, so the second argument must be False
            self.partition_change(delpart, False)

    def start_libpart(self, ag_list):
        if not self._conf.zk_list():
            self._logger.error('Could not import libpartition: No zookeeper')
            return None
        if not ag_list:
            self._logger.error('Could not import libpartition: No alarmgen list')
            return None
        try:
            from libpartition.libpartition import PartitionClient
            self._logger.error('Starting PC')
            pc = PartitionClient("alarmgen",
                                 self._libpart_name, ag_list,
                                 self._conf.partitions(), self.libpart_cb,
                                 ','.join(self._conf.zk_list()))
            self._logger.error('Started PC')
            return pc
        except Exception as e:
            self._logger.error('Could not import libpartition: %s' % str(e))
            return None

    def handle_uve_notif(self, uves, remove=False):
        self._logger.debug("Changed UVEs : %s" % str(uves))
        no_handlers = set()
        for uv in uves:
            tab = uv.split(':', 1)[0]
            uve_name = uv.split(':', 1)[1]
            if not self.mgrs.has_key(tab):
                no_handlers.add(tab)
                continue
            if remove:
                uve_data = []
            else:
                filters = {'kfilt': [uve_name]}
                itr = self._us.multi_uve_get(tab, True, filters)
                uve_data = itr.next()['value']
            if len(uve_data) == 0:
                self._logger.info("UVE %s deleted" % uv)
                if self.tab_alarms[tab].has_key(uv):
                    del self.tab_alarms[tab][uv]
                    ustruct = UVEAlarms(name=uve_name, deleted=True)
                    alarm_msg = AlarmTrace(data=ustruct, table=tab)
                    self._logger.info('send del alarm: %s' % (alarm_msg.log()))
                    alarm_msg.send()
                continue
            results = self.mgrs[tab].map_method("__call__", uv, uve_data)
            new_uve_alarms = {}
            for res in results:
                nm, errs = res
                self._logger.debug("Alarm[%s] %s: %s" % (tab, nm, str(errs)))
                elems = []
                for ae in errs:
                    rule, val = ae
                    rv = AlarmElement(rule, val)
                    elems.append(rv)
                if len(elems):
                    new_uve_alarms[nm] = UVEAlarmInfo(type=nm,
                                                      description=elems,
                                                      ack=False)
            if (not self.tab_alarms[tab].has_key(uv)) or \
                    pprint.pformat(self.tab_alarms[tab][uv]) != \
                    pprint.pformat(new_uve_alarms):
                # Reuse uve_name (split with maxsplit=1) so UVE names that
                # themselves contain ':' are preserved intact
                ustruct = UVEAlarms(name=uve_name,
                                    alarms=new_uve_alarms.values(),
                                    deleted=False)
                alarm_msg = AlarmTrace(data=ustruct, table=tab)
                self._logger.info('send alarm: %s' % (alarm_msg.log()))
                alarm_msg.send()
            self.tab_alarms[tab][uv] = new_uve_alarms

        if len(no_handlers):
            self._logger.debug('No Alarm Handlers for %s' % str(no_handlers))

    def handle_UVETableAlarmReq(self, req):
        if req.table == "all":
            parts = self.tab_alarms.keys()
        else:
            parts = [req.table]
        self._logger.info("Got UVETableAlarmReq : %s" % str(parts))
        np = 1
        for pt in parts:
            resp = UVETableAlarmResp(table=pt)
            uves = []
            for uk, uv in self.tab_alarms[pt].iteritems():
                alms = []
                for ak, av in uv.iteritems():
                    alms.append(av)
                uves.append(UVEAlarms(name=uk, alarms=alms))
            resp.uves = uves
            if np == len(parts):
                mr = False
            else:
                mr = True
            resp.response(req.context(), mr)
            np = np + 1

    def partition_change(self, partno, enl):
        """
        Call this function when getting or giving up
        ownership of a partition
        Args:
            partno : Partition Number
            enl    : True for acquiring, False for giving up
        Returns:
            status of operation (True for success)
        """
        status = False
        if enl:
            if self._workers.has_key(partno):
                self._logger.info("Dup partition %d" % partno)
            else:
                #uvedb = self._us.get_part(partno)
                ph = UveStreamProc(','.join(self._conf.kafka_broker_list()),
                                   partno, "uve-" + str(partno),
                                   self._logger,
                                   self._us.get_part,
                                   self.handle_uve_notif)
                ph.start()
                self._workers[partno] = ph
                status = True
        else:
            if self._workers.has_key(partno):
                ph = self._workers[partno]
                gevent.kill(ph)
                res, db = ph.get()
                print "Returned " + str(res)
                print "State :"
                for k, v in db.iteritems():
                    print "%s -> %s" % (k, str(v))
                del self._workers[partno]
                status = True
            else:
                self._logger.info("No partition %d" % partno)
        return status

    def handle_PartitionOwnershipReq(self, req):
        self._logger.info("Got PartitionOwnershipReq: %s" % str(req))
        status = self.partition_change(req.partition, req.ownership)
        resp = PartitionOwnershipResp()
        resp.status = status
        resp.response(req.context())

    def process_stats(self):
        ''' Go through the UVEKey-Count stats collected over
            the previous time period over all partitions
            and send it out
        '''
        for pk, pc in self._workers.iteritems():
            din, dout = pc.stats()
            for ktab, tab in dout.iteritems():
                au = AlarmgenUpdate()
                au.name = self._hostname
                au.instance = self._instance_id
                au.table = ktab
                au.partition = pk
                au.keys = []
                for uk, uc in tab.iteritems():
                    ukc = UVEKeyInfo()
                    ukc.key = uk
                    ukc.count = uc
                    au.keys.append(ukc)
                au_trace = AlarmgenUpdateTrace(data=au)
                self._logger.debug('send key stats: %s' % (au_trace.log()))
                au_trace.send()
            for ktab, tab in din.iteritems():
                au = AlarmgenUpdate()
                au.name = self._hostname
                au.instance = self._instance_id
                au.table = ktab
                au.partition = pk
                au.notifs = []
                for kcoll, coll in tab.iteritems():
                    for kgen, gen in coll.iteritems():
                        for tk, tc in gen.iteritems():
                            tkc = UVETypeInfo()
                            tkc.type = tk
                            tkc.count = tc
                            tkc.generator = kgen
                            tkc.collector = kcoll
                            au.notifs.append(tkc)
                au_trace = AlarmgenUpdateTrace(data=au)
                self._logger.debug('send notif stats: %s' % (au_trace.log()))
                au_trace.send()

    def handle_PartitionStatusReq(self, req):
        ''' Return the entire contents of the UVE DB for the
            requested partitions
        '''
        if req.partition == -1:
            parts = self._workers.keys()
        else:
            parts = [req.partition]

        self._logger.info("Got PartitionStatusReq: %s" % str(parts))
        np = 1
        for pt in parts:
            resp = PartitionStatusResp()
            resp.partition = pt
            if self._workers.has_key(pt):
                resp.enabled = True
                resp.uves = []
                for kcoll, coll in self._workers[pt].contents().iteritems():
                    uci = UVECollInfo()
                    uci.collector = kcoll
                    uci.uves = []
                    for kgen, gen in coll.iteritems():
                        ugi = UVEGenInfo()
                        ugi.generator = kgen
                        ugi.uves = []
                        for uk, uc in gen.iteritems():
                            ukc = UVEKeyInfo()
                            ukc.key = uk
                            ukc.count = uc
                            ugi.uves.append(ukc)
                        uci.uves.append(ugi)
                    resp.uves.append(uci)
            else:
                resp.enabled = False
            if np == len(parts):
                mr = False
            else:
                mr = True
            resp.response(req.context(), mr)
            np = np + 1

    def disc_cb_coll(self, clist):
        '''
        Analytics node may be brought up/down any time. For UVE aggregation,
        alarmgen needs to know the list of all Analytics nodes (redis-uves).
        Periodically poll the Collector list [in lieu of redis-uve nodes]
        from the discovery service.
        '''
        newlist = []
        for elem in clist:
            (ipaddr, port) = elem
            newlist.append((ipaddr, self._conf.redis_server_port()))
        self._us.update_redis_uve_list(newlist)

    def disc_cb_ag(self, alist):
        '''
        Analytics node may be brought up/down any time. For partitioning,
        alarmgen needs to know the list of all Analytics nodes (alarmgens).
        Periodically poll the alarmgen list from the discovery service.
        '''
        newlist = []
        for elem in alist:
            (ipaddr, inst) = elem
            newlist.append(ipaddr + ":" + inst)

        # We should always include ourselves in the list of members
        newset = set(newlist)
        newset.add(self._libpart_name)
        newlist = list(newset)
        if not self._libpart:
            self._libpart = self.start_libpart(newlist)
        else:
            self._libpart.update_cluster_list(newlist)

    def run(self):
        alarmgen_cpu_info = CpuInfoData()
        while True:
            before = time.time()
            mod_cpu_info = ModuleCpuInfo()
            mod_cpu_info.module_id = self._moduleid
            mod_cpu_info.instance_id = self._instance_id
            mod_cpu_info.cpu_info = alarmgen_cpu_info.get_cpu_info(
                system=False)
            mod_cpu_state = ModuleCpuState()
            mod_cpu_state.name = self._hostname
            mod_cpu_state.module_cpu_info = [mod_cpu_info]

            alarmgen_cpu_state_trace = ModuleCpuStateTrace(data=mod_cpu_state)
            alarmgen_cpu_state_trace.send()

            aly_cpu_state = AnalyticsCpuState()
            aly_cpu_state.name = self._hostname

            aly_cpu_info = ProcessCpuInfo()
            aly_cpu_info.module_id = self._moduleid
            aly_cpu_info.inst_id = self._instance_id
            aly_cpu_info.cpu_share = mod_cpu_info.cpu_info.cpu_share
            aly_cpu_info.mem_virt = mod_cpu_info.cpu_info.meminfo.virt
            aly_cpu_info.mem_res = mod_cpu_info.cpu_info.meminfo.res
            aly_cpu_state.cpu_info = [aly_cpu_info]

            aly_cpu_state_trace = AnalyticsCpuStateTrace(data=aly_cpu_state)
            aly_cpu_state_trace.send()

            # Send out the UVEKey-Count stats for this time period
            self.process_stats()

            duration = time.time() - before
            if duration < 60:
                gevent.sleep(60 - duration)
            else:
                self._logger.error("Periodic collection took %s sec" % duration)
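

# The HookManager above invokes each alarm extension as
# obj.__call__(uve_key, uve_data) and expects back a (type-name, errors)
# pair, where errors is a list of (rule, value) tuples that
# handle_uve_notif() wraps into AlarmElement objects. Below is a minimal
# sketch of a conforming extension; the class name, module path and UVE
# fields are illustrative, not part of this codebase.
class ProcessStatusAlarm(object):
    """Example alarm: raised when a UVE carries no NodeStatus content."""

    def __call__(self, uve_key, uve_data):
        errs = []
        # uve_data is the aggregated UVE content for uve_key, keyed by struct
        if 'NodeStatus' not in uve_data:
            errs.append(('NodeStatus != None', 'None'))
        return ('ProcessStatus', errs)

# Such a plugin would be registered under the 'contrail.analytics.alarms'
# namespace in the plugin package's setup.py, e.g. (hypothetical entry point):
#     entry_points={
#         'contrail.analytics.alarms': [
#             'ObjectCollectorInfo = myalarms.status:ProcessStatusAlarm',
#         ],
#     }
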
class Controller(object):

    @staticmethod
    def fail_cb(manager, entrypoint, exception):
        sandesh_global._logger.info("Load failed for %s with exception %s" %
                                    (str(entrypoint), str(exception)))

    def __init__(self, conf):
        self._conf = conf
        module = Module.ALARM_GENERATOR
        self._moduleid = ModuleNames[module]
        node_type = Module2NodeType[module]
        self._node_type_name = NodeTypeNames[node_type]
        self._hostname = socket.gethostname()
        self._instance_id = self._conf.worker_id()
        sandesh_global.init_generator(self._moduleid, self._hostname,
                                      self._node_type_name, self._instance_id,
                                      self._conf.collectors(),
                                      self._node_type_name,
                                      self._conf.http_port(),
                                      ['opserver.sandesh', 'sandesh'],
                                      host_ip=self._conf.host_ip())
        sandesh_global.set_logging_params(
            enable_local_log=self._conf.log_local(),
            category=self._conf.log_category(),
            level=self._conf.log_level(),
            file=self._conf.log_file(),
            enable_syslog=self._conf.use_syslog(),
            syslog_facility=self._conf.syslog_facility())
        self._logger = sandesh_global._logger

        # Trace buffer list
        self.trace_buf = [
            {'name': 'DiscoveryMsg', 'size': 1000}
        ]
        # Create trace buffers
        for buf in self.trace_buf:
            sandesh_global.trace_buffer_create(name=buf['name'],
                                               size=buf['size'])

        tables = ["ObjectCollectorInfo",
                  "ObjectDatabaseInfo",
                  "ObjectVRouter",
                  "ObjectBgpRouter",
                  "ObjectConfigNode"]
        self.mgrs = {}
        self.tab_alarms = {}
        for table in tables:
            self.mgrs[table] = hook.HookManager(
                namespace='contrail.analytics.alarms',
                name=table,
                invoke_on_load=True,
                invoke_args=(),
                on_load_failure_callback=Controller.fail_cb
            )

            for extn in self.mgrs[table][table]:
                self._logger.info('Loaded extensions for %s: %s,%s doc %s' %
                                  (table, extn.name, extn.entry_point_target,
                                   extn.obj.__doc__))

            self.tab_alarms[table] = {}

        ConnectionState.init(sandesh_global, self._hostname, self._moduleid,
                             self._instance_id,
                             staticmethod(ConnectionState.get_process_state_cb),
                             NodeStatusUVE, NodeStatus)

        self._us = UVEServer(None, self._logger, self._conf.redis_password())

        self._workers = {}

        self.disc = None
        self._libpart_name = self._hostname + ":" + self._instance_id
        self._libpart = None
        self._partset = set()
        if self._conf.discovery()['server']:
            import discoveryclient.client as client
            data = {
                'ip-address': self._hostname,
                'port': self._instance_id
            }
            self.disc = client.DiscoveryClient(
                self._conf.discovery()['server'],
                self._conf.discovery()['port'],
                ModuleNames[Module.ALARM_GENERATOR])
            self._logger.info("Disc Publish to %s : %s" %
                              (str(self._conf.discovery()), str(data)))
            self.disc.publish(ALARM_GENERATOR_SERVICE_NAME, data)
        else:
            # If there is no discovery service, use the fixed redis_uve list
            redis_uve_list = []
            try:
                for redis_uve in self._conf.redis_uve_list():
                    redis_ip_port = redis_uve.split(':')
                    redis_ip_port = (redis_ip_port[0], int(redis_ip_port[1]))
                    redis_uve_list.append(redis_ip_port)
            except Exception as e:
                self._logger.error('Failed to parse redis_uve_list: %s' % e)
            else:
                self._us.update_redis_uve_list(redis_uve_list)

            # If there is no discovery service, use the fixed alarmgen list
            self._libpart = self.start_libpart(self._conf.alarmgen_list())

        PartitionOwnershipReq.handle_request = self.handle_PartitionOwnershipReq
        PartitionStatusReq.handle_request = self.handle_PartitionStatusReq
        UVETableAlarmReq.handle_request = self.handle_UVETableAlarmReq

    def libpart_cb(self, part_list):

        agpi = AlarmgenPartionInfo()
        agpi.instance = self._instance_id
        agpi.partitions = part_list

        agp = AlarmgenPartition()
        agp.name = self._hostname
        agp.inst_parts = [agpi]

        agp_trace = AlarmgenPartitionTrace(data=agp)
        agp_trace.send()

        newset = set(part_list)
        oldset = self._partset
        self._partset = newset

        self._logger.error('Partition List : new %s old %s' %
                           (str(newset), str(oldset)))

        for addpart in (newset - oldset):
            self._logger.error('Partition Add : %s' % addpart)
            self.partition_change(addpart, True)

        for delpart in (oldset - newset):
            self._logger.error('Partition Del : %s' % delpart)
            self.partition_change(delpart, False)

    def start_libpart(self, ag_list):
        if not self._conf.zk_list():
            self._logger.error('Could not import libpartition: No zookeeper')
            return None
        if not ag_list:
            self._logger.error('Could not import libpartition: No alarmgen list')
            return None
        try:
            from libpartition.libpartition import PartitionClient
            self._logger.error('Starting PC')

            agpi = AlarmgenPartionInfo()
            agpi.instance = self._instance_id
            agpi.partitions = []

            agp = AlarmgenPartition()
            agp.name = self._hostname
            agp.inst_parts = [agpi]

            agp_trace = AlarmgenPartitionTrace(data=agp)
            agp_trace.send()

            pc = PartitionClient("alarmgen",
                                 self._libpart_name, ag_list,
                                 self._conf.partitions(), self.libpart_cb,
                                 ','.join(self._conf.zk_list()))
            self._logger.error('Started PC')
            return pc
        except Exception as e:
            self._logger.error('Could not import libpartition: %s' % str(e))
            return None

    def handle_uve_notif(self, uves, remove=False):
        self._logger.debug("Changed UVEs : %s" % str(uves))
        no_handlers = set()
        for uv in uves:
            tab = uv.split(':', 1)[0]
            uve_name = uv.split(':', 1)[1]
            if not self.mgrs.has_key(tab):
                no_handlers.add(tab)
                continue
            if remove:
                uve_data = []
            else:
                filters = {'kfilt': [uve_name]}
                itr = self._us.multi_uve_get(tab, True, filters)
                uve_data = itr.next()['value']
            if len(uve_data) == 0:
                self._logger.info("UVE %s deleted" % uv)
                if self.tab_alarms[tab].has_key(uv):
                    del self.tab_alarms[tab][uv]
                    ustruct = UVEAlarms(name=uve_name, deleted=True)
                    alarm_msg = AlarmTrace(data=ustruct, table=tab)
                    self._logger.info('send del alarm: %s' % (alarm_msg.log()))
                    alarm_msg.send()
                continue
            results = self.mgrs[tab].map_method("__call__", uv, uve_data)
            new_uve_alarms = {}
            for res in results:
                nm, sev, errs = res
                self._logger.debug("Alarm[%s] %s: %s" % (tab, nm, str(errs)))
                elems = []
                for ae in errs:
                    rule, val = ae
                    rv = AlarmElement(rule, val)
                    elems.append(rv)
                if len(elems):
                    new_uve_alarms[nm] = UVEAlarmInfo(type=nm, severity=sev,
                                                      timestamp=0,
                                                      description=elems,
                                                      ack=False)
            del_types = []
            if self.tab_alarms[tab].has_key(uv):
                for nm, uai in self.tab_alarms[tab][uv].iteritems():
                    # Compare with timestamps zeroed out, so that an
                    # otherwise-unchanged alarm is not treated as an update
                    uai2 = copy.deepcopy(uai)
                    uai2.timestamp = 0
                    # This type was present earlier, but is now gone
                    if not new_uve_alarms.has_key(nm):
                        del_types.append(nm)
                    else:
                        # This type has no new information
                        if pprint.pformat(uai2) == \
                                pprint.pformat(new_uve_alarms[nm]):
                            del new_uve_alarms[nm]

            if len(del_types) != 0 or len(new_uve_alarms) != 0:
                self._logger.debug("Alarm[%s] Deleted %s" %
                                   (tab, str(del_types)))
                self._logger.debug("Alarm[%s] Updated %s" %
                                   (tab, str(new_uve_alarms)))
                # These alarm types are new or updated
                for nm, uai2 in new_uve_alarms.iteritems():
                    uai = copy.deepcopy(uai2)
                    uai.timestamp = UTCTimestampUsec()
                    if not self.tab_alarms[tab].has_key(uv):
                        self.tab_alarms[tab][uv] = {}
                    self.tab_alarms[tab][uv][nm] = uai
                # These alarm types are now gone
                for dnm in del_types:
                    del self.tab_alarms[tab][uv][dnm]

                ustruct = None
                if len(self.tab_alarms[tab][uv]) == 0:
                    ustruct = UVEAlarms(name=uve_name, deleted=True)
                    del self.tab_alarms[tab][uv]
                else:
                    ustruct = UVEAlarms(
                        name=uve_name,
                        alarms=self.tab_alarms[tab][uv].values(),
                        deleted=False)
                alarm_msg = AlarmTrace(data=ustruct, table=tab)
                self._logger.info('send alarm: %s' % (alarm_msg.log()))
                alarm_msg.send()

        if len(no_handlers):
            self._logger.debug('No Alarm Handlers for %s' % str(no_handlers))

    def handle_UVETableAlarmReq(self, req):
        if req.table == "all":
            parts = self.tab_alarms.keys()
        else:
            parts = [req.table]
        self._logger.info("Got UVETableAlarmReq : %s" % str(parts))
        np = 1
        for pt in parts:
            resp = UVETableAlarmResp(table=pt)
            uves = []
            for uk, uv in self.tab_alarms[pt].iteritems():
                alms = []
                for ak, av in uv.iteritems():
                    alms.append(av)
                uves.append(UVEAlarms(name=uk, alarms=alms))
            resp.uves = uves
            if np == len(parts):
                mr = False
            else:
                mr = True
            resp.response(req.context(), mr)
            np = np + 1

    def partition_change(self, partno, enl):
        """
        Call this function when getting or giving up
        ownership of a partition
        Args:
            partno : Partition Number
            enl    : True for acquiring, False for giving up
        Returns:
            status of operation (True for success)
        """
        status = False
        if enl:
            if self._workers.has_key(partno):
                self._logger.info("Dup partition %d" % partno)
            else:
                #uvedb = self._us.get_part(partno)
                ph = UveStreamProc(','.join(self._conf.kafka_broker_list()),
                                   partno, "uve-" + str(partno),
                                   self._logger,
                                   self._us.get_part,
                                   self.handle_uve_notif)
                ph.start()
                self._workers[partno] = ph
                status = True
        else:
            if self._workers.has_key(partno):
                ph = self._workers[partno]
                gevent.kill(ph)
                res, db = ph.get()
                print "Returned " + str(res)
                print "State :"
                for k, v in db.iteritems():
                    print "%s -> %s" % (k, str(v))
                del self._workers[partno]
                status = True
            else:
                self._logger.info("No partition %d" % partno)
        return status

    def handle_PartitionOwnershipReq(self, req):
        self._logger.info("Got PartitionOwnershipReq: %s" % str(req))
        status = self.partition_change(req.partition, req.ownership)
        resp = PartitionOwnershipResp()
        resp.status = status
        resp.response(req.context())

    def process_stats(self):
        ''' Go through the UVEKey-Count stats collected over
            the previous time period over all partitions
            and send it out
        '''
        s_partitions = set()
        s_keys = set()
        n_updates = 0
        for pk, pc in self._workers.iteritems():
            s_partitions.add(pk)
            din, dout = pc.stats()
            for ktab, tab in dout.iteritems():
                au_keys = []
                for uk, uc in tab.iteritems():
                    s_keys.add(uk)
                    n_updates += uc
                    ukc = UVEKeyInfo()
                    ukc.key = uk
                    ukc.count = uc
                    au_keys.append(ukc)
                au_obj = AlarmgenUpdate(name=sandesh_global._source + ':' +
                                        sandesh_global._node_type + ':' +
                                        sandesh_global._module + ':' +
                                        sandesh_global._instance_id,
                                        partition=pk,
                                        table=ktab,
                                        keys=au_keys,
                                        notifs=None)
                self._logger.debug('send key stats: %s' % (au_obj.log()))
                au_obj.send()
            for ktab, tab in din.iteritems():
                au_notifs = []
                for kcoll, coll in tab.iteritems():
                    for kgen, gen in coll.iteritems():
                        for tk, tc in gen.iteritems():
                            tkc = UVETypeInfo()
                            tkc.type = tk
                            tkc.count = tc
                            tkc.generator = kgen
                            tkc.collector = kcoll
                            au_notifs.append(tkc)
                au_obj = AlarmgenUpdate(name=sandesh_global._source + ':' +
                                        sandesh_global._node_type + ':' +
                                        sandesh_global._module + ':' +
                                        sandesh_global._instance_id,
                                        partition=pk,
                                        table=ktab,
                                        keys=None,
                                        notifs=au_notifs)
                self._logger.debug('send notif stats: %s' % (au_obj.log()))
                au_obj.send()

        au = AlarmgenStatus()
        au.name = self._hostname
        au.counters = []
        au.alarmgens = []
        ags = AlarmgenStats()
        ags.instance = self._instance_id
        ags.partitions = len(s_partitions)
        ags.keys = len(s_keys)
        ags.updates = n_updates
        au.counters.append(ags)

        agname = (sandesh_global._source + ':' +
                  sandesh_global._node_type + ':' +
                  sandesh_global._module + ':' +
                  sandesh_global._instance_id)
        au.alarmgens.append(agname)

        atrace = AlarmgenStatusTrace(data=au)
        self._logger.debug('send alarmgen status : %s' % (atrace.log()))
        atrace.send()

    def handle_PartitionStatusReq(self, req):
        ''' Return the entire contents of the UVE DB for the
            requested partitions
        '''
        if req.partition == -1:
            parts = self._workers.keys()
        else:
            parts = [req.partition]

        self._logger.info("Got PartitionStatusReq: %s" % str(parts))
        np = 1
        for pt in parts:
            resp = PartitionStatusResp()
            resp.partition = pt
            if self._workers.has_key(pt):
                resp.enabled = True
                resp.uves = []
                for kcoll, coll in self._workers[pt].contents().iteritems():
                    uci = UVECollInfo()
                    uci.collector = kcoll
                    uci.uves = []
                    for kgen, gen in coll.iteritems():
                        ugi = UVEGenInfo()
                        ugi.generator = kgen
                        ugi.uves = []
                        for uk, uc in gen.iteritems():
                            ukc = UVEKeyInfo()
                            ukc.key = uk
                            ukc.count = uc
                            ugi.uves.append(ukc)
                        uci.uves.append(ugi)
                    resp.uves.append(uci)
            else:
                resp.enabled = False
            if np == len(parts):
                mr = False
            else:
                mr = True
            resp.response(req.context(), mr)
            np = np + 1

    def disc_cb_coll(self, clist):
        '''
        Analytics node may be brought up/down any time. For UVE aggregation,
        alarmgen needs to know the list of all Analytics nodes (redis-uves).
        Periodically poll the Collector list [in lieu of redis-uve nodes]
        from the discovery service.
        '''
        newlist = []
        for elem in clist:
            (ipaddr, port) = elem
            newlist.append((ipaddr, self._conf.redis_server_port()))
        self._us.update_redis_uve_list(newlist)

    def disc_cb_ag(self, alist):
        '''
        Analytics node may be brought up/down any time. For partitioning,
        alarmgen needs to know the list of all Analytics nodes (alarmgens).
        Periodically poll the alarmgen list from the discovery service.
        '''
        newlist = []
        for elem in alist:
            (ipaddr, inst) = elem
            newlist.append(ipaddr + ":" + inst)

        # We should always include ourselves in the list of members
        newset = set(newlist)
        newset.add(self._libpart_name)
        newlist = list(newset)
        if not self._libpart:
            self._libpart = self.start_libpart(newlist)
        else:
            self._libpart.update_cluster_list(newlist)

    def run(self):
        alarmgen_cpu_info = CpuInfoData()
        while True:
            before = time.time()
            mod_cpu_info = ModuleCpuInfo()
            mod_cpu_info.module_id = self._moduleid
            mod_cpu_info.instance_id = self._instance_id
            mod_cpu_info.cpu_info = alarmgen_cpu_info.get_cpu_info(
                system=False)
            mod_cpu_state = ModuleCpuState()
            mod_cpu_state.name = self._hostname
            mod_cpu_state.module_cpu_info = [mod_cpu_info]

            alarmgen_cpu_state_trace = ModuleCpuStateTrace(data=mod_cpu_state)
            alarmgen_cpu_state_trace.send()

            aly_cpu_state = AnalyticsCpuState()
            aly_cpu_state.name = self._hostname

            aly_cpu_info = ProcessCpuInfo()
            aly_cpu_info.module_id = self._moduleid
            aly_cpu_info.inst_id = self._instance_id
            aly_cpu_info.cpu_share = mod_cpu_info.cpu_info.cpu_share
            aly_cpu_info.mem_virt = mod_cpu_info.cpu_info.meminfo.virt
            aly_cpu_info.mem_res = mod_cpu_info.cpu_info.meminfo.res
            aly_cpu_state.cpu_info = [aly_cpu_info]

            aly_cpu_state_trace = AnalyticsCpuStateTrace(data=aly_cpu_state)
            aly_cpu_state_trace.send()

            # Send out the UVEKey-Count stats for this time period
            self.process_stats()

            duration = time.time() - before
            if duration < 60:
                gevent.sleep(60 - duration)
            else:
                self._logger.error("Periodic collection took %s sec" % duration)
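

# In the revision above, handle_uve_notif() unpacks three values per
# extension result: (type-name, severity, errors). A sketch of an extension
# updated for that contract; the severity value is assumed here to be a
# small integer as stored in UVEAlarmInfo.severity, and all names and UVE
# fields below are illustrative rather than part of this codebase.
class PartialSysinfoAlarm(object):
    """Example alarm: raised when a UVE reports no build_info."""

    def __call__(self, uve_key, uve_data):
        errs = []
        ci = uve_data.get('CollectorState', {})
        if ci.get('build_info') is None:
            errs.append(('CollectorState.build_info != None', 'None'))
        return ('PartialSysinfo', 4, errs)
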
class Controller(object):

    @staticmethod
    def fail_cb(manager, entrypoint, exception):
        sandesh_global._logger.info("Load failed for %s with exception %s" %
                                    (str(entrypoint), str(exception)))

    def __init__(self, conf):
        self._conf = conf
        module = Module.ALARM_GENERATOR
        self._moduleid = ModuleNames[module]
        node_type = Module2NodeType[module]
        self._node_type_name = NodeTypeNames[node_type]
        self._hostname = socket.gethostname()
        self._instance_id = self._conf.worker_id()
        sandesh_global.init_generator(self._moduleid, self._hostname,
                                      self._node_type_name, self._instance_id,
                                      self._conf.collectors(),
                                      self._node_type_name,
                                      self._conf.http_port(),
                                      ['opserver.sandesh', 'sandesh'])
        sandesh_global.set_logging_params(
            enable_local_log=self._conf.log_local(),
            category=self._conf.log_category(),
            level=self._conf.log_level(),
            file=self._conf.log_file(),
            enable_syslog=self._conf.use_syslog(),
            syslog_facility=self._conf.syslog_facility())
        self._logger = sandesh_global._logger

        # Trace buffer list
        self.trace_buf = [
            {'name': 'DiscoveryMsg', 'size': 1000}
        ]
        # Create trace buffers
        for buf in self.trace_buf:
            sandesh_global.trace_buffer_create(name=buf['name'],
                                               size=buf['size'])

        tables = ["ObjectCollectorInfo",
                  "ObjectDatabaseInfo",
                  "ObjectVRouter",
                  "ObjectBgpRouter",
                  "ObjectConfigNode"]
        self.mgrs = {}
        self.tab_alarms = {}
        for table in tables:
            self.mgrs[table] = hook.HookManager(
                namespace='contrail.analytics.alarms',
                name=table,
                invoke_on_load=True,
                invoke_args=(),
                on_load_failure_callback=Controller.fail_cb
            )

            for extn in self.mgrs[table][table]:
                self._logger.info('Loaded extensions for %s: %s,%s' %
                                  (table, extn.name, extn.entry_point_target))

            self.tab_alarms[table] = {}

        ConnectionState.init(sandesh_global, self._hostname, self._moduleid,
                             self._instance_id,
                             staticmethod(ConnectionState.get_process_state_cb),
                             NodeStatusUVE, NodeStatus)

        self._us = UVEServer(None, self._logger, self._conf.redis_password())

        self._workers = {}

        self.disc = None
        self._libpart_name = self._hostname + ":" + self._instance_id
        self._libpart = None
        self._partset = set()
        if self._conf.discovery()['server']:
            import discoveryclient.client as client
            data = {
                'ip-address': self._hostname,
                'port': self._instance_id
            }
            self.disc = client.DiscoveryClient(
                self._conf.discovery()['server'],
                self._conf.discovery()['port'],
                ModuleNames[Module.ALARM_GENERATOR])
            self._logger.info("Disc Publish to %s : %s" %
                              (str(self._conf.discovery()), str(data)))
            self.disc.publish(ALARM_GENERATOR_SERVICE_NAME, data)
        else:
            # If there is no discovery service, use the fixed redis_uve list
            redis_uve_list = []
            try:
                for redis_uve in self._conf.redis_uve_list():
                    redis_ip_port = redis_uve.split(':')
                    redis_ip_port = (redis_ip_port[0], int(redis_ip_port[1]))
                    redis_uve_list.append(redis_ip_port)
            except Exception as e:
                self._logger.error('Failed to parse redis_uve_list: %s' % e)
            else:
                self._us.update_redis_uve_list(redis_uve_list)

            # If there is no discovery service, use the fixed alarmgen list
            self._libpart = self.start_libpart(self._conf.alarmgen_list())

        PartitionOwnershipReq.handle_request = self.handle_PartitionOwnershipReq
        PartitionStatusReq.handle_request = self.handle_PartitionStatusReq
        UVETableAlarmReq.handle_request = self.handle_UVETableAlarmReq

    def libpart_cb(self, part_list):
        newset = set(part_list)
        oldset = self._partset
        self._partset = newset

        self._logger.info('Partition List : new %s old %s' %
                          (str(newset), str(oldset)))

        for addpart in (newset - oldset):
            self._logger.info('Partition Add : %s' % addpart)
            self.partition_change(addpart, True)

        for delpart in (oldset - newset):
            self._logger.info('Partition Del : %s' % delpart)
            # Releasing ownership, so the second argument must be False
            self.partition_change(delpart, False)

    def start_libpart(self, ag_list):
        if not self._conf.zk_list():
            self._logger.error('Could not import libpartition: No zookeeper')
            return None
        if not ag_list:
            self._logger.error('Could not import libpartition: No alarmgen list')
            return None
        try:
            from libpartition.libpartition import PartitionClient
            self._logger.error('Starting PC')
            pc = PartitionClient("alarmgen",
                                 self._libpart_name, ag_list,
                                 self._conf.partitions(), self.libpart_cb,
                                 ','.join(self._conf.zk_list()))
            self._logger.error('Started PC')
            return pc
        except Exception as e:
            self._logger.error('Could not import libpartition: %s' % str(e))
            return None

    def handle_uve_notif(self, uves):
        self._logger.debug("Changed UVEs : %s" % str(uves))
        no_handlers = set()
        for uv in uves:
            tab = uv.split(':', 1)[0]
            if not self.mgrs.has_key(tab):
                no_handlers.add(tab)
                continue
            itr = self._us.multi_uve_get(uv, True, None, None, None, None)
            uve_data = itr.next()['value']
            if len(uve_data) == 0:
                self._logger.info("UVE %s deleted" % uv)
                if self.tab_alarms[tab].has_key(uv):
                    del self.tab_alarms[tab][uv]
                    ustruct = UVEAlarms(name=uv, deleted=True)
                    alarm_msg = AlarmTrace(data=ustruct, table=tab)
                    self._logger.info('send del alarm: %s' % (alarm_msg.log()))
                    alarm_msg.send()
                continue
            results = self.mgrs[tab].map_method("__call__", uv, uve_data)
            new_uve_alarms = {}
            for res in results:
                nm, errs = res
                self._logger.info("Alarm[%s] %s: %s" % (tab, nm, str(errs)))
                elems = []
                for ae in errs:
                    rule, val = ae
                    rv = AlarmElement(rule, val)
                    elems.append(rv)
                if len(elems):
                    new_uve_alarms[nm] = UVEAlarmInfo(type=nm,
                                                      description=elems,
                                                      ack=False)
            if (not self.tab_alarms[tab].has_key(uv)) or \
                    pprint.pformat(self.tab_alarms[tab][uv]) != \
                    pprint.pformat(new_uve_alarms):
                ustruct = UVEAlarms(name=uv,
                                    alarms=new_uve_alarms.values(),
                                    deleted=False)
                alarm_msg = AlarmTrace(data=ustruct, table=tab)
                self._logger.info('send alarm: %s' % (alarm_msg.log()))
                alarm_msg.send()
            self.tab_alarms[tab][uv] = new_uve_alarms

        if len(no_handlers):
            self._logger.info('No Alarm Handlers for %s' % str(no_handlers))

    def handle_UVETableAlarmReq(self, req):
        if req.table == "all":
            parts = self.tab_alarms.keys()
        else:
            parts = [req.table]
        self._logger.info("Got UVETableAlarmReq : %s" % str(parts))
        np = 1
        for pt in parts:
            resp = UVETableAlarmResp(table=pt)
            uves = []
            for uk, uv in self.tab_alarms[pt].iteritems():
                alms = []
                for ak, av in uv.iteritems():
                    alms.append(av)
                uves.append(UVEAlarms(name=uk, alarms=alms))
            resp.uves = uves
            if np == len(parts):
                mr = False
            else:
                mr = True
            resp.response(req.context(), mr)
            np = np + 1

    def partition_change(self, partno, enl):
        """
        Call this function when getting or giving up
        ownership of a partition
        Args:
            partno : Partition Number
            enl    : True for acquiring, False for giving up
        Returns:
            status of operation (True for success)
        """
        status = False
        if enl:
            if self._workers.has_key(partno):
                self._logger.info("Dup partition %d" % partno)
            else:
                uvedb = self._us.get_part(partno)
                ph = UveStreamProc(','.join(self._conf.kafka_broker_list()),
                                   partno, "uve-" + str(partno),
                                   self._logger, uvedb,
                                   self.handle_uve_notif)
                ph.start()
                self._workers[partno] = ph
                status = True
        else:
            if self._workers.has_key(partno):
                ph = self._workers[partno]
                gevent.kill(ph)
                res, db = ph.get()
                print "Returned " + str(res)
                print "State :"
                for k, v in db.iteritems():
                    print "%s -> %s" % (k, str(v))
                del self._workers[partno]
                status = True
            else:
                self._logger.info("No partition %d" % partno)
        return status

    def handle_PartitionOwnershipReq(self, req):
        self._logger.info("Got PartitionOwnershipReq: %s" % str(req))
        status = self.partition_change(req.partition, req.ownership)
        resp = PartitionOwnershipResp()
        resp.status = status
        resp.response(req.context())

    def handle_PartitionStatusReq(self, req):
        if req.partition == -1:
            parts = self._workers.keys()
        else:
            parts = [req.partition]

        self._logger.info("Got PartitionStatusReq: %s" % str(parts))
        np = 1
        for pt in parts:
            resp = PartitionStatusResp()
            resp.partition = pt
            if self._workers.has_key(pt):
                resp.enabled = True
                resp.uves = []
                for kcoll, coll in self._workers[pt].contents().iteritems():
                    uci = UVECollInfo()
                    uci.collector = kcoll
                    uci.uves = []
                    for kgen, gen in coll.iteritems():
                        ugi = UVEGenInfo()
                        ugi.generator = kgen
                        ugi.uves = []
                        for uk, uc in gen.iteritems():
                            ukc = UVEKeyInfo()
                            ukc.key = uk
                            ukc.count = uc
                            ugi.uves.append(ukc)
                        uci.uves.append(ugi)
                    resp.uves.append(uci)
            else:
                resp.enabled = False
            if np == len(parts):
                mr = False
            else:
                mr = True
            resp.response(req.context(), mr)
            np = np + 1

    def disc_cb_coll(self, clist):
        '''
        Analytics node may be brought up/down any time. For UVE aggregation,
        alarmgen needs to know the list of all Analytics nodes (redis-uves).
        Periodically poll the Collector list [in lieu of redis-uve nodes]
        from the discovery service.
        '''
        newlist = []
        for elem in clist:
            (ipaddr, port) = elem
            newlist.append((ipaddr, self._conf.redis_server_port()))
        self._us.update_redis_uve_list(newlist)

    def disc_cb_ag(self, alist):
        '''
        Analytics node may be brought up/down any time. For partitioning,
        alarmgen needs to know the list of all Analytics nodes (alarmgens).
        Periodically poll the alarmgen list from the discovery service.
        '''
        newlist = []
        for elem in alist:
            (ipaddr, inst) = elem
            newlist.append(ipaddr + ":" + inst)

        # We should always include ourselves in the list of members
        newset = set(newlist)
        newset.add(self._libpart_name)
        newlist = list(newset)
        if not self._libpart:
            self._libpart = self.start_libpart(newlist)
        else:
            self._libpart.update_cluster_list(newlist)

    def run(self):
        while True:
            gevent.sleep(60)
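

# The libpart_cb() above receives the full set of partitions this instance
# currently owns, and converts it into acquire/release actions by set
# difference against the previous snapshot. A minimal standalone sketch of
# that logic (the helper name is illustrative):
def _diff_partitions(oldset, newset):
    """Return (to_acquire, to_release) between two ownership snapshots."""
    return newset - oldset, oldset - newset

# e.g. moving from owning {0, 1, 2} to owning {1, 2, 3}:
#     _diff_partitions({0, 1, 2}, {1, 2, 3})  ->  ({3}, {0})
# partition_change(p, True) is then called for each acquired partition and
# partition_change(p, False) for each released one.
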
class Controller(object):

    @staticmethod
    def fail_cb(manager, entrypoint, exception):
        sandesh_global._logger.info("Load failed for %s with exception %s" %
                                    (str(entrypoint), str(exception)))

    def __init__(self, conf):
        self._conf = conf
        module = Module.ALARM_GENERATOR
        self._moduleid = ModuleNames[module]
        node_type = Module2NodeType[module]
        self._node_type_name = NodeTypeNames[node_type]
        self._hostname = socket.gethostname()
        self._instance_id = '0'
        sandesh_global.init_generator(self._moduleid, self._hostname,
                                      self._node_type_name, self._instance_id,
                                      self._conf.collectors(),
                                      self._node_type_name,
                                      self._conf.http_port(),
                                      ['opserver.sandesh', 'sandesh'])
        sandesh_global.set_logging_params(
            enable_local_log=self._conf.log_local(),
            category=self._conf.log_category(),
            level=self._conf.log_level(),
            file=self._conf.log_file(),
            enable_syslog=self._conf.use_syslog(),
            syslog_facility=self._conf.syslog_facility())
        self._logger = sandesh_global._logger

        # Trace buffer list
        self.trace_buf = [
            {'name': 'DiscoveryMsg', 'size': 1000}
        ]
        # Create trace buffers
        for buf in self.trace_buf:
            sandesh_global.trace_buffer_create(name=buf['name'],
                                               size=buf['size'])

        tables = ["ObjectCollectorInfo",
                  "ObjectDatabaseInfo",
                  "ObjectVRouter",
                  "ObjectBgpRouter",
                  "ObjectConfigNode"]
        self.mgrs = {}
        self.tab_alarms = {}
        for table in tables:
            self.mgrs[table] = hook.HookManager(
                namespace='contrail.analytics.alarms',
                name=table,
                invoke_on_load=True,
                invoke_args=(),
                on_load_failure_callback=Controller.fail_cb)

            for extn in self.mgrs[table][table]:
                self._logger.info('Loaded extensions for %s: %s,%s' %
                                  (table, extn.name, extn.entry_point_target))

            self.tab_alarms[table] = {}

        ConnectionState.init(
            sandesh_global, self._hostname, self._moduleid, self._instance_id,
            staticmethod(ConnectionState.get_process_state_cb),
            NodeStatusUVE, NodeStatus)

        self._us = UVEServer(None, self._logger, self._conf.redis_password())

        self.disc = None
        if self._conf.discovery()['server']:
            import discoveryclient.client as client
            data = {'ip-address': self._hostname, 'port': self._instance_id}
            self.disc = client.DiscoveryClient(
                self._conf.discovery()['server'],
                self._conf.discovery()['port'],
                ModuleNames[Module.ALARM_GENERATOR])
            self._logger.info("Disc Publish to %s : %s" %
                              (str(self._conf.discovery()), str(data)))
            self.disc.publish(ALARM_GENERATOR_SERVICE_NAME, data)
        else:
            redis_uve_list = []
            try:
                for redis_uve in self._conf.redis_uve_list():
                    redis_ip_port = redis_uve.split(':')
                    redis_ip_port = (redis_ip_port[0], int(redis_ip_port[1]))
                    redis_uve_list.append(redis_ip_port)
            except Exception as e:
                self._logger.error('Failed to parse redis_uve_list: %s' % e)
            else:
                self._us.update_redis_uve_list(redis_uve_list)

        PartitionOwnershipReq.handle_request = self.handle_PartitionOwnershipReq
        PartitionStatusReq.handle_request = self.handle_PartitionStatusReq
        UVETableAlarmReq.handle_request = self.handle_UVETableAlarmReq

        self._workers = {}

    def handle_uve_notif(self, uves):
        self._logger.debug("Changed UVEs : %s" % str(uves))
        no_handlers = set()
        for uv in uves:
            tab = uv.split(':', 1)[0]
            if not self.mgrs.has_key(tab):
                no_handlers.add(tab)
                continue
            itr = self._us.multi_uve_get(uv, True, None, None, None, None)
            uve_data = itr.next()['value']
            if len(uve_data) == 0:
                # The UVE may not have any recorded alarms yet, so guard
                # the delete to avoid a KeyError
                if self.tab_alarms[tab].has_key(uv):
                    del self.tab_alarms[tab][uv]
                self._logger.info("UVE %s deleted" % uv)
                continue
            results = self.mgrs[tab].map_method("__call__", uv, uve_data)
            new_uve_alarms = {}
            for res in results:
                nm, errs = res
                self._logger.info("Alarm[%s] %s: %s" % (tab, nm, str(errs)))
                elems = []
                for ae in errs:
                    rule, val = ae
                    rv = AlarmElement(rule, val)
                    elems.append(rv)
                if len(elems):
                    new_uve_alarms[nm] = UVEAlarmInfo(type=nm,
                                                      description=elems,
                                                      ack=False)
            self.tab_alarms[tab][uv] = new_uve_alarms

        if len(no_handlers):
            self._logger.info('No Alarm Handlers for %s' % str(no_handlers))

    def handle_UVETableAlarmReq(self, req):
        if req.table == "all":
            parts = self.tab_alarms.keys()
        else:
            parts = [req.table]
        self._logger.info("Got UVETableAlarmReq : %s" % str(parts))
        np = 1
        for pt in parts:
            resp = UVETableAlarmResp(table=pt)
            uves = []
            for uk, uv in self.tab_alarms[pt].iteritems():
                alms = []
                for ak, av in uv.iteritems():
                    alms.append(av)
                uves.append(UVEAlarms(name=uk, alarms=alms))
            resp.uves = uves
            if np == len(parts):
                mr = False
            else:
                mr = True
            resp.response(req.context(), mr)
            np = np + 1

    def handle_PartitionOwnershipReq(self, req):
        self._logger.info("Got PartitionOwnershipReq: %s" % str(req))
        status = False
        if req.ownership:
            if self._workers.has_key(req.partition):
                self._logger.info("Dup partition %d" % req.partition)
            else:
                uvedb = self._us.get_part(req.partition)
                ph = UveStreamProc(','.join(self._conf.kafka_broker_list()),
                                   req.partition,
                                   "uve-" + str(req.partition),
                                   self._logger, uvedb,
                                   self.handle_uve_notif)
                ph.start()
                self._workers[req.partition] = ph
                status = True
        else:
            #import pdb; pdb.set_trace()
            if self._workers.has_key(req.partition):
                ph = self._workers[req.partition]
                gevent.kill(ph)
                res, db = ph.get()
                print "Returned " + str(res)
                print "State :"
                for k, v in db.iteritems():
                    print "%s -> %s" % (k, str(v))
                del self._workers[req.partition]
                status = True
            else:
                self._logger.info("No partition %d" % req.partition)
        resp = PartitionOwnershipResp()
        resp.status = status
        resp.response(req.context())

    def handle_PartitionStatusReq(self, req):
        if req.partition == -1:
            parts = self._workers.keys()
        else:
            parts = [req.partition]

        self._logger.info("Got PartitionStatusReq: %s" % str(parts))
        np = 1
        for pt in parts:
            resp = PartitionStatusResp()
            resp.partition = pt
            if self._workers.has_key(pt):
                resp.enabled = True
                resp.uves = []
                for kcoll, coll in self._workers[pt].contents().iteritems():
                    uci = UVECollInfo()
                    uci.collector = kcoll
                    uci.uves = []
                    for kgen, gen in coll.iteritems():
                        ugi = UVEGenInfo()
                        ugi.generator = kgen
                        ugi.uves = list(gen)
                        uci.uves.append(ugi)
                    resp.uves.append(uci)
            else:
                resp.enabled = False
            if np == len(parts):
                mr = False
            else:
                mr = True
            resp.response(req.context(), mr)
            np = np + 1

    def disc_cb_coll(self, clist):
        '''
        Analytics node may be brought up/down any time. For UVE aggregation,
        alarmgen needs to know the list of all Analytics nodes (redis-uves).
        Periodically poll the Collector list [in lieu of redis-uve nodes]
        from the discovery service.
        '''
        newlist = []
        for elem in clist:
            (ipaddr, port) = elem
            newlist.append((ipaddr, self._conf.redis_server_port()))
        self._us.update_redis_uve_list(newlist)

    def disc_cb_ag(self, alist):
        '''
        Analytics node may be brought up/down any time. For partitioning,
        alarmgen needs to know the list of all Analytics nodes (alarmgens).
        Periodically poll the alarmgen list from the discovery service.
        '''
        # TODO: Hook up with the partitioning library
        pass

    def run(self):
        while True:
            gevent.sleep(60)
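

# In the revision above, each acquired partition gets a UveStreamProc worker
# that consumes the per-partition Kafka topic "uve-<partition>" from the
# configured broker list. A sketch of the values involved, with illustrative
# broker addresses (the real list comes from kafka_broker_list()):
brokers = ['192.0.2.10:9092', '192.0.2.11:9092']
partition = 3
broker_str = ','.join(brokers)    # "192.0.2.10:9092,192.0.2.11:9092"
topic = "uve-" + str(partition)   # "uve-3", as passed to UveStreamProc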