def main(self):
    """Run the collectd receiver loop.

    Sets the process title and the exit handler, opens a ``CollectdServer``
    on ``self.host`` / ``self.port`` / ``self.multicast`` and then loops
    forever:

    * every command returned by a known element's ``get_command()`` is
      wrapped in an ``ExternalCommand`` and put on ``self.from_q``;
    * every item read from the server is attached to its element (created
      on first sight) as perf data, or turned into an external command when
      it is a message.

    Any exception ends the loop; it is logged and the method returns.
    """
    self.set_proctitle(self.name)
    self.set_exit_handler()
    try:
        cs = CollectdServer(self.host, self.port, self.multicast)
        while True:
            # Send whatever the known elements have ready.
            for e in elements.values():
                c = e.get_command()
                if c is not None:
                    logger.debug("[Collectd] Got %s" % c)
                    self.from_q.put(ExternalCommand(c))

            for item in cs.read():
                logger.debug("[Collectd] %s %s" % (item, item.__dict__))
                n = item.get_name()
                if not n:
                    # Without a name there is no element to attach the item
                    # to; looking it up would raise KeyError and stop the
                    # whole receiver loop, so just ignore the item.
                    continue
                if n not in elements:
                    elements[n] = Element(item.host, item.get_srv_desc(), item.interval)
                e = elements[n]

                kind = item.get_kind()
                if kind == TYPE_VALUES:
                    e.add_perf_data(item.get_metric_name(),
                                    item.get_metric_values(),
                                    item.get_time())
                elif kind == TYPE_MESSAGE:
                    c = item.get_message_command()
                    if c is not None:
                        self.from_q.put(ExternalCommand(c))
    except Exception as e:
        logger.error("[Collectd] exception: %s" % str(e))
def hook_tick(self, arb): now = int(time.time()) self.tick_external_command() print "*" * 10, "Tick tick for hot dependency" # If the mapping file changed, we reload it and update our links # if we need it if self._is_mapping_file_changed(): print "The mapping file changed, I update it" self._update_mapping() additions, removed = self._got_mapping_changes() print "Additions : ", additions print "Remove : ", removed for father_k, son_k in additions: son_type, son_name = son_k father_type, father_name = father_k print "Got new add", son_type, son_name, father_type, father_name son = arb.conf.hosts.find_by_name(son_name.strip()) father = arb.conf.hosts.find_by_name(father_name.strip()) # if we cannot find them in the conf, bypass them if son is None or father is None: print "not find dumbass!" continue print son_name, father_name if son_type == 'host' and father_type == 'host': # We just raise the external command, arbiter will do the job # to dispatch them extcmd = "[%lu] ADD_SIMPLE_HOST_DEPENDENCY;%s;%s\n" % ( now, son_name, father_name) e = ExternalCommand(extcmd) print 'Raising external command', extcmd arb.add(e) # And now the deletion part for father_k, son_k in removed: son_type, son_name = son_k father_type, father_name = father_k print "Got new del", son_type, son_name, father_type, father_name son = arb.conf.hosts.find_by_name(son_name.strip()) father = arb.conf.hosts.find_by_name(father_name.strip()) # if we cannot find them in the conf, bypass them if son is None or father is None: print "not find dumbass!" continue print son_name, father_name if son_type == 'host' and father_type == 'host': # We just raise the external command, arbiter will do the job # to dispatch them extcmd = "[%lu] DEL_HOST_DEPENDENCY;%s;%s\n" % ( now, son_name, father_name) e = ExternalCommand(extcmd) print 'Raising external command', extcmd arb.add(e) print '\n' * 10
def hook_tick(self, arb):
    """Forward host-dependency changes from the mapping file to the arbiter.

    On every tick the pending external commands are processed first. When
    the mapping file has changed it is reloaded, and each added or removed
    (father, son) pair is turned into an ``ADD_SIMPLE_HOST_DEPENDENCY`` or
    ``DEL_HOST_DEPENDENCY`` external command passed to ``arb.add``.

    A pair is skipped when one of its hosts cannot be found in
    ``arb.conf.hosts`` or when either side is not of type 'host'.

    :param arb: the arbiter; provides ``conf.hosts.find_by_name`` and ``add``.
    """
    now = int(time.time())
    self.tick_external_command()
    logger.debug("[Hot dependencies] Tick tick for hot dependency")

    # Nothing to do unless the mapping file changed since the last tick
    if not self._is_mapping_file_changed():
        return

    logger.debug("[Hot dependencies] The mapping file changed, I update it")
    self._update_mapping()
    additions, removed = self._got_mapping_changes()
    # Wrapped in a 1-tuple so the formatting also works if the helper
    # returns tuples.
    logger.debug("[Hot dependencies] Additions: %s" % (additions,))
    logger.debug("[Hot dependencies] Remove: %s " % (removed,))

    # (pairs, debug label, verb for the info log, external command name);
    # additions are handled before deletions.
    passes = (
        (additions, "add", "Linked", "ADD_SIMPLE_HOST_DEPENDENCY"),
        (removed, "del", "Unlinked", "DEL_HOST_DEPENDENCY"),
    )
    for links, label, verb, cmd_name in passes:
        for father_k, son_k in links:
            son_type, son_name = son_k
            father_type, father_name = father_k
            logger.debug("[Hot dependencies] Got new %s %s %s %s %s"
                         % (label, son_type, son_name, father_type, father_name))

            # Use the same (stripped) names for the lookup and for the
            # command, so the command targets the host that was found.
            son_name = son_name.strip()
            father_name = father_name.strip()
            son = arb.conf.hosts.find_by_name(son_name)
            father = arb.conf.hosts.find_by_name(father_name)
            # if we cannot find them in the conf, bypass them
            if son is None or father is None:
                logger.debug("[Hot dependencies] not find dumbass!")
                continue
            if son_type != 'host' or father_type != 'host':
                continue

            # Only report the change once we know a command is raised
            logger.info("[Hot dependencies] %s son : %s and its father: %s"
                        % (verb, son_name, father_name))
            # We just raise the external command, arbiter will do the job
            # to dispatch them
            extcmd = "[%lu] %s;%s;%s\n" % (now, cmd_name, son_name, father_name)
            logger.debug('[Hot dependencies] Raising external command: %s' % extcmd)
            arb.add(ExternalCommand(extcmd))
def read_and_parse_sms(self): # Get only unread SMS of the inbox SMSmsgs = self.android.smsGetMessages(True, 'inbox').result to_mark = [] cmds = [] for message in SMSmsgs: # Read the message body = message['body'].encode('utf8', 'ignore') to_mark.append(message['_id']) print 'Addr', type(message['address']) print 'Message', type(body) print message if body.startswith(('ack', 'Ack', 'ACK')): elts = body.split(' ') if len(elts) <= 1: print "Bad message length" continue # Ok, look for host or host/service raw = elts[1] if '/' in raw: elts = raw.split('/') # If not service desc, bail out if len(elts) == 1: continue hname = elts[0] sdesc = ' '.join(elts[1:]) extcmd = 'ACKNOWLEDGE_SVC_PROBLEM;%s;%s;1;1;1;SMSPhoneAck;None\n' % ( hname, sdesc) e = ExternalCommand(extcmd) cmds.append(e) else: hname = raw extcmd = 'ACKNOWLEDGE_HOST_PROBLEM;%s;1;1;1;SMSPhoneAck;None\n' % hname e = ExternalCommand(extcmd) cmds.append(e) # Mark all read messages as read r = self.android.smsMarkMessageRead(to_mark, True) print "Raise messages: " print cmds for cmd in cmds: try: # Under android we got a queue here self.returns_queue.put(cmd) except IOError, exp: print "[%d]Exiting: %s" % (self.id, exp) sys.exit(2)
def get_page(cmd=None):
    """Launch the external command described by the ``/``-separated *cmd*.

    The first path element is the command name, the following ones are its
    arguments. The response is built with ``forge_response`` using the
    ``callback`` query parameter:

    * 403 when ``app.can_action()`` refuses the action;
    * 400 when no command was given;
    * 404 when the command name is not in ``ExternalCommandManager.commands``;
    * 200 with the ``response_text`` query parameter (default
      'Command launched') once the command has been pushed.
    """
    app.response.content_type = 'application/json'
    callback = app.request.query.get('callback', None)
    response_text = app.request.query.get('response_text', 'Command launched')

    # Or he is not allowed to launch commands?
    if not app.can_action():
        return forge_response(callback, 403, 'You are not authorized to launch commands')

    # cmd defaults to None: answer cleanly instead of failing on None.split
    if cmd is None:
        logger.error("[WebUI-actions] no command given")
        return forge_response(callback, 400, 'Missing command')

    now = subsNOW()
    elts = cmd.split('/')
    cmd_name = elts[0]
    cmd_args = elts[1:]
    logger.info("[WebUI-actions] got command: %s with args: %s.", cmd_name, cmd_args)

    # Check if the command exist in the Shinken external command list
    if cmd_name not in ExternalCommandManager.commands:
        logger.error("[WebUI-actions] unknown command: %s", cmd_name)
        return forge_response(callback, 404, 'Unknown command %s' % cmd_name)

    try:
        extcmd = u"[%s] %s" % (now, ';'.join(elts))
    except UnicodeDecodeError:
        # Non-ASCII byte strings cannot be merged into a unicode template;
        # fall back to a plain string.
        extcmd = "[%s] %s" % (now, ';'.join(elts))

    # Expand macros
    extcmd = expand_macros(extcmd)
    logger.info("[WebUI-actions] external command: %s.", extcmd)

    app.push_external_command(ExternalCommand(extcmd))
    return forge_response(callback, 200, response_text)
def get_page(cmd=None): # First we look for the user sid # so we bail out if it's a false one user = app.get_user_auth() if not user: return {'status': 401, 'text': 'Invalid session'} now = int(time.time()) print "Ask us an /action page", cmd elts = cmd.split('/') cmd_name = elts[0] cmd_args = elts[1:] print "Got command", cmd_name print "And args", cmd_args # Check if the command exist in the external command list if cmd_name not in ExternalCommandManager.commands: return {'status': 404, 'text': 'Unknown command %s' % cmd_name} extcmd = '[%d] %s' % (now, ';'.join(elts)) print "Got the; form", extcmd # Ok, if good, we can launch the command extcmd = extcmd.decode('utf8', 'replace') e = ExternalCommand(extcmd) print "Creating the command", e.__dict__ app.push_external_command(e) return {'status': 200, 'text': 'Command launched'}
def do_recheck():
    """Queue a forced host or service check built from the posted form.

    Reads ``time_stamp`` (defaults to now), ``host_name`` and
    ``service_description`` from the form. Aborts with 400 when
    ``host_name`` is missing. A service check is scheduled when a service
    description is given, a host check otherwise. ``check_auth()`` is
    called before the command is put on ``app.from_q``.
    """
    form_value = request.forms.get
    time_stamp = form_value('time_stamp', int(time.time()))
    host_name = form_value('host_name', '')
    service_description = form_value('service_description', '')
    logger.debug("[WS_Arbiter] Timestamp '%s' - host: '%s', service: '%s'"
                 % (time_stamp, host_name, service_description))

    if not host_name:
        abort(400, 'Missing parameter host_name')

    if not service_description:
        # SCHEDULE_FORCED_HOST_CHECK;<host_name>;<check_time>
        command = '[%s] SCHEDULE_FORCED_HOST_CHECK;%s;%s\n' % (
            time_stamp, host_name, time_stamp)
    else:
        # SCHEDULE_FORCED_SVC_CHECK;<host_name>;<service_description>;<check_time>
        command = '[%s] SCHEDULE_FORCED_SVC_CHECK;%s;%s;%s\n' % (
            time_stamp, host_name, service_description, time_stamp)

    # Authentication is verified unless anonymous access is allowed
    check_auth()

    # Hand the command over to the main queue
    logger.debug("[WS_Arbiter] command = %s" % command)
    app.from_q.put(ExternalCommand(command))
def push_extcmd(self, extcmd):
    """Wrap *extcmd* in an ``ExternalCommand`` and put it on ``self.from_q``.

    When ``self.from_q`` is not set, the command is only reported in the
    log and dropped.
    """
    command = ExternalCommand(extcmd)
    if not self.from_q:
        logger.info("[EC] push_extcmd no from_q! e=%s" % extcmd)
        return
    # logger.info("[EC] push_extcmd!!")
    self.from_q.put(command)
def do_change_host_var():
    """Queue a ``CHANGE_HOST_VAR`` command built from the posted form.

    Reads ``time_stamp`` (defaults to now), ``host_name``, ``var_name`` and
    ``value`` from the form. Aborts with 400 when ``host_name`` or
    ``var_name`` is missing, then calls ``check_auth()`` and puts the
    command on ``app.from_q``.
    """
    form_value = request.forms.get
    time_stamp = form_value('time_stamp', int(time.time()))
    host_name = form_value('host_name', '')
    var_name = form_value('var_name', '')
    value = form_value('value', '')
    logger.debug(
        "[WS_Arbiter] Timestamp '%s' - host: '%s', var_name: '%s', value: '%s'"
        % (time_stamp, host_name, var_name, value))

    # Both the host and the variable name are mandatory
    if not host_name:
        abort(400, 'Missing parameter host_name')
    if not var_name:
        abort(400, 'Missing parameter var_name')

    # Authentication is verified unless anonymous access is allowed
    check_auth()

    command = '[%s] CHANGE_HOST_VAR;%s;%s;%s\n' % (time_stamp, host_name, var_name, value)

    # Hand the command over to the main queue
    logger.debug("[WS_Arbiter] command = %s" % command)
    app.from_q.put(ExternalCommand(command))
def main(self): self.set_proctitle(self.name) self.set_exit_handler() last_check = 0.0 cs = CollectdServer() while True: print "-" * 80 # Each second we are looking at sending old elements if time.time() > last_check + 1: for e in elements.values(): c = e.get_command() if c is not None: print 'Got ', c ext_cmd = ExternalCommand(c) self.from_q.put(ext_cmd) try: for item in cs.read(): print item, item.__dict__ n = item.get_name() if n and n not in elements: e = Element(item.host, item.get_srv_desc(), item.interval) elements[n] = e e = elements[n] e.add_perf_data(item.get_metric_name(), item.get_metric_value()) except ValueError, exp: print "Collectd read error: ", exp
def get_page():
    """Queue a passive host or service check result built from the form.

    Reads ``time_stamp`` (defaults to now), ``host_name``,
    ``service_description``, ``return_code`` and ``output`` from the form.
    Unless ``app.username`` is 'anonymous', HTTP basic credentials are
    required: 401 when they are absent, 403 when they do not match
    ``app.username`` / ``app.password``. Aborts with 400 when the request
    is incomplete. The result is submitted for the service when a service
    description is given, for the host otherwise.
    """
    form_value = request.forms.get
    time_stamp = form_value('time_stamp', int(time.time()))
    host_name = form_value('host_name', None)
    service_description = form_value('service_description', None)
    return_code = form_value('return_code', -1)
    output = form_value('output', None)

    # Credentials are only checked when anonymous access is not allowed
    if app.username != 'anonymous':
        basic = parse_auth(request.environ.get('HTTP_AUTHORIZATION', ''))
        # No user/pass sent at all
        if not basic:
            abort(401, 'Authentication required')
        # User/pass sent, but not the expected ones
        if not (basic[0] == app.username and basic[1] == app.password):
            abort(403, 'Authentication denied')

    # Anonymous or authenticated call: the query itself may still be wrong
    incomplete = time_stamp == 0 or not host_name or not output or return_code == -1
    if incomplete:
        abort(400, "Incorrect syntax")

    # A service description selects the service form, else the host form
    if service_description:
        cmd = '[%s] PROCESS_SERVICE_CHECK_RESULT;%s;%s;%s;%s' % (
            time_stamp, host_name, service_description, return_code, output)
    else:
        cmd = '[%s] PROCESS_HOST_CHECK_RESULT;%s;%s;%s' % (
            time_stamp, host_name, return_code, output)

    # Put the external command in the main queue so the arbiter reads it
    app.from_q.put(ExternalCommand(cmd))
def do_downtime():
    """Queue a downtime scheduling or deletion command built from the form.

    Form parameters: ``action`` ('add', the default, or 'delete'),
    ``time_stamp``, ``host_name``, ``service_description``, ``start_time``,
    ``end_time``, ``fixed``, ``duration``, ``trigger_id``, ``author`` and
    ``comment``.

    Aborts with 400 when ``host_name`` is missing or when ``action`` is
    neither 'add' nor 'delete'. Unless ``app.username`` is 'anonymous',
    HTTP basic credentials are required (401 when absent, 403 when wrong).
    The command targets the service when a service description is given,
    the host otherwise, and is put on ``app.from_q``.
    """
    # Getting lists of informations for the commands
    action = request.forms.get('action', 'add')
    time_stamp = request.forms.get('time_stamp', int(time.time()))
    host_name = request.forms.get('host_name', '')
    service_description = request.forms.get('service_description', '')
    start_time = request.forms.get('start_time', int(time.time()))
    end_time = request.forms.get('end_time', int(time.time()))
    # Fixed is 1 for a period between start and end time
    fixed = request.forms.get('fixed', '1')
    # Fixed is 0 (flexible) for a period of duration seconds from start time
    duration = request.forms.get('duration', 86400)
    trigger_id = request.forms.get('trigger_id', '0')
    author = request.forms.get('author', 'anonymous')
    comment = request.forms.get('comment', 'No comment')

    logger.debug(
        "[WS_Arbiter] Downtime %s - host: '%s', service: '%s', comment: '%s'"
        % (action, host_name, service_description, comment))

    if not host_name:
        abort(400, 'Missing parameter host_name')

    if action == 'add':
        if service_description:
            # SCHEDULE_SVC_DOWNTIME;<host_name>;<service_description>;<start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;<comment>
            command = '[%s] SCHEDULE_SVC_DOWNTIME;%s;%s;%s;%s;%s;%s;%s;%s;%s\n' % (
                time_stamp, host_name, service_description, start_time,
                end_time, fixed, trigger_id, duration, author, comment)
        else:
            # SCHEDULE_HOST_DOWNTIME;<host_name>;<start_time>;<end_time>;<fixed>;<trigger_id>;<duration>;<author>;<comment>
            command = '[%s] SCHEDULE_HOST_DOWNTIME;%s;%s;%s;%s;%s;%s;%s;%s\n' % (
                time_stamp, host_name, start_time, end_time, fixed,
                trigger_id, duration, author, comment)
    elif action == 'delete':
        if service_description:
            # DEL_ALL_SVC_DOWNTIMES;<host_name>;<service_description>
            command = '[%s] DEL_ALL_SVC_DOWNTIMES;%s;%s\n' % (
                time_stamp, host_name, service_description)
        else:
            # DEL_ALL_HOST_DOWNTIMES;<host_name>
            command = '[%s] DEL_ALL_HOST_DOWNTIMES;%s\n' % (time_stamp, host_name)
    else:
        # Any other action used to leave `command` unbound and crash
        # further down; reject it as a bad request instead.
        abort(400, "Parameter action must be 'add' or 'delete'")

    # We check for auth if it's not anonymously allowed
    if app.username != 'anonymous':
        basic = parse_auth(request.environ.get('HTTP_AUTHORIZATION', ''))
        # Maybe the user not even ask for user/pass. If so, bail out
        if not basic:
            abort(401, 'Authentication required')
        # Maybe he do not give the good credential?
        if basic[0] != app.username or basic[1] != app.password:
            abort(403, 'Authentication denied')

    # Adding commands to the main queue()
    logger.debug("[WS_Arbiter] command = %s" % command)
    ext = ExternalCommand(command)
    app.from_q.put(ext)
def do_acknowledge():
    """Queue an acknowledge or remove-acknowledgement command from the form.

    Form parameters: ``action`` ('add', the default, or 'delete'),
    ``time_stamp``, ``host_name``, ``service_description``, ``sticky``,
    ``notify``, ``persistent``, ``author`` and ``comment``.

    Aborts with 400 when ``host_name`` is missing or when ``action`` is
    neither 'add' nor 'delete'. ``check_auth()`` is called before the
    command is put on ``app.from_q``. The command targets the service when
    a service description is given, the host otherwise.
    """
    # Getting lists of informations for the commands
    action = request.forms.get('action', 'add')
    time_stamp = request.forms.get('time_stamp', int(time.time()))
    host_name = request.forms.get('host_name', '')
    service_description = request.forms.get('service_description', '')
    sticky = request.forms.get('sticky', '1')
    notify = request.forms.get('notify', '0')
    persistent = request.forms.get('persistent', '1')
    author = request.forms.get('author', 'anonymous')
    comment = request.forms.get('comment', 'No comment')

    logger.debug("[WS_Arbiter] Acknowledge %s - host: '%s', service: '%s', comment: '%s'"
                 % (action, host_name, service_description, comment))

    if not host_name:
        abort(400, 'Missing parameter host_name')

    if action == 'add':
        if service_description:
            command = '[%s] ACKNOWLEDGE_SVC_PROBLEM;%s;%s;%s;%s;%s;%s;%s\n' % (
                time_stamp, host_name, service_description,
                sticky, notify, persistent, author, comment)
        else:
            command = '[%s] ACKNOWLEDGE_HOST_PROBLEM;%s;%s;%s;%s;%s;%s\n' % (
                time_stamp, host_name,
                sticky, notify, persistent, author, comment)
    elif action == 'delete':
        if service_description:
            # REMOVE_SVC_ACKNOWLEDGEMENT;<host_name>;<service_description>
            command = '[%s] REMOVE_SVC_ACKNOWLEDGEMENT;%s;%s\n' % (
                time_stamp, host_name, service_description)
        else:
            # REMOVE_HOST_ACKNOWLEDGEMENT;<host_name>
            command = '[%s] REMOVE_HOST_ACKNOWLEDGEMENT;%s\n' % (
                time_stamp, host_name)
    else:
        # Any other action used to leave `command` unbound and crash
        # further down; reject it as a bad request instead.
        abort(400, "Parameter action must be 'add' or 'delete'")

    check_auth()

    # Adding commands to the main queue()
    logger.debug("[WS_Arbiter] command: %s" % str(command))
    ext = ExternalCommand(command)
    app.from_q.put(ext)
def get(self):
    """Read the fifo once and return the complete commands it contained.

    Up to 8096 bytes are read from ``self.fifo`` and appended to the
    fragment kept from the previous call. The data is split on newlines and
    each non-empty piece becomes an ``ExternalCommand``. When the read
    filled the whole buffer and the data does not end with a newline, the
    last piece is kept in ``self.cmd_fragments`` for the next call instead
    of being returned. When there is no data at all, the fifo is closed.

    :return: list of ``ExternalCommand`` objects, possibly empty.
    """
    chunk = os.read(self.fifo, 8096)
    commands = []
    read_was_full = len(chunk) == 8096

    # Prepend what was left over from the previous read, if anything
    data = self.cmd_fragments + chunk
    self.cmd_fragments = ''

    if read_was_full and data[-1] != '\n':
        # Full buffer ending in the middle of a command: keep the tail
        commands.extend([ExternalCommand(piece)
                         for piece in data.split('\n')[:-1] if piece])
        self.cmd_fragments = data.split('\n')[-1]
    elif data:
        # Partly filled buffer, or full one ending exactly on a newline
        commands.extend([ExternalCommand(piece)
                         for piece in data.split('\n') if piece])
    else:
        # Nothing to read. We "reset" the fifo here. It will be
        # re-opened in the main loop.
        os.close(self.fifo)
    return commands
def do_reload():
    """Queue a ``RELOAD_CONFIG`` external command.

    The command is stamped with the ``time_stamp`` form parameter (defaults
    to now). ``check_auth()`` is called before the command is logged at
    warning level and put on ``app.from_q``.
    """
    stamp = request.forms.get('time_stamp', int(time.time()))
    command = '[%s] RELOAD_CONFIG\n' % stamp

    check_auth()

    # Hand the command over to the main queue
    logger.warning("[WS_Arbiter] command: %s" % str(command))
    app.from_q.put(ExternalCommand(command))
def post_command(self, timestamp, rc, hostname, service, output):
    '''
    Send a check result command to the arbiter

    Builds a PROCESS_HOST_CHECK_RESULT command when *service* is empty or
    None, a PROCESS_SERVICE_CHECK_RESULT command otherwise, and puts it on
    ``self.from_q`` wrapped in an ``ExternalCommand``.

    :param timestamp: time of the check, formatted as an integer
    :param rc: return code of the check, formatted as an integer
    :param hostname: name of the host the result is for
    :param service: service description; empty or None for a host result
    :param output: plugin output of the check
    '''
    # `not service` also covers None, where len(service) would raise
    if not service:
        extcmd = "[%lu] PROCESS_HOST_CHECK_RESULT;%s;%d;%s\n" % (
            timestamp, hostname, rc, output)
    else:
        extcmd = "[%lu] PROCESS_SERVICE_CHECK_RESULT;%s;%s;%d;%s\n" % (
            timestamp, hostname, service, rc, output)

    e = ExternalCommand(extcmd)
    self.from_q.put(e)
def launch_query(self):
    """ Prepare the request object's filter stacks

    Loads this query into ``self.response``. When the query carries an
    external command (``self.extcmd``), it is wrapped in an
    ``ExternalCommand`` and put on ``self.return_queue``.

    :return: an empty list
    """
    # The Response object needs to access the Query
    self.response.load(self)
    if self.extcmd:
        # External command are send back to broker.
        # Only byte strings are decoded: calling .decode() on a string that
        # is already unicode makes Python 2 encode it to ASCII first, which
        # raises UnicodeEncodeError on accented characters.
        if isinstance(self.extcmd, bytes):
            self.extcmd = self.extcmd.decode('utf8', 'replace')
        e = ExternalCommand(self.extcmd)
        self.return_queue.put(e)
    return []
def do_acknowledge():
    """Queue an acknowledge or remove-acknowledgement command from the form.

    Form parameters: ``action`` ('add', the default, or 'delete'),
    ``time_stamp``, ``host_name``, ``service_description``, ``sticky``,
    ``notify``, ``persistent``, ``author`` and ``comment``.

    Aborts with 400 when ``host_name`` is missing or when ``action`` is
    neither 'add' nor 'delete'. Unless ``app.username`` is 'anonymous',
    HTTP basic credentials are required (401 when absent, 403 when wrong).
    The command targets the service when a service description is given,
    the host otherwise, and is put on ``app.from_q``.
    """
    # Getting lists of informations for the commands
    action = request.forms.get('action', 'add')
    time_stamp = request.forms.get('time_stamp', int(time.time()))
    host_name = request.forms.get('host_name', '')
    service_description = request.forms.get('service_description', '')
    sticky = request.forms.get('sticky', '1')
    notify = request.forms.get('notify', '0')
    persistent = request.forms.get('persistent', '1')
    author = request.forms.get('author', 'anonymous')
    comment = request.forms.get('comment', 'No comment')

    logger.debug(
        "[WS_Arbiter] Acknowledge %s - host: '%s', service: '%s', comment: '%s'"
        % (action, host_name, service_description, comment))

    if not host_name:
        abort(400, 'Missing parameter host_name')

    if action == 'add':
        if service_description:
            command = '[%s] ACKNOWLEDGE_SVC_PROBLEM;%s;%s;%s;%s;%s;%s;%s\n' % (
                time_stamp, host_name, service_description, sticky, notify,
                persistent, author, comment)
        else:
            command = '[%s] ACKNOWLEDGE_HOST_PROBLEM;%s;%s;%s;%s;%s;%s\n' % (
                time_stamp, host_name, sticky, notify, persistent, author,
                comment)
    elif action == 'delete':
        if service_description:
            # REMOVE_SVC_ACKNOWLEDGEMENT;<host_name>;<service_description>
            command = '[%s] REMOVE_SVC_ACKNOWLEDGEMENT;%s;%s\n' % (
                time_stamp, host_name, service_description)
        else:
            # REMOVE_HOST_ACKNOWLEDGEMENT;<host_name>
            command = '[%s] REMOVE_HOST_ACKNOWLEDGEMENT;%s\n' % (time_stamp, host_name)
    else:
        # Any other action used to leave `command` unbound and crash
        # further down; reject it as a bad request instead.
        abort(400, "Parameter action must be 'add' or 'delete'")

    # We check for auth if it's not anonymously allowed
    if app.username != 'anonymous':
        basic = parse_auth(request.environ.get('HTTP_AUTHORIZATION', ''))
        # Maybe the user not even ask for user/pass. If so, bail out
        if not basic:
            abort(401, 'Authentication required')
        # Maybe he do not give the good credential?
        if basic[0] != app.username or basic[1] != app.password:
            abort(403, 'Authentication denied')

    # Adding commands to the main queue()
    logger.debug("[WS_Arbiter] command: %s" % str(command))
    ext = ExternalCommand(command)
    app.from_q.put(ext)
def _read_collectd_packet(self, reader):
    '''
    Read and interpret a packet from collectd.

    Every item produced by ``reader.interpret()`` is handled according to
    its type: a Notification may yield an external command that is put on
    ``self.from_q``; a Values item is added as perf data to the Element
    registered under the item's name in ``self.elements`` (the Element is
    created and registered when the name is not known yet).

    :param reader: A collectd Reader instance.
    '''
    elements = self.elements
    lock = self.lock

    item_iterator = reader.interpret()
    while True:
        try:
            item = next(item_iterator)
        except StopIteration:
            # no more item in this packet
            break
        except CollectdException as err:
            # log the error and ask the iterator for the next item
            logger.error('CollectdException: %s' % err)
            continue

        assert isinstance(item, Data)
        if isinstance(item, Notification):
            cmd = item.get_message_command()
            if cmd is not None:
                #logger.info('-> %s', cmd)
                self.from_q.put(ExternalCommand(cmd))

        elif isinstance(item, Values):
            name = item.get_name()
            elem = elements.get(name, None)
            if elem is None:
                # first value seen under this name; the new element is only
                # registered in `elements` further down, under the lock
                elem = Element(item.host, item.get_srv_desc(), item.interval)
                logger.info('Created %s ; interval=%s' % (elem, elem.interval))
            else:
                # sanity check:
                # make sure element interval is updated when it's changed on collectd client:
                if elem.interval != item.interval:
                    logger.info(
                        '%s : interval changed from %s to %s ; adapting..'
                        % (name, elem.interval, item.interval))
                    with lock:
                        elem.interval = item.interval
                        # also reset last_update time so that we'll wait that before resending its data:
                        elem.last_sent = time.time() + item.interval
                        elem.perf_datas.clear()  # should we or not ?

            # now we can add this perf data:
            with lock:
                elem.add_perf_data(item.get_metric_name(), item, item.time)
                if name not in elements:
                    elements[name] = elem
def post_command(self, timestamp, rc, hostname, service, output):
    '''
    Send a check result command to the arbiter

    A service result is sent when *service* is set, a host result
    otherwise. The command is logged at debug level and put on
    ``self.from_q`` wrapped in an ``ExternalCommand``.
    '''
    if service:
        extcmd = "[%lu] PROCESS_SERVICE_CHECK_RESULT;%s;%s;%d;%s\n" % (
            timestamp, hostname, service, rc, output)
    else:
        extcmd = "[%lu] PROCESS_HOST_CHECK_RESULT;%s;%d;%s\n" % (
            timestamp, hostname, rc, output)

    logger.debug("[NSCA] external command sent: %s" % (extcmd))
    self.from_q.put(ExternalCommand(extcmd))
def launch_query(self):
    """ Prepare the request object's filter stacks

    Loads this query into ``self.response`` and, when the query carries an
    external command, puts it on ``self.return_queue`` wrapped in an
    ``ExternalCommand``. Returns an empty list.
    """
    # The Response object needs to access the Query
    self.response.load(self)

    if not self.extcmd:
        return []

    # External command are send back to broker
    # TODO: check and clean this:
    # Somehow this line seems to prevent us from sending external
    # commands from THruk with accents:
    # self.extcmd = self.extcmd.decode('utf8', 'replace')
    self.return_queue.put(ExternalCommand(self.extcmd))
    return []
def get_page(cmd=None): app.response.content_type = 'application/json' print app.request.query.__dict__ callback = app.request.query.get('callback', None) # First we look for the user sid # so we bail out if it's a false one user = app.get_user_auth() # Maybe the user is not known at all if not user: return forge_response(callback, 401, 'Invalid session') # Or he is not allowed to launch commands? if app.manage_acl and not user.can_submit_commands: return forge_response(callback, 403, 'You are not authorized to launch commands') now = int(time.time()) print "Ask us an /action page", cmd elts = cmd.split('/') cmd_name = elts[0] cmd_args = elts[1:] print "Got command", cmd_name print "And args", cmd_args # Check if the command exist in the external command list if cmd_name not in ExternalCommandManager.commands: return forge_response(callback, 404, 'Unknown command %s' % cmd_name) extcmd = '[%d] %s' % (now, ';'.join(elts)) print "Got the; form", extcmd # Expand macros extcmd = expand_macros(extcmd) print "Got after macro expansion", extcmd # Ok, if good, we can launch the command extcmd = extcmd.decode('utf8', 'replace') e = ExternalCommand(extcmd) print "Creating the command", e.__dict__ app.push_external_command(e) return forge_response(callback, 200, 'Command launched')
def do_reload():
    """Queue a ``RELOAD_CONFIG`` external command.

    The command is stamped with the ``time_stamp`` form parameter (defaults
    to now). Unless ``app.username`` is 'anonymous', HTTP basic credentials
    are required: 401 when they are absent, 403 when they do not match
    ``app.username`` / ``app.password``. The command is logged at warning
    level and put on ``app.from_q``.
    """
    stamp = request.forms.get('time_stamp', int(time.time()))
    command = '[%s] RELOAD_CONFIG\n' % stamp

    # Credentials are only checked when anonymous access is not allowed
    if app.username != 'anonymous':
        basic = parse_auth(request.environ.get('HTTP_AUTHORIZATION', ''))
        # No user/pass sent at all
        if not basic:
            abort(401, 'Authentication required')
        # User/pass sent, but not the expected ones
        if not (basic[0] == app.username and basic[1] == app.password):
            abort(403, 'Authentication denied')

    # Hand the command over to the main queue
    logger.warning("[WS_Arbiter] command: %s" % str(command))
    app.from_q.put(ExternalCommand(command))
def do_add_simple_host_dependency():
    """Queue an ``ADD_SIMPLE_HOST_DEPENDENCY`` command built from the form.

    Reads ``time_stamp`` (defaults to now), ``son`` and ``father`` from the
    form. Aborts with 400 when ``son`` or ``father`` is missing, then calls
    ``check_auth()`` and puts the command on ``app.from_q``.
    """
    form_value = request.forms.get
    time_stamp = form_value('time_stamp', int(time.time()))
    son = form_value('son', '')
    father = form_value('father', '')
    logger.debug("[WS_Arbiter] Timestamp '%s' - son: '%s', father: '%s'"
                 % (time_stamp, son, father))

    # Both ends of the dependency are mandatory
    if not son:
        abort(400, 'Missing parameter son')
    if not father:
        abort(400, 'Missing parameter father')

    # Authentication is verified unless anonymous access is allowed
    check_auth()

    command = '[%s] ADD_SIMPLE_HOST_DEPENDENCY;%s;%s\n' % (time_stamp, son, father)

    # Hand the command over to the main queue
    logger.debug("[WS_Arbiter] command = %s" % command)
    app.from_q.put(ExternalCommand(command))
def do_loop_turn(self):
    """Put one example external command on ``self.from_q``, then sleep 1s."""
    logger.info("[Dummy Arbiter] Raise a external command as example")
    self.from_q.put(ExternalCommand('Viva la revolution'))
    time.sleep(1)
def do_loop_turn(self):
    """Put one example external command on ``self.from_q``, then sleep 1s."""
    print("Raise a external command as example")
    self.from_q.put(ExternalCommand('Viva la revolution'))
    time.sleep(1)
def main(self):
    # Main loop of the carbon receiver.
    #
    # Until self.interrupted is set, each turn:
    #   - reads one packet itself, or sleeps one second when a dedicated
    #     reader thread (self._read_carbon) was started;
    #   - collects the commands the elements have ready and puts them on
    #     self.from_q as ExternalCommand objects;
    #   - every `clean_every` seconds, purges stale perf data and the
    #     elements left without any;
    #   - every `report_every` seconds, logs how many commands were sent.
    #
    # Raises Exception when neither self.udp nor self.tcp is set, and
    # re-raises (after logging) any exception coming out of the loop. The
    # reader is always closed on exit.
    use_dedicated_thread = self.use_dedicated_thread
    elements = self.elements
    lock = self.lock
    now = time.time()
    clean_every = 15    # seconds between two purges
    report_every = 60   # seconds between two activity reports
    next_clean = now + clean_every
    next_report = now + report_every
    n_cmd_sent = 0      # commands sent since the last report

    if not (self.udp or self.tcp):
        raise Exception('You must define a TCP or a UDP connection')

    reader = ShinkenCarbonReader(
        self.udp,
        self.tcp,
        interval=self.interval,
        grouped_collectd_plugins=self.grouped_collectd_plugins)
    try:
        if use_dedicated_thread:
            carbon_reader_thread = threading.Thread(
                target=self._read_carbon, args=(reader, ))
            carbon_reader_thread.start()

        while not self.interrupted:
            if use_dedicated_thread:
                # the reader thread does the reading, we only wake up
                # periodically to send what is ready
                time.sleep(1)
            else:
                self._read_carbon_packet(reader)

            # collect under the lock, send once it is released
            tosend = []
            with lock:
                for elem in elements.itervalues():
                    cmd = elem.get_command()
                    if cmd:
                        tosend.append(cmd)
            # we could send those in one shot !
            # if it existed an ExternalCommand*s* items class.. TODO.
            for cmd in tosend:
                self.from_q.put(ExternalCommand(cmd))
            n_cmd_sent += len(tosend)

            now = time.time()
            if now > next_clean:
                next_clean = now + clean_every
                if use_dedicated_thread:
                    # a dead reader thread while we were not asked to stop
                    # is fatal
                    if not carbon_reader_thread.isAlive(
                    ) and not self.interrupted:
                        raise Exception(
                            'Carbon reader thread unexpectedly died.. exiting.'
                        )

                todel = []
                with lock:
                    for name, elem in elements.iteritems():
                        for perf_name, met_values in elem.perf_datas.items(
                        ):
                            if met_values[
                                    0].here_time < now - 3 * elem.interval:
                                # this perf data has not been updated for more than 3 intervals,
                                # purge it.
                                del elem.perf_datas[perf_name]
                                logger.info(
                                    '%s %s: 3*interval without data, purged.'
                                    % (elem, perf_name))
                        if not elem.perf_datas:
                            todel.append(name)
                    # elements are removed after the iteration over
                    # `elements` is finished
                    for name in todel:
                        logger.info('%s : not anymore updated > purged.' %
                                    name)
                        del elements[name]

            if now > next_report:
                next_report = now + report_every
                logger.info(
                    '%s commands reported during last %s seconds.'
                    % (n_cmd_sent, report_every))
                n_cmd_sent = 0

    except Exception as err:
        logger.error("[Carbon] Unexpected error: %s ; %s" %
                     (err, traceback.format_exc()))
        raise
    finally:
        reader.close()
        if use_dedicated_thread:
            carbon_reader_thread.join()
def do_loop_turn(self):
    # Main loop of the collectd receiver.
    #
    # Until self.interrupted is set, each turn:
    #   - reads one packet itself, or, when a dedicated reader thread
    #     (self._read_collectd) was started, waits up to one second on
    #     self.cond unless self.send_ready is already set;
    #   - collects the commands the elements have ready and puts them on
    #     self.from_q as ExternalCommand objects;
    #   - every `clean_every` seconds, purges stale perf data and the
    #     elements left without any;
    #   - every `report_every` seconds, logs how many commands were sent.
    #
    # Any exception coming out of the loop is logged and not re-raised.
    # The reader is always closed on exit.
    elements = self.elements
    lock = self.lock
    now = time.time()
    clean_every = 15    # seconds between two purges
    report_every = 60   # seconds between two activity reports
    next_clean = now + clean_every
    next_report = now + report_every
    n_cmd_sent = 0      # commands sent since the last report

    reader = ShinkenCollectdReader(
        self.host,
        self.port,
        self.multicast,
        grouped_collectd_plugins=self.grouped_collectd_plugins)
    try:
        if self.use_decicated_thread:
            collectd_reader_thread = threading.Thread(
                target=self._read_collectd, args=(reader, ))
            collectd_reader_thread.start()

        while not self.interrupted:
            if self.use_decicated_thread:
                with self.cond:
                    if not self.send_ready:
                        self.cond.wait(1)
                    self.send_ready = False
                    # or, simply poll every sec ? :
                    # time.sleep(1)
            else:
                self._read_collectd_packet(reader)

            todel = []
            tosend = []
            # collect under the lock, send once it is released
            with lock:
                for name, elem in elements.iteritems():
                    assert isinstance(elem, Element)
                    cmd = elem.get_command()
                    if cmd:
                        tosend.append(cmd)
            # we could send those in one shot !
            n_cmd_sent += len(tosend)
            for cmd in tosend:
                self.from_q.put(ExternalCommand(cmd))

            now = time.time()
            if now > next_clean:
                next_clean = now + clean_every
                #if not collectd_reader_thread.isAlive():
                #    raise Exception('Collectd read thread unexpectedly died.. exiting.')
                with lock:
                    for name, elem in elements.iteritems():
                        for pname, vvalues in elem.perf_datas.items():
                            if vvalues[0][-1] < now - 3 * elem.interval:
                                # this perf data has not been updated for more than 3 intervals,
                                # purge it.
                                del elem.perf_datas[pname]
                                logger.info('%s : purged %s' % (elem, pname))
                        if not elem.perf_datas:
                            todel.append(name)
                    # elements are removed after the iteration over
                    # `elements` is finished
                    for name in todel:
                        logger.info('%s : not anymore updated > purged.' %
                                    name)
                        del elements[name]

            if now > next_report:
                next_report = now + report_every
                logger.info(
                    '%s commands reported during last %s seconds.'
                    % (n_cmd_sent, report_every))
                n_cmd_sent = 0

    except Exception as err:
        logger.error("[Collectd] Unexpected error: %s ; %s" %
                     (err, traceback.format_exc()))
    finally:
        reader.close()
        if self.use_decicated_thread:
            collectd_reader_thread.join()
def test_simple_dispatch_and_addition(self): print "The dispatcher", self.dispatcher # dummy for the arbiter for a in self.conf.arbiters: a.__class__ = GoodArbiter print "Preparing schedulers" scheduler1 = self.conf.schedulers.find_by_name('scheduler-all-1') self.assertIsNot(scheduler1, None) scheduler1.__class__ = GoodScheduler scheduler2 = self.conf.schedulers.find_by_name('scheduler-all-2') self.assertIsNot(scheduler2, None) scheduler2.__class__ = BadScheduler print "Preparing pollers" poller1 = self.conf.pollers.find_by_name('poller-all-1') self.assertIsNot(poller1, None) poller1.__class__ = GoodPoller poller2 = self.conf.pollers.find_by_name('poller-all-2') self.assertIsNot(poller2, None) poller2.__class__ = BadPoller print "Preparing reactionners" reactionner1 = self.conf.reactionners.find_by_name('reactionner-all-1') self.assertIsNot(reactionner1, None) reactionner1.__class__ = GoodReactionner reactionner2 = self.conf.reactionners.find_by_name('reactionner-all-2') self.assertIsNot(reactionner2, None) reactionner2.__class__ = BadReactionner print "Preparing brokers" broker1 = self.conf.brokers.find_by_name('broker-all-1') self.assertIsNot(broker1, None) broker1.__class__ = GoodBroker broker2 = self.conf.brokers.find_by_name('broker-all-2') self.assertIsNot(broker2, None) broker2.__class__ = BadBroker # Ping all elements. 
Should have 1 as OK, 2 as # one bad attempt (3 max) self.dispatcher.check_alive() # Check good values self.assertEqual(True, scheduler1.alive) self.assertEqual(0, scheduler1.attempt) self.assertEqual(True, scheduler1.reachable) # still alive, just unreach self.assertEqual(True, scheduler2.alive) self.assertEqual(1, scheduler2.attempt) self.assertEqual(False, scheduler2.reachable) # and others satellites too self.assertEqual(True, poller1.alive) self.assertEqual(0, poller1.attempt) self.assertEqual(True, poller1.reachable) # still alive, just unreach self.assertEqual(True, poller2.alive) self.assertEqual(1, poller2.attempt) self.assertEqual(False, poller2.reachable) # and others satellites too self.assertEqual(True, reactionner1.alive) self.assertEqual(0, reactionner1.attempt) self.assertEqual(True, reactionner1.reachable) # still alive, just unreach self.assertEqual(True, reactionner2.alive) self.assertEqual(1, reactionner2.attempt) self.assertEqual(False, reactionner2.reachable) # and others satellites too self.assertEqual(True, broker1.alive) self.assertEqual(0, broker1.attempt) self.assertEqual(True, broker1.reachable) # still alive, just unreach self.assertEqual(True, broker2.alive) self.assertEqual(1, broker2.attempt) self.assertEqual(False, broker2.reachable) time.sleep(60) ### Now add another attempt, still alive, but attemp=2/3 self.dispatcher.check_alive() # Check good values self.assertEqual(True, scheduler1.alive) self.assertEqual(0, scheduler1.attempt) self.assertEqual(True, scheduler1.reachable) # still alive, just unreach self.assertEqual(True, scheduler2.alive) self.assertEqual(2, scheduler2.attempt) self.assertEqual(False, scheduler2.reachable) # and others satellites too self.assertEqual(True, poller1.alive) self.assertEqual(0, poller1.attempt) self.assertEqual(True, poller1.reachable) # still alive, just unreach self.assertEqual(True, poller2.alive) self.assertEqual(2, poller2.attempt) self.assertEqual(False, poller2.reachable) # and others 
satellites too self.assertEqual(True, reactionner1.alive) self.assertEqual(0, reactionner1.attempt) self.assertEqual(True, reactionner1.reachable) # still alive, just unreach self.assertEqual(True, reactionner2.alive) self.assertEqual(2, reactionner2.attempt) self.assertEqual(False, reactionner2.reachable) # and others satellites too self.assertEqual(True, broker1.alive) self.assertEqual(0, broker1.attempt) self.assertEqual(True, broker1.reachable) # still alive, just unreach self.assertEqual(True, broker2.alive) self.assertEqual(2, broker2.attempt) self.assertEqual(False, broker2.reachable) time.sleep(60) ### Now we get BAD, We go DEAD for N2! self.dispatcher.check_alive() # Check good values self.assertEqual(True, scheduler1.alive) self.assertEqual(0, scheduler1.attempt) self.assertEqual(True, scheduler1.reachable) # still alive, just unreach self.assertEqual(False, scheduler2.alive) self.assertEqual(3, scheduler2.attempt) self.assertEqual(False, scheduler2.reachable) # and others satellites too self.assertEqual(True, poller1.alive) self.assertEqual(0, poller1.attempt) self.assertEqual(True, poller1.reachable) # still alive, just unreach self.assertEqual(False, poller2.alive) self.assertEqual(3, poller2.attempt) self.assertEqual(False, poller2.reachable) # and others satellites too self.assertEqual(True, reactionner1.alive) self.assertEqual(0, reactionner1.attempt) self.assertEqual(True, reactionner1.reachable) # still alive, just unreach self.assertEqual(False, reactionner2.alive) self.assertEqual(3, reactionner2.attempt) self.assertEqual(False, reactionner2.reachable) # and others satellites too self.assertEqual(True, broker1.alive) self.assertEqual(0, broker1.attempt) self.assertEqual(True, broker1.reachable) # still alive, just unreach self.assertEqual(False, broker2.alive) self.assertEqual(3, broker2.attempt) self.assertEqual(False, broker2.reachable) # Now we check how we should dispatch confs self.dispatcher.check_dispatch() # the conf should not be in a 
good shape self.assertEqual(False, self.dispatcher.dispatch_ok) # Now we really dispatch them! self.dispatcher.dispatch() self.assert_any_log_match('Dispatch OK of conf in scheduler scheduler-all-1') self.assert_any_log_match('Dispatch OK of configuration 0 to reactionner reactionner-all-1') self.assert_any_log_match('Dispatch OK of configuration 0 to poller poller-all-1') self.assert_any_log_match('Dispatch OK of configuration 0 to broker broker-all-1') self.clear_logs() # And look if we really dispatch conf as we should for r in self.conf.realms: for cfg in r.confs.values(): self.assertEqual(True, cfg.is_assigned) self.assertEqual(scheduler1, cfg.assigned_to) cmd = "[%lu] ADD_SIMPLE_POLLER;All;newpoller;localhost;7771" % int(time.time()) ext_cmd = ExternalCommand(cmd) self.external_command_dispatcher.resolve_command(ext_cmd) # Look for the poller now newpoller = self.conf.pollers.find_by_name('newpoller') self.assertIsNot(newpoller, None) newpoller.__class__ = GoodPoller ### Wht now with our new poller object? self.dispatcher.check_alive() # Check good values self.assertEqual(True, newpoller.alive) self.assertEqual(0, newpoller.attempt) self.assertEqual(True, newpoller.reachable) # Now we check how we should dispatch confs self.dispatcher.check_bad_dispatch() self.dispatcher.dispatch()
logger.debug("[WS_Arbiter] return_code_list: %s" % (return_code_list)) output_list = request.forms.getall(key='output') logger.debug("[WS_Arbiter] output_list: %s" % (output_list)) commands_list = get_commands(time_stamp_list, host_name_list, service_description_list, return_code_list, output_list) except Exception, e: logger.error("[WS_Arbiter] failed to get the lists: %s" % str(e)) commands_list = [] check_auth() # Adding commands to the main queue() logger.debug("[WS_Arbiter] commands: %s" % str(sorted(commands_list))) for c in sorted(commands_list): ext = ExternalCommand(c) app.from_q.put(ext) # OK here it's ok, it will return a 200 code def do_restart(): # Getting lists of informations for the commands time_stamp = request.forms.get('time_stamp', int(time.time())) command = '[%s] RESTART_PROGRAM\n' % time_stamp check_auth() # Adding commands to the main queue() logger.warning("[WS_Arbiter] command: %s" % str(command)) ext = ExternalCommand(command)