Example No. 1
class RabbitMQ(Collector):
    parameters = {
        'uri': StringParameter(default='http://localhost:15672/api/overview'),
        'user': StringParameter(default='guest'),
        'password': StringParameter(default='guest'),
    }

    def launch(self):
        logger = self.logger
        logger.debug('getRabbitMQStatus: start')

        if not self.is_in_group('rabbitmq'):
            self.set_not_eligible(
                'Please add the rabbitmq group to enable this collector.')
            return

        try:
            uri = self.get_parameter('uri')
            user = self.get_parameter('user')
            password = self.get_parameter('password')
            response = httper.get(uri, timeout=3, user=user, password=password)

        except get_http_exceptions() as e:
            self.set_error('Unable to get RabbitMQ status - HTTPError = %s' %
                           e)
            return False

        except Exception:
            self.set_error('Unable to get RabbitMQ status - Exception = %s' %
                           traceback.format_exc())
            return False

        try:
            status = jsoner.loads(response)
        except Exception as exp:
            self.set_error("Rabbitmq: parsing json: %s" % exp)
            return False

        return status
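
A note on the two helpers used above: httper and jsoner are OpsBro's internal HTTP and JSON wrappers. As a rough, hedged sketch of what this collector's fetch amounts to, here is a standard-library equivalent, assuming a local RabbitMQ with the default guest/guest credentials from the parameters above:

import base64
import json
import urllib.request

# Defaults taken from the collector's parameters above
uri = 'http://localhost:15672/api/overview'
credentials = base64.b64encode(b'guest:guest').decode('ascii')

request = urllib.request.Request(uri)
request.add_header('Authorization', 'Basic %s' % credentials)

# Fetch and decode the overview document, as httper.get + jsoner.loads do
with urllib.request.urlopen(request, timeout=3) as response:
    status = json.loads(response.read().decode('utf-8'))

print(status.get('rabbitmq_version'))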
Example No. 2
class WebSocketModule(ListenerModule):
    implement = 'websocket'
    
    parameters = {
        'enabled': BoolParameter(default=False),
        'port'   : IntParameter(default=6769),
        'address': StringParameter(default='0.0.0.0'),
    }
    
    
    def __init__(self):
        ListenerModule.__init__(self)
        self.websocket = {}
        self.webso = None
    
    
    def get_info(self):
        r = {'log': ''}
        
        r['configuration'] = self.websocket
        r['state'] = 'STARTED' if self.websocket['enabled'] else 'DISABLED'
        if not self.webso:
            r['websocket_info'] = None
        else:
            r['websocket_info'] = self.webso.get_info()
        
        return r
    
    
    def prepare(self):
        self.websocket['enabled'] = self.get_parameter('enabled')
        self.websocket['port'] = self.get_parameter('port')
        self.websocket['address'] = self.get_parameter('address')
    
    
    def launch(self):
        if not self.websocket['enabled']:
            self.logger.log('Websocket object defined in the configuration is disabled, skipping websocket launch')
            return
        
        threader.create_and_launch(self.do_launch, name='Websocket port:%d listening' % self.websocket.get('port'), essential=True, part='websocket')
    
    
    def do_launch(self):
        self.webso = WebSocketBackend(self.websocket)
        # also load it in the websocket manager so other parts
        # can easily forward messages
        websocketmgr.set(self.webso)
        self.webso.run()
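
threader.create_and_launch is OpsBro's internal thread helper; its exact behavior is not shown here, but a minimal stdlib sketch of what such a helper presumably does (the daemon flag is an assumption) looks like this:

import threading

def create_and_launch(target, name, essential=False, part='', args=()):
    # Daemon threads die with the main process, which fits the
    # launch-and-forget usage seen above (an assumption about threader)
    t = threading.Thread(target=target, name=name, args=args)
    t.daemon = True
    t.start()
    return t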
Example No. 3
class ShinkenModule(ConnectorModule):
    implement = 'shinken'

    parameters = {
        'enabled': BoolParameter(default=False),
        'cfg_path': StringParameter(default='/etc/shinken/agent'),
        'reload_command': StringParameter(default='/etc/init.d/shinken reload'),
        'monitoring_tool': StringParameter(default='shinken'),
        'external_command_file': StringParameter(default='/var/lib/shinken/shinken.cmd'),
    }

    def __init__(self):
        ConnectorModule.__init__(self)
        self.regenerate_flag = False
        self.reload_flag = False
        self.cfg_path = None
        self.node_changes = []
        self.reload_command = ''
        self.monitoring_tool = 'shinken'
        self.external_command_file = '/var/lib/shinken/shinken.cmd'
        self.enabled = False
        self.export_states_uuids = set()

    def prepare(self):
        self.logger.info('SHINKEN: prepare phase')
        self.cfg_path = os.path.abspath(self.get_parameter('cfg_path'))
        self.reload_command = self.get_parameter('reload_command')
        self.monitoring_tool = self.get_parameter('monitoring_tool')
        self.external_command_file = self.get_parameter(
            'external_command_file')
        self.enabled = self.get_parameter('enabled')
        # Simulate that we are a new node, to always export our states at startup
        self.node_changes.append(('new-node', gossiper.uuid))
        # register to node events
        pubsub.sub('new-node', self.new_node_callback)
        pubsub.sub('delete-node', self.delete_node_callback)
        pubsub.sub('change-node', self.change_node_callback)

    def get_info(self):
        state = 'STARTED' if self.enabled else 'DISABLED'
        log = ''
        return {'configuration': self.get_config(), 'state': state, 'log': log}

    def launch(self):
        self.shinken_thread = threader.create_and_launch(
            self.main_thread,
            name='Export nodes/checks and states to Shinken',
            essential=True,
            part='shinken')

    def new_node_callback(self, node_uuid=None):
        if not self.enabled:
            return
        self.node_changes.append(('new-node', node_uuid))
        self.regenerate_flag = True

    def delete_node_callback(self, node_uuid=None):
        if not self.enabled:
            return
        self.node_changes.append(('delete-node', node_uuid))
        self.regenerate_flag = True

    def change_node_callback(self, node_uuid=None):
        if not self.enabled:
            return
        self.node_changes.append(('change-node', node_uuid))
        self.regenerate_flag = True

    def sanatize_check_name(self, cname):
        return 'Agent-%s' % cname.split('/')[-1]

    def export_all_states(self):
        p = self.external_command_file
        if not os.path.exists(p):
            self.logger.warning(
                'Shinken command file %s is missing, skipping node information export'
                % p)
            return

        # Now that nagios is ready, we can export our states
        for nid in self.export_states_uuids:
            self.__export_states_into_shinken(nid)  # update its inner checks states
        self.export_states_uuids.clear()

    def __export_states_into_shinken(self, nuuid):
        p = self.external_command_file

        v = kvmgr.get_key('__health/%s' % nuuid)
        if v is None or v == '':
            self.logger.error('Cannot access to the checks list for', nuuid)
            return

        lst = jsoner.loads(v)
        for cname in lst:
            v = kvmgr.get_key('__health/%s/%s' % (nuuid, cname))
            if v is None:  # missing check entry? not a real problem
                continue
            check = jsoner.loads(v)
            self.logger.debug('CHECK VALUE %s' % check)
            try:
                mode = 'w' if PY3 else 'a'  # codecs.open has issues with append mode in Python 3
                f = codecs.open(p, mode, encoding="utf-8")
                cmd = '[%s] PROCESS_SERVICE_CHECK_RESULT;%s;%s;%d;%s\n' % (
                    int(time.time()), nuuid, self.sanatize_check_name(cname),
                    check['state_id'], check['output'])
                self.logger.debug('SAVING COMMAND %s' % cmd)
                f.write(cmd)
                f.flush()
                f.close()
            except Exception as exp:
                self.logger.error('Shinken command file write fail: %s' % exp)
                return

    def __get_node_cfg_sha_paths(self, nid):
        cfg_p = os.path.join(self.cfg_path, nid + '.cfg')
        sha_p = os.path.join(self.cfg_path, nid + '.sha1')
        return (cfg_p, sha_p)

    def generate_node_file(self, n):
        uuid = n.get('uuid')
        if not os.path.exists(self.cfg_path):
            try:
                os.mkdir(self.cfg_path)
            except Exception as exp:
                self.logger.error('Cannot create shinken directory at %s : %s',
                                  self.cfg_path, str(exp))
                return
        self.logger.debug('Generating cfg/sha file for node %s' % n)
        p, shap = self.__get_node_cfg_sha_paths(uuid)
        # p = os.path.join(self.cfg_path, uuid + '.cfg')
        ptmp = p + '.tmp'
        # shap = os.path.join(self.cfg_path, uuid + '.sha1')
        shaptmp = shap + '.tmp'

        old_sha_value = ''
        if os.path.exists(shap):
            try:
                f = open(shap, 'r')
                old_sha_value = f.read().strip()
                f.close()
            except Exception as exp:
                self.logger.error('Cannot read old sha file value at %s: %s' %
                                  (shap, exp))

        tpls = n.get('groups', [])[:]  # make a copy, because we will modify it
        zone = n.get('zone', '')
        if zone:
            tpls.append(zone)
        tpls.insert(0, 'agent,opsbro')

        # get check names and sort them so the file is always the same
        cnames = list(n.get('checks', {}).keys())  # list() for python3
        cnames.sort()

        # Services must be purely passive, and will only trigger once
        buf_service = '''define service{
            host_name               %s
            service_description     %s
            use                     generic-service
            active_checks_enabled   0
            passive_checks_enabled  1
            check_command           check-host-alive
            max_check_attempts      1
        \n}\n
        '''
        # NOTE: nagios does not like templates that do not exist, so only export with generic-host.
        # shinken doesn't care, so we can give it all we want here
        use_value = ','.join(tpls)
        if self.monitoring_tool == 'nagios':
            use_value = 'generic-host'

        buf = '''# Auto generated host, do not edit
        \ndefine host{
            host_name      %s
            display_name   %s
            address        %s
            use            %s
            check_period                    24x7
            check_interval                  1
            retry_interval                  1
            max_check_attempts              2
        \n}\n
        \n%s\n''' % (n['uuid'], n['name'], n['addr'], use_value, '\n'.join([
            buf_service % (n['uuid'], self.sanatize_check_name(cname))
            for cname in cnames
        ]))
        buf_sha = get_sha1_hash(buf)

        # is it the same as before?
        self.logger.debug('COMPARING OLD SHA/NEWSHA= %s   %s' %
                          (old_sha_value, buf_sha))
        if buf_sha == old_sha_value:
            self.logger.debug('SAME SHA VALUE, SKIP IT')
            return

        self.logger.info('Will generate in path %s (sha1=%s): \n%s' %
                         (p, buf_sha, buf))
        try:
            # open both files, so if one goes wrong we stay consistent
            fcfg = open(ptmp, 'w')
            fsha = open(shaptmp, 'w')
            # save cfg file
            fcfg.write(buf)
            fcfg.close()
            shutil.move(ptmp, p)
            # and then sha one
            fsha.write(buf_sha)
            fsha.close()
            shutil.move(shaptmp, shap)
        except IOError as exp:
            try:
                fcfg.close()
            except:
                pass
            try:
                fsha.close()
            except:
                pass
            self.logger.error('Cannot create shinken node file at %s : %s' %
                              (p, exp))
            return
        self.logger.info('Generated file %s for node %s' % (p, uuid))
        # We did change configuration, reload shinken
        self.reload_flag = True

    # A specific node id was detected as not needed, try to clean it
    def clean_node_files(self, nid):
        cfgp, shap = self.__get_node_cfg_sha_paths(nid)
        if os.path.exists(cfgp):
            try:
                os.unlink(cfgp)
                # We did remove a file, reload shinken so
                self.reload_flag = True
            except IOError as exp:
                self.logger.error('Cannot remove deprecated file %s' % cfgp)
        if os.path.exists(shap):
            try:
                os.unlink(shap)
            except IOError as exp:
                self.logger.error('Cannot remove deprecated file %s' % shap)

    def clean_cfg_dir(self):
        if not self.cfg_path:  # nothing to clean...
            return
        node_keys = gossiper.nodes.keys()
        self.logger.debug('Current nodes uuids: %s' % node_keys)
        # First look at cfg files that don't match our inner elements, based on their file names
        # Note: if the user did something silly, no luck for them!
        cfgs = glob.glob('%s/*.cfg' % self.cfg_path)
        self.logger.info('Looking at files for cleaning %s' % cfgs)
        lpath = len(self.cfg_path) + 1
        for cfg in cfgs:
            fuuid_ = cfg[
                lpath:-len('.cfg')]  # get only the uuid part of the file name
            self.logger.debug('Should we clean cfg file %s' % fuuid_)
            if fuuid_ not in node_keys:
                self.logger.info('We clean deprecated cfg file %s' % cfg)
                self.clean_node_files(fuuid_)

    # Main thread: export nodes/checks and their states to Shinken
    def main_thread(self):
        # If the detector did not run, we are not sure about the groups of the local node,
        # so wait for it to run so we can generate valid shinken files from the start
        while not detecter.did_run:
            time.sleep(1)

        self.enabled = self.get_parameter('enabled')
        while not self.enabled:
            self.enabled = self.get_parameter('enabled')
            time.sleep(1)

        if self.cfg_path is not None:
            self.clean_cfg_dir()
            # First look at all nodes in the gossip ring and regenerate them
            node_keys = gossiper.nodes.keys()
            for nid in node_keys:
                n = gossiper.get(nid)
                if n is None:
                    continue
                self.generate_node_file(n)

        while not stopper.is_stop():
            self.logger.debug('Shinken loop, regenerate [%s]' %
                              self.regenerate_flag)

            # If we can, export all states into the nagios/shinken daemon as passive checks
            self.export_all_states()

            time.sleep(1)

            # If not initialized, skip this loop turn
            if self.cfg_path is None or gossiper is None:
                continue

            # If nothing to do in configuration, skip it too
            if not self.regenerate_flag:
                continue

            self.logger.info('Shinken callback raised, managing events: %s' %
                             self.node_changes)
            # Set that we will manage all now
            self.regenerate_flag = False
            node_ids = self.node_changes
            self.node_changes = []
            for (evt, nid) in node_ids:
                n = gossiper.get(nid)
                if evt == 'new-node':
                    if n is None:  # maybe someone just deleted the node?
                        continue
                    self.logger.info('Manage new node %s' % n)
                    self.generate_node_file(n)
                    self.export_states_uuids.add(nid)
                elif evt == 'delete-node':
                    self.logger.info('Removing deleted node %s' % nid)
                    self.clean_node_files(nid)
                elif evt == 'change-node':
                    self.logger.info(
                        'A node did change, updating its configuration. Node %s'
                        % nid)
                    self.generate_node_file(n)
                    self.export_states_uuids.add(nid)

            # If we need to reload and have a reload command, do it
            if self.reload_flag and self.reload_command:
                self.reload_flag = False
                rc, stdout, stderr = exec_command(self.reload_command)
                stdout += stderr
                if rc != 0:
                    self.logger.error('Cannot reload monitoring daemon: %s' %
                                      stdout)
                    return

                self.logger.info('Monitoring daemon reload: OK')
                payload = {'type': 'shinken-restart'}
                gossiper.stack_event_broadcast(payload)
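
The passive check export in __export_states_into_shinken relies on the standard Nagios/Shinken external command format. Isolated as a small helper (the function name and sample values are illustrative):

import time

def passive_check_line(host, service, state_id, output):
    # Standard external command:
    # [timestamp] PROCESS_SERVICE_CHECK_RESULT;<host>;<service>;<return code>;<plugin output>
    return '[%d] PROCESS_SERVICE_CHECK_RESULT;%s;%s;%d;%s\n' % (
        int(time.time()), host, service, state_id, output)

# Example: report an OK (0) state for one agent check
line = passive_check_line('node-uuid-1234', 'Agent-cpu', 0, 'CPU load is fine')
# The module appends such lines to external_command_file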
Example No. 4
class StatsdModule(ListenerModule):
    implement = 'statsd'

    parameters = {
        'enabled_if_group': StringParameter(default='statsd-listener'),
        'port': IntParameter(default=8125),
        'interval': IntParameter(default=10),
        'address': StringParameter(default='0.0.0.0'),
    }

    def __init__(self):
        ListenerModule.__init__(self)
        self.statsd = None

        self.enabled = False
        self.enabled_if_group = 'statsd-listener'

        self.port = 0
        self.udp_sock = None
        self.addr = '0.0.0.0'
        self.last_write = time.time()
        self.nb_data = 0

        # Do not step on your own foot...
        self.stats_lock = threading.RLock()

        # our main data structs
        self.gauges = {}
        self.timers = {}
        self.histograms = {}
        self.counters = {}

        # Numpy lib is heavy, don't load it unless we really need it
        self.np = None

        # if we never got any metrics, the thread does a long wait,
        # but as soon as we have one, it switches to short waits
        self.did_have_metrics = False

    def prepare(self):
        self.logger.debug('Statsd: prepare phase')
        self.statsd_port = self.get_parameter('port')
        self.stats_interval = self.get_parameter('interval')
        self.addr = self.get_parameter('address')

    # Prepare to open the UDP port
    def __open_socket(self):
        # We need numpy
        if self.np is None:
            try:
                import numpy as np
                self.np = np
            except ImportError:
                self.logger.error('The numpy library is not installed')
                self.np = None
                return
        self.udp_sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)  # UDP
        self.udp_sock.setsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF, 1048576)
        self.udp_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        self.logger.debug(
            self.udp_sock.getsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF))
        self.udp_sock.bind((self.addr, self.statsd_port))
        self.logger.info("TS UDP port open", self.statsd_port)
        self.logger.debug(
            "UDP RCVBUF",
            self.udp_sock.getsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF))

    def __close_socket(self):
        if self.udp_sock:
            self.udp_sock.close()
        self.udp_sock = None

    def get_info(self):
        state = 'STARTED' if self.enabled else 'DISABLED'
        log = ''
        if self.enabled and self.np is None:
            log = 'ERROR: cannot start the module: missing python-numpy package'
            state = 'ERROR'
        return {'configuration': self.get_config(), 'state': state, 'log': log}

    def launch(self):
        threader.create_and_launch(self.launch_statsd_udp_listener,
                                   name='UDP port:%d listening' %
                                   self.statsd_port,
                                   essential=True,
                                   part='statsd')
        threader.create_and_launch(self.launch_compute_stats_thread,
                                   name='Stats computing',
                                   essential=True,
                                   part='statsd')

    # The compute-stats thread computes the STATSD values every X
    # seconds and pushes them into the classic TS part
    def launch_compute_stats_thread(self):
        while not stopper.is_stop():
            now = time.time()
            if now > self.last_write + self.stats_interval:
                self.compute_stats()
                self.last_write = now
            if self.did_have_metrics:  # small wait
                time.sleep(0.1)
            else:
                time.sleep(5)  # can wait a bit for the first run

    def compute_stats(self):
        now = int(time.time())
        self.logger.debug("Computing stats")

        # First gauges: we take the data and swap in an empty dict so the other thread can keep working
        with self.stats_lock:
            gauges = self.gauges
            self.gauges = {}

        for mname in gauges:
            _sum, nb, _min, _max = gauges[mname]
            _avg = _sum / float(nb)
            key = 'stats.gauges.' + mname
            tsmgr.tsb.add_value(now, key, _avg)
            key = 'stats.gauges.' + mname + '.min'
            tsmgr.tsb.add_value(now, key, _min)
            key = 'stats.gauges.' + mname + '.max'
            tsmgr.tsb.add_value(now, key, _max)

        # Now counters
        with self.stats_lock:
            counters = self.counters
            self.counters = {}

        for mname in counters:
            cvalue, ccount = counters[mname]
            # count
            key = 'stats.gauges.' + mname + '.count'
            tsmgr.tsb.add_value(now, key, cvalue)
            # rate
            key = 'stats.gauges.' + mname + '.rate'
            tsmgr.tsb.add_value(now, key, cvalue / self.stats_interval)

        # Now timers, lots of fun :)
        with self.stats_lock:
            timers = self.timers
            self.timers = {}

        _t = time.time()
        for (mname, timer) in timers.items():
            # We will need to compute the mean_99, count_99, upper_99, sum_99, sum_squares_99
            # but also std, upper, lower, count, count_ps, sum, sum_square, mean, median
            _t = time.time()
            npvalues = self.np.array(timer)
            # Mean
            mean = self.np.mean(npvalues)
            key = 'stats.timers.' + mname + '.mean'
            tsmgr.tsb.add_value(now, key, mean)

            # Upper 99th, percentile
            upper_99 = self.np.percentile(npvalues, 99)
            key = 'stats.timers.' + mname + '.upper_99'
            tsmgr.tsb.add_value(now, key, upper_99)

            # Sum 99: sum of the values at or below the 99th percentile
            sum_99 = npvalues[npvalues <= upper_99].sum()
            key = 'stats.timers.' + mname + '.sum_99'
            tsmgr.tsb.add_value(now, key, sum_99)

            # Standard deviation
            std = self.np.std(npvalues)
            key = 'stats.timers.' + mname + '.std'
            tsmgr.tsb.add_value(now, key, std)

            # Simple count
            count = len(timer)
            key = 'stats.timers.' + mname + '.count'
            tsmgr.tsb.add_value(now, key, count)

            # Sum of all
            _sum = self.np.sum(npvalues)
            key = 'stats.timers.' + mname + '.sum'
            tsmgr.tsb.add_value(now, key, _sum)

            # Median of all
            median = self.np.percentile(npvalues, 50)
            key = 'stats.timers.' + mname + '.median'
            tsmgr.tsb.add_value(now, key, median)

            # Upper of all
            upper = self.np.max(npvalues)
            key = 'stats.timers.' + mname + '.upper'
            tsmgr.tsb.add_value(now, key, upper)

            # Lower of all
            lower = self.np.min(npvalues)
            key = 'stats.timers.' + mname + '.lower'
            tsmgr.tsb.add_value(now, key, lower)

    # This is the main STATSD UDP listener thread. Should not block and
    # be as fast as possible
    def launch_statsd_udp_listener(self):
        while not stopper.is_stop():

            if_group = self.get_parameter('enabled_if_group')
            self.enabled = gossiper.is_in_group(if_group)

            # Ok, if we are not enabled, don't even talk to statsd
            if not self.enabled:
                self.__close_socket()
                time.sleep(1)
                continue

            # maybe we were enabled, then not, then again, if so re-prepare
            if self.udp_sock is None:
                self.__open_socket()

            # Maybe the socket or the numpy lib failed (maybe an installation is in progress)
            if self.udp_sock is None:
                self.logger.error(
                    'Seems that the socket or numpy are not ready, postponing the module initialization'
                )
                time.sleep(1)
                continue
            try:
                data, addr = self.udp_sock.recvfrom(65535)  # max UDP datagram size
            except socket.timeout:  # loop until we got something
                continue

            self.logger.debug("UDP: received message:", data, addr)
            # No data? bail out :)
            if len(data) == 0:
                continue
            self.logger.debug("GETDATA", data)

            for line in data.splitlines():
                # avoid invalid lines
                if '|' not in line:
                    continue
                elts = line.split('|', 1)
                # invalid, no type in the right part
                if len(elts) == 1:
                    continue

                _name_value = elts[0].strip()
                # maybe it's an invalid name...
                if ':' not in _name_value:
                    continue
                _nvs = _name_value.split(':')
                if len(_nvs) != 2:
                    continue
                mname = _nvs[0].strip()

                # We have a real value, so from now on we allow smaller wait times
                self.did_have_metrics = True

                # Two cases: it's for me or not
                hkey = hashlib.sha1(mname).hexdigest()
                ts_node_manager = gossiper.find_group_node('ts', hkey)
                # if I am the one managing this key, I add it to my backend
                if ts_node_manager != gossiper.uuid:
                    node = gossiper.get(ts_node_manager)
                    # threads are dangerous things...
                    if node is None:
                        continue

                    # TODO: do bulk send of this, like for graphite
                    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
                    # do NOT use the node['port'], that's the internal communication port, not the statsd one!
                    sock.sendto(line, (node['addr'], self.statsd_port))
                    sock.close()
                    continue

                # Here we are sure it's really for us, so manage it :)
                value = to_best_int_float(_nvs[1].strip())
                if not mname or value is None:
                    continue

                # Look at the type of the data
                _type = elts[1].strip()
                if len(_type) == 0:
                    continue

                ## Gauge: <metric name>:<value>|g
                if _type == 'g':
                    self.nb_data += 1
                    self.logger.log('GAUGE', mname, value)
                    with self.stats_lock:
                        gentry = self.gauges.get(mname, None)
                        if gentry is None:
                            # sum, nb, min, max
                            gentry = (0.0, 0, None, None)
                        _sum, nb, _min, _max = gentry
                        _sum += value
                        nb += 1
                        if _min is None or value < _min:
                            _min = value
                        if _max is None or value > _max:
                            _max = value
                        self.gauges[mname] = (_sum, nb, _min, _max)
                        self.logger.debug('NEW GAUGE', mname,
                                          self.gauges[mname])

                ## Timers: <metric name>:<value>|ms
                ## But also
                ## Histograms: <metric name>:<value>|h
                elif _type == 'ms' or _type == 'h':
                    self.logger.debug('timers', mname, value)
                    # TODO: avoid the SET each time
                    timer = self.timers.get(mname, [])
                    timer.append(value)
                    self.timers[mname] = timer
                ## Counters: <metric name>:<value>|c[|@<sample rate>]
                elif _type == 'c':
                    self.nb_data += 1
                    self.logger.info('COUNTER', mname, value, "rate", 1)
                    with self.stats_lock:
                        cvalue, ccount = self.counters.get(mname, (0, 0))
                        self.counters[mname] = (cvalue + value, ccount + 1)
                        self.logger.debug('NEW COUNTER', mname,
                                          self.counters[mname])
                ## Meters: <metric name>:<value>|m
                elif _type == 'm':
                    self.logger.debug('METERs', mname, value)
                else:  # unknown type, maybe a c[|@<sample rate>]
                    if _type[0] == 'c':
                        self.nb_data += 1
                        if '|' not in _type:
                            continue
                        srate = _type.split('|')[1].strip()
                        if len(srate) == 0 or srate[0] != '@':
                            continue
                        try:
                            rate = float(srate[1:])
                        except ValueError:
                            continue
                        # Invalid rate, 0.0 is invalid too ;)
                        if rate <= 0.0 or rate > 1.0:
                            continue
                        self.logger.debug('COUNTER', mname, value, "rate",
                                          rate)
                        with self.stats_lock:
                            cvalue, ccount = self.counters.get(mname, (0, 0))
                            self.logger.debug('INCR counter', (value / rate))
                            self.counters[mname] = (cvalue + (value / rate),
                                                    ccount + 1 / rate)
                            self.logger.debug('NEW COUNTER', mname,
                                              self.counters[mname])
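
The listener parses the classic statsd wire format: <name>:<value>|g for gauges, |ms for timers, |c[|@<sample rate>] for counters. A minimal client exercising the loop above, assuming the agent listens on the default port 8125:

import socket

STATSD_ADDR = ('127.0.0.1', 8125)  # default port from the parameters above

sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
# Gauge: averaged (with min/max) at each flush interval
sock.sendto(b'web.requests_in_flight:42|g', STATSD_ADDR)
# Timer: mean/median/std/percentiles are computed at flush time
sock.sendto(b'web.request_time:12.5|ms', STATSD_ADDR)
# Sampled counter: the value is scaled back up by the 0.1 sample rate
sock.sendto(b'web.hits:1|c|@0.1', STATSD_ADDR)
sock.close()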
Example No. 5
class DNSModule(ListenerModule):
    implement = 'dns'

    parameters = {
        'enabled_if_group': StringParameter(default='dns-listener'),
        'port': IntParameter(default=6766),
        'domain': StringParameter(default='.opsbro'),
    }

    def __init__(self):
        super(DNSModule, self).__init__()
        self.enabled = False
        self.port = 0
        self.domain = ''
        self.sock = None

        # Hand my logger to the DNSQuery class
        DNSQuery.logger = self.logger

    def get_my_parameters(self):
        if_group = self.get_parameter('enabled_if_group')
        enabled = gossiper.is_in_group(if_group)
        self.logger.debug('Looking if the group %s is matching: %s' %
                          (if_group, enabled))
        port = self.get_parameter('port')
        domain = self.get_parameter('domain')
        # normalize the domain so it looks like '.foo.'
        if not domain.endswith('.'):
            domain += '.'
        if not domain.startswith('.'):
            domain = '.' + domain
        return enabled, port, domain

    def get_info(self):
        state = 'STARTED' if self.enabled else 'DISABLED'
        log = ''
        return {'configuration': self.get_config(), 'state': state, 'log': log}

    def launch(self):
        threader.create_and_launch(self.do_launch,
                                   name='UDP port:%d listening' % self.port,
                                   essential=True,
                                   part='dns')

    def close_socket(self):
        if self.sock is None:
            return
        try:
            self.sock.close()
        except Exception as exp:
            self.logger.error('Cannot close DNS socket: %s' % exp)
        self.sock = None

    def bind(self):
        # Always be sure to close our socket if binding a new
        self.close_socket()
        self.logger.info('Opening UDP port')
        # Create and bind the UDP socket used to serve DNS queries
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        self.logger.info('DNS server binding to UDP port %d' % self.port)
        try:
            self.sock.bind(('', self.port))
        except Exception as exp:
            self.logger.error('Cannot open the DNS port %s : %s' %
                              (self.port, exp))
            self.sock = None

    def do_launch(self):
        # If the detector did not run, we are not sure about the groups of the local node,
        # so wait for it to run
        while not detecter.did_run:
            time.sleep(1)

        while not stopper.is_stop():
            # Note: domain is re-read each loop, so no need to track its changes
            was_enabled, prev_port = self.enabled, self.port
            self.enabled, self.port, self.domain = self.get_my_parameters()

            # Manage stop or skip loop
            if not self.enabled:
                # If we are going to stop, close our socket and wait to be enabled again
                if was_enabled:
                    self.close_socket()
                # Ok wait a bit
                time.sleep(1)
                continue

            # Multiple cases will need us to open/reopen the socket
            # but we want to do it only once
            reopen = False

            # We are enabled, maybe we were not just before
            # if so we must bind our port
            if not was_enabled:
                reopen = True

            # Maybe just the port did change
            if self.port != prev_port:
                reopen = True

            # Maybe we failed to open it before (port already in use?)
            if self.sock is None:
                reopen = True

            # Ok if we need to reopen, do it
            if reopen:
                self.bind()

            # But maybe the bind failed,
            # so skip this turn
            if self.sock is None:
                time.sleep(1)
                continue

            # Ok we are good :)
            try:
                data, addr = self.sock.recvfrom(1024)
            except socket.timeout:
                continue  # loop until we got some data :)

            try:
                p = DNSQuery(data)
                r = p.lookup_for_nodes(self.domain)
                self.logger.debug("DNS lookup nodes response:", r)
                self.sock.sendto(p.response(r), addr)
            except Exception:
                self.logger.error('Module got issue: %s' %
                                  (str(traceback.format_exc())))
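
The reopen logic in do_launch folds three conditions into one flag. Extracted as a pure function for clarity (the function and argument names are mine, for illustration):

def must_reopen(was_enabled, is_enabled, prev_port, port, sock):
    # Re-bind when the module was just enabled, when the port changed,
    # or when a previous bind failed and left us without a socket
    if not is_enabled:
        return False
    return (not was_enabled) or (port != prev_port) or (sock is None)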
Example No. 6
class Mongodb(Collector):
    parameters = {
        'uri': StringParameter(default='mongodb://localhost'),
        'user': StringParameter(default=''),
        'password': StringParameter(default=''),
        'replicat_set': BoolParameter(default=False),
    }

    def __init__(self):
        super(Mongodb, self).__init__()
        self.pymongo = None
        self.mongoDBStore = None

    def _clean_struct(self, e):
        to_del = []
        if isinstance(e, dict):
            for (k, v) in e.items():
                if isinstance(v, dict):
                    self._clean_struct(v)
                    continue
                if isinstance(v, list) or isinstance(v, tuple):
                    for sub_e in v:
                        self._clean_struct(sub_e)
                    continue
                if not isinstance(v, Number) and not isinstance(v, basestring):
                    self.logger.debug('CLEANING bad entry type: %s %s %s' %
                                      (k, v, type(v)))
                    to_del.append(k)
                    continue
        for k in to_del:
            del e[k]

    def launch(self):
        logger = self.logger
        logger.debug('getMongoDBStatus: start')

        if not self.is_in_group('mongodb'):
            self.set_not_eligible(
                'Please add the mongodb group to enable this collector.')
            return

        # Try to import pymongo from the system (the best choice),
        # but if it's not available, switch to the embedded one.
        # NOTE: the embedded one is a 2.9.2 with a CentOS 7 .so file; on other distros only the C
        # extension will fail to load, which is not a real problem as we don't care about the lib perf here
        if self.pymongo is None:
            try:
                import pymongo
                self.pymongo = pymongo
            except ImportError:
                my_dir = os.path.abspath(os.path.dirname(__file__))
                sys.path.insert(0, my_dir)
                try:
                    import pymongo
                    self.pymongo = pymongo
                except ImportError as exp:
                    self.set_error(
                        'Unable to import pymongo library, even the embedded one (%s)'
                        % exp)
                    return False
                finally:
                    try:
                        sys.path.remove(my_dir)
                    except:
                        pass

        try:
            mongoURI = ''
            parsed = urlparse(self.get_parameter('uri'))

            # Can't use attributes on Python 2.4
            if parsed[0] != 'mongodb':
                mongoURI = 'mongodb://'
                if parsed[2]:
                    if parsed[0]:
                        mongoURI = mongoURI + parsed[0] + ':' + parsed[2]
                    else:
                        mongoURI = mongoURI + parsed[2]
            else:
                mongoURI = self.get_parameter('uri')

            logger.debug('-- mongoURI: %s', mongoURI)
            if hasattr(self.pymongo, 'Connection'):  # Old pymongo
                conn = self.pymongo.Connection(mongoURI, slave_okay=True)
            else:  # new pymongo (> 2.9.5)
                conn = self.pymongo.MongoClient(mongoURI)
            logger.debug('Connected to MongoDB')
        except self.pymongo.errors.ConnectionFailure as exp:
            self.set_error(
                'Unable to connect to MongoDB server %s - Exception = %s' %
                (mongoURI, exp))
            return False

        # Older versions of pymongo did not support the command()
        # method below.
        try:
            db = conn['local']

            # Server status
            statusOutput = db.command(
                'serverStatus')  # Shorthand for {'serverStatus': 1}

            logger.debug('getMongoDBStatus: executed serverStatus')

            # Setup
            status = {'available': True}
            self._clean_struct(
                statusOutput)  # remove objects type we do not want
            status.update(statusOutput)

            # Version
            try:
                status['version'] = statusOutput['version']
                logger.debug('getMongoDBStatus: version %s',
                             statusOutput['version'])
            except KeyError as ex:
                logger.error(
                    'getMongoDBStatus: version KeyError exception = %s', ex)
                pass

            # Global locks
            try:
                logger.debug('getMongoDBStatus: globalLock')

                status['globalLock'] = {}
                status['globalLock']['ratio'] = statusOutput['globalLock'][
                    'ratio']

                status['globalLock']['currentQueue'] = {}
                status['globalLock']['currentQueue']['total'] = statusOutput[
                    'globalLock']['currentQueue']['total']
                status['globalLock']['currentQueue']['readers'] = statusOutput[
                    'globalLock']['currentQueue']['readers']
                status['globalLock']['currentQueue']['writers'] = statusOutput[
                    'globalLock']['currentQueue']['writers']

            except KeyError as ex:
                logger.debug(
                    'getMongoDBStatus: globalLock KeyError exception = %s' %
                    ex)
                pass

            # Memory
            try:
                logger.debug('getMongoDBStatus: memory')

                status['mem'] = {}
                status['mem']['resident'] = statusOutput['mem']['resident']
                status['mem']['virtual'] = statusOutput['mem']['virtual']
                status['mem']['mapped'] = statusOutput['mem']['mapped']

            except KeyError as ex:
                logger.debug(
                    'getMongoDBStatus: memory KeyError exception = %s', ex)
                pass

            # Connections
            try:
                logger.debug('getMongoDBStatus: connections')

                status['connections'] = {}
                status['connections']['current'] = statusOutput['connections'][
                    'current']
                status['connections']['available'] = statusOutput[
                    'connections']['available']

            except KeyError as ex:
                logger.debug(
                    'getMongoDBStatus: connections KeyError exception = %s',
                    ex)
                pass

            # Extra info (Linux only)
            try:
                logger.debug('getMongoDBStatus: extra info')

                status['extraInfo'] = {}
                status['extraInfo']['heapUsage'] = statusOutput['extra_info'][
                    'heap_usage_bytes']
                status['extraInfo']['pageFaults'] = statusOutput['extra_info'][
                    'page_faults']

            except KeyError as ex:
                logger.debug(
                    'getMongoDBStatus: extra info KeyError exception = %s', ex)
                pass

            # Background flushing
            try:
                logger.debug('getMongoDBStatus: backgroundFlushing')

                status['backgroundFlushing'] = {}
                delta = datetime.datetime.utcnow(
                ) - statusOutput['backgroundFlushing']['last_finished']
                status['backgroundFlushing'][
                    'secondsSinceLastFlush'] = delta.seconds
                status['backgroundFlushing']['lastFlushLength'] = statusOutput[
                    'backgroundFlushing']['last_ms']
                status['backgroundFlushing']['flushLengthAvrg'] = statusOutput[
                    'backgroundFlushing']['average_ms']

            except KeyError as ex:
                logger.debug(
                    'getMongoDBStatus: backgroundFlushing KeyError exception = %s',
                    ex)
                pass

            # Per second metric calculations (opcounts and asserts)
            try:
                if self.mongoDBStore is None:
                    logger.debug(
                        'getMongoDBStatus: per second metrics no cached data, so storing for first time'
                    )
                    self.setMongoDBStore(statusOutput)

                else:
                    logger.debug(
                        'getMongoDBStatus: per second metrics cached data exists'
                    )

                    accessesPS = float(
                        statusOutput['indexCounters']['btree']['accesses'] -
                        self.mongoDBStore['indexCounters']['btree']
                        ['accessesPS']) / 60

                    if accessesPS >= 0:
                        status['indexCounters'] = {}
                        status['indexCounters']['btree'] = {}
                        status['indexCounters']['btree'][
                            'accessesPS'] = accessesPS
                        status['indexCounters']['btree']['hitsPS'] = float(
                            statusOutput['indexCounters']['btree']['hits'] -
                            self.mongoDBStore['indexCounters']['btree']
                            ['hitsPS']) / 60
                        status['indexCounters']['btree']['missesPS'] = float(
                            statusOutput['indexCounters']['btree']['misses'] -
                            self.mongoDBStore['indexCounters']['btree']
                            ['missesPS']) / 60
                        status['indexCounters']['btree']['missRatioPS'] = float(
                            statusOutput['indexCounters']['btree']['missRatio']
                            - self.mongoDBStore['indexCounters']['btree']
                            ['missRatioPS']) / 60

                        status['opcounters'] = {}
                        status['opcounters']['insertPS'] = float(
                            statusOutput['opcounters']['insert'] -
                            self.mongoDBStore['opcounters']['insertPS']) / 60
                        status['opcounters']['queryPS'] = float(
                            statusOutput['opcounters']['query'] -
                            self.mongoDBStore['opcounters']['queryPS']) / 60
                        status['opcounters']['updatePS'] = float(
                            statusOutput['opcounters']['update'] -
                            self.mongoDBStore['opcounters']['updatePS']) / 60
                        status['opcounters']['deletePS'] = float(
                            statusOutput['opcounters']['delete'] -
                            self.mongoDBStore['opcounters']['deletePS']) / 60
                        status['opcounters']['getmorePS'] = float(
                            statusOutput['opcounters']['getmore'] -
                            self.mongoDBStore['opcounters']['getmorePS']) / 60
                        status['opcounters']['commandPS'] = float(
                            statusOutput['opcounters']['command'] -
                            self.mongoDBStore['opcounters']['commandPS']) / 60

                        status['asserts'] = {}
                        status['asserts']['regularPS'] = float(
                            statusOutput['asserts']['regular'] -
                            self.mongoDBStore['asserts']['regularPS']) / 60
                        status['asserts']['warningPS'] = float(
                            statusOutput['asserts']['warning'] -
                            self.mongoDBStore['asserts']['warningPS']) / 60
                        status['asserts']['msgPS'] = float(
                            statusOutput['asserts']['msg'] -
                            self.mongoDBStore['asserts']['msgPS']) / 60
                        status['asserts']['userPS'] = float(
                            statusOutput['asserts']['user'] -
                            self.mongoDBStore['asserts']['userPS']) / 60
                        status['asserts']['rolloversPS'] = float(
                            statusOutput['asserts']['rollovers'] -
                            self.mongoDBStore['asserts']['rolloversPS']) / 60

                        self.setMongoDBStore(statusOutput)
                    else:
                        logger.debug(
                            'getMongoDBStatus: per second metrics negative value calculated, mongod likely restarted, so clearing cache'
                        )
                        self.setMongoDBStore(statusOutput)

            except KeyError as ex:
                logger.debug(
                    'getMongoDBStatus: per second metrics KeyError exception = %s'
                    % ex)
                pass

            # Cursors
            try:
                logger.debug('getMongoDBStatus: cursors')

                status['cursors'] = {}
                status['cursors']['totalOpen'] = statusOutput['cursors'][
                    'totalOpen']

            except KeyError as ex:
                logger.debug(
                    'getMongoDBStatus: cursors KeyError exception = %s' % ex)
                pass

            # Replica set status
            if self.get_parameter('replicat_set'):
                logger.debug('getMongoDBStatus: get replset status too')

                # isMaster (to get the state)
                isMaster = db.command('isMaster')

                logger.debug('getMongoDBStatus: executed isMaster')

                status['replSet'] = {}
                status['replSet']['setName'] = isMaster['setName']
                status['replSet']['isMaster'] = isMaster['ismaster']
                status['replSet']['isSecondary'] = isMaster['secondary']

                if 'arbiterOnly' in isMaster:
                    status['replSet']['isArbiter'] = isMaster['arbiterOnly']

                logger.debug('getMongoDBStatus: finished isMaster')

                # rs.status()
                db = conn['admin']
                replSet = db.command('replSetGetStatus')

                logger.debug('getMongoDBStatus: executed replSetGetStatus')

                status['replSet']['myState'] = replSet['myState']
                status['replSet']['members'] = {}

                for member in replSet['members']:

                    logger.debug(
                        'getMongoDBStatus: replSetGetStatus looping %s',
                        member['name'])

                    status['replSet']['members'][str(member['_id'])] = {}
                    status['replSet']['members'][str(
                        member['_id'])]['name'] = member['name']
                    status['replSet']['members'][str(
                        member['_id'])]['state'] = member['state']

                    # Optime delta (only available from not self)
                    # Calculation is from http://docs.python.org/library/datetime.html#datetime.timedelta.total_seconds
                    if 'optimeDate' in member:  # Only available as of 1.7.2
                        deltaOptime = datetime.datetime.utcnow(
                        ) - member['optimeDate']
                        status['replSet']['members'][str(
                            member['_id'])]['optimeDate'] = (
                                deltaOptime.microseconds +
                                (deltaOptime.seconds +
                                 deltaOptime.days * 24 * 3600) * 10**6) / 10**6

                    if 'self' in member:
                        status['replSet']['myId'] = member['_id']

                    # Have to do it manually because total_seconds() is only available as of Python 2.7
                    else:
                        if 'lastHeartbeat' in member:
                            deltaHeartbeat = datetime.datetime.utcnow(
                            ) - member['lastHeartbeat']
                            status['replSet']['members'][str(
                                member['_id'])]['lastHeartbeat'] = (
                                    deltaHeartbeat.microseconds +
                                    (deltaHeartbeat.seconds +
                                     deltaHeartbeat.days * 24 * 3600) *
                                    10**6) / 10**6

                    if 'errmsg' in member:
                        status['replSet']['members'][str(
                            member['_id'])]['error'] = member['errmsg']

            # db.stats()
            logger.debug('getMongoDBStatus: db.stats() too')
            status['dbStats'] = {}
            for database in conn.database_names():
                if database != 'config' and database != 'local' and database != 'admin' and database != 'test':
                    logger.debug(
                        'getMongoDBStatus: executing db.stats() for %s',
                        database)
                    status['dbStats'][database] = conn[database].command(
                        'dbstats')
                    status['dbStats'][database]['namespaces'] = conn[database][
                        'system']['namespaces'].count()

                    # Ensure all strings to prevent JSON parse errors. We typecast on the server
                    for key in status['dbStats'][database].keys():
                        status['dbStats'][database][key] = str(
                            status['dbStats'][database][key])
                        # try a float/int cast
                        v = to_best_int_float(status['dbStats'][database][key])
                        if v is not None:
                            status['dbStats'][database][key] = v

        except Exception:
            logger.error('Unable to get MongoDB status - Exception = %s',
                         traceback.format_exc())
            return False

        logger.debug('getMongoDBStatus: completed, returning')

        return status

    def setMongoDBStore(self, statusOutput):
        self.mongoDBStore = {}

        self.mongoDBStore['indexCounters'] = {}
        self.mongoDBStore['indexCounters']['btree'] = {}
        self.mongoDBStore['indexCounters']['btree'][
            'accessesPS'] = statusOutput['indexCounters']['btree']['accesses']
        self.mongoDBStore['indexCounters']['btree']['hitsPS'] = statusOutput[
            'indexCounters']['btree']['hits']
        self.mongoDBStore['indexCounters']['btree']['missesPS'] = statusOutput[
            'indexCounters']['btree']['misses']
        self.mongoDBStore['indexCounters']['btree'][
            'missRatioPS'] = statusOutput['indexCounters']['btree'][
                'missRatio']

        self.mongoDBStore['opcounters'] = {}
        self.mongoDBStore['opcounters']['insertPS'] = statusOutput[
            'opcounters']['insert']
        self.mongoDBStore['opcounters']['queryPS'] = statusOutput[
            'opcounters']['query']
        self.mongoDBStore['opcounters']['updatePS'] = statusOutput[
            'opcounters']['update']
        self.mongoDBStore['opcounters']['deletePS'] = statusOutput[
            'opcounters']['delete']
        self.mongoDBStore['opcounters']['getmorePS'] = statusOutput[
            'opcounters']['getmore']
        self.mongoDBStore['opcounters']['commandPS'] = statusOutput[
            'opcounters']['command']

        self.mongoDBStore['asserts'] = {}
        self.mongoDBStore['asserts']['regularPS'] = statusOutput['asserts'][
            'regular']
        self.mongoDBStore['asserts']['warningPS'] = statusOutput['asserts'][
            'warning']
        self.mongoDBStore['asserts']['msgPS'] = statusOutput['asserts']['msg']
        self.mongoDBStore['asserts']['userPS'] = statusOutput['asserts'][
            'user']
        self.mongoDBStore['asserts']['rolloversPS'] = statusOutput['asserts'][
            'rollovers']
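
setMongoDBStore caches the raw server counters so the next run can turn deltas into per-second rates over the sampling interval. The general pattern, stripped of the MongoDB specifics (names are illustrative):

def per_second(current, previous, interval=60):
    # Rate since the last sample; a negative delta means the daemon
    # restarted and reset its counters, so the cache must be refreshed
    delta = float(current - previous)
    return delta / interval if delta >= 0 else None

# e.g. inserts per second between two serverStatus samples taken 60s apart
rate = per_second(current=10500, previous=10200)  # -> 5.0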
Example No. 7
class ShinkenEnterpriseModule(ConnectorModule):
    implement = 'shinken-enterprise'

    parameters = {
        'enabled': BoolParameter(default=False),
        'file_result': StringParameter(default=''),
    }

    # We only work at the stopping phase, when all is finished, to push back our discovery
    def stopping_agent(self):
        enabled = self.get_parameter('enabled')
        if not enabled:
            return
        groups = gossiper.groups  # no need to copy, the group pointer is in read only
        self.logger.info(
            'Pushing back our groups and discovery information to Shinken Enterprise'
        )

        collectors_data = {}
        for (ccls, e) in collectormgr.collectors.items():
            cname, c = collectormgr.get_collector_json_extract(e)
            collectors_data[cname] = c

        # In the groups => templates mapping, we do not want ':' or '.' in the names
        _mapping = {':': '--', '.': '--'}
        use_value = ','.join(groups)
        for (k, v) in _mapping.items():
            use_value = use_value.replace(k, v)

        payload = {
            '_AGENT_UUID': gossiper.uuid,
            'use': use_value,
        }

        # System info
        system_results = collectors_data.get('system', {}).get('results', {})

        hostname = system_results.get('hostname', '')
        payload['host_name'] = hostname

        fqdn = system_results.get('fqdn', '')
        if fqdn:
            payload['_FQDN'] = fqdn

        publicip = system_results.get('publicip', '')
        if publicip:
            payload['_PUBLIC_IP'] = publicip

        # which address to use in fact?
        # how to choose:   fqdn > public_ip > hostname
        if fqdn:
            payload['address'] = fqdn
        elif publicip:
            payload['address'] = publicip
        else:
            payload['address'] = hostname

        # Timezone
        timezone = collectors_data.get('timezone',
                                       {}).get('results',
                                               {}).get('timezone', '')
        if timezone:
            payload['_TIMEZONE'] = bytes_to_unicode(timezone)

        cpucount = system_results.get('cpucount', '')
        if cpucount:
            payload['_CPU_COUNT'] = str(cpucount)  # data must be string

        linux_distribution = system_results.get('os', {}).get('linux', {}).get(
            'distribution', '')
        if linux_distribution:
            payload['_LINUX_DISTRIBUTION'] = linux_distribution

        # Memory
        # NOTE: this previously read from the 'timezone' collector, which looks like a
        # copy/paste slip; phys_total is assumed to come from the memory collector
        physical_memory = collectors_data.get('memory', {}).get(
            'results', {}).get('phys_total', '')
        if physical_memory:
            payload['_PHYSICAL_MEMORY'] = physical_memory

        # Network
        try:
            network_interfaces = ','.join(
                collectors_data.get('interfaces', {}).get('results',
                                                          {}).keys())
        except AttributeError:  # was without interfaces
            network_interfaces = ''
        if network_interfaces:
            payload['_NETWORK_INTERFACES'] = network_interfaces

        # Geoloc (lat and long)
        try:
            geoloc = collectors_data.get('geoloc', {}).get('results',
                                                           {}).get('loc', '')
        except AttributeError:  # was without geoloc
            geoloc = ''
        if geoloc and geoloc.count(',') == 1:
            lat, long = geoloc.split(',', 1)
            payload['_LAT'] = lat
            payload['_LONG'] = long

        # disks
        try:
            volumes = ','.join(
                collectors_data.get('diskusage', {}).get('results', {}).keys())
        except AttributeError:
            volumes = ''
        if volumes:
            payload['_VOLUMES'] = volumes

        file_result = self.get_parameter('file_result')
        self.logger.info('Writing file result to: %s' % file_result)
        if file_result:
            with open(file_result, 'w') as f:
                f.write(jsoner.dumps(payload, indent=4))
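
For reference, the file written above is a flat JSON object of Shinken host attributes and custom macros; an illustrative result (all values hypothetical) might look like:

{
    "_AGENT_UUID": "3f0a9c7e-...",
    "use": "linux,web--front",
    "host_name": "web-01",
    "_FQDN": "web-01.example.com",
    "address": "web-01.example.com",
    "_TIMEZONE": "Europe/Paris",
    "_CPU_COUNT": "4",
    "_LINUX_DISTRIBUTION": "debian",
    "_PHYSICAL_MEMORY": "8388604",
    "_NETWORK_INTERFACES": "eth0,lo",
    "_LAT": "48.85",
    "_LONG": "2.35",
    "_VOLUMES": "/,/var"
}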
Example no. 8
class MailHandlerModule(HandlerModule):
    implement = 'mail'

    parameters = {
        'enabled': BoolParameter(default=False),
        'severities': StringListParameter(default=['ok', 'warning', 'critical', 'unknown']),
        'contacts': StringListParameter(default=['*****@*****.**']),
        'addr_from': StringParameter(default='*****@*****.**'),
        'smtp_server': StringParameter(default='localhost'),
        'smtps': BoolParameter(default=False),
        'check_subject_template': StringParameter(default='mail-check-subject.tpl'),
        'check_text_template': StringParameter(default='mail-check-text.tpl'),
        'group_subject_template': StringParameter(default='mail-group-subject.tpl'),
        'group_text_template': StringParameter(default='mail-group-text.tpl'),
        'compliance_subject_template': StringParameter(default='mail-compliance-subject.tpl'),
        'compliance_text_template': StringParameter(default='mail-compliance-text.tpl'),
    }

    def __init__(self):
        super(MailHandlerModule, self).__init__()
        self.jinja2 = libstore.get_jinja2()
        self.smtplib = None

        # Check templates, to load them only once
        self.__computed_templates = {
            'check': {
                'subject': None,
                'text': None
            },
            'group': {
                'subject': None,
                'text': None
            },
            'compliance': {
                'subject': None,
                'text': None
            },
        }

    def __send_email(self, addr_from, msg, about_what):

        # Lazy load smtplib
        if self.smtplib is None:
            import smtplib
            self.smtplib = smtplib

        smtp_server = self.get_parameter("smtp_server")
        smtps = self.get_parameter("smtps")  # NOTE: read but unused below; no TLS/SMTPS branch in this snippet
        contacts = self.get_parameter('contacts')

        try:
            self.logger.debug("Handler: MAIL connection to %s" % smtp_server)
            s = self.smtplib.SMTP(smtp_server, timeout=30)
            r = s.sendmail(addr_from, contacts, msg.as_string())
            s.quit()
            self.logger.info('Sent an email to %d contacts (%s) about %s' %
                             (len(contacts), ','.join(contacts), about_what))
        except Exception:
            self.logger.error('Cannot send mail: %s' % traceback.format_exc())

    def __get_msg(self, addr_from, subject_m, text_m):
        from email.mime.text import MIMEText
        from email.header import Header

        msg = MIMEText(text_m, 'plain', 'utf-8')
        msg['From'] = addr_from
        msg['Subject'] = Header(subject_m, 'utf-8')

        return msg

    def __get_computed_template(self, for_what, which_template):
        what_entry = self.__computed_templates[for_what]
        return what_entry[which_template]

    def __load_and_compute_one_template(self, for_what, which_template):
        templates_dir = os.path.join(self.pack_directory, 'templates')
        pth = self.get_parameter('%s_%s_template' % (for_what, which_template))
        full_pth = os.path.join(templates_dir, pth)
        if not os.path.exists(full_pth):
            self.logger.error('Missing template file %s_%s_template: %s' %
                              (for_what, which_template, full_pth))
            return False
        try:
            with codecs.open(full_pth, 'r', 'utf8') as f:
                buf = f.read()
        except Exception as exp:
            self.logger.error(
                'Cannot load template file %s_%s_template (%s) : %s' %
                (for_what, which_template, full_pth, exp))
            return False
        try:
            tpl = self.jinja2.Template(buf)
        except Exception as exp:
            self.logger.error(
                'The template %s_%s_template (%s) raised an error when parsing: %s'
                % (for_what, which_template, full_pth, exp))
            return False
        # Ok we can save it
        what_entry = self.__computed_templates[for_what]
        what_entry[which_template] = tpl
        return True

    def __compute_templates(self, for_what):
        # Maybe it's already computed
        subject_tpl = self.__get_computed_template(for_what, 'subject')
        text_tpl = self.__get_computed_template(for_what, 'text')
        if subject_tpl is not None and text_tpl is not None:
            return True

        success = True
        success &= self.__load_and_compute_one_template(for_what, 'subject')
        success &= self.__load_and_compute_one_template(for_what, 'text')

        subject_tpl = self.__get_computed_template(for_what, 'subject')
        text_tpl = self.__get_computed_template(for_what, 'text')
        return subject_tpl is not None and text_tpl is not None

    def send_mail_check(self, check):
        have_templates = self.__compute_templates('check')
        if not have_templates:
            self.logger.error(
                'We do not have templates available, skipping the email sending'
            )
            return
        subject_tpl = self.__get_computed_template('check', 'subject')
        text_tpl = self.__get_computed_template('check', 'text')
        try:
            _time = datetime.datetime.fromtimestamp(int(
                time.time())).strftime('%Y-%m-%d %H:%M:%S')
            subject_m = subject_tpl.render(check=check, _time=_time)
            text_m = text_tpl.render(check=check, _time=_time)
            addr_from = self.get_parameter('addr_from')
            msg = self.__get_msg(addr_from, subject_m, text_m)

            self.__send_email(addr_from, msg, 'check state change')
        except Exception:
            self.logger.error('Cannot send mail for check: %s' %
                              traceback.format_exc())

    def send_mail_group(self, group, group_modification):
        have_templates = self.__compute_templates('group')
        if not have_templates:
            self.logger.error(
                'We do not have templates available, skipping the email sending'
            )
            return
        subject_tpl = self.__get_computed_template('group', 'subject')
        text_tpl = self.__get_computed_template('group', 'text')
        try:
            _time = datetime.datetime.fromtimestamp(int(
                time.time())).strftime('%Y-%m-%d %H:%M:%S')
            subject_m = subject_tpl.render(
                group=group, group_modification=group_modification)
            text_m = text_tpl.render(group=group,
                                     group_modification=group_modification)
            addr_from = self.get_parameter('addr_from')
            msg = self.__get_msg(addr_from, subject_m, text_m)

            self.__send_email(addr_from, msg, 'group modification')
        except Exception:
            self.logger.error('Cannot send mail for group modification: %s' %
                              traceback.format_exc())

    def send_mail_compliance(self, compliance):
        have_templates = self.__compute_templates('compliance')
        if not have_templates:
            self.logger.error(
                'We do not have templates available, skipping the email sending'
            )
            return
        subject_tpl = self.__get_computed_template('compliance', 'subject')
        text_tpl = self.__get_computed_template('compliance', 'text')
        try:
            _time = datetime.datetime.fromtimestamp(int(
                time.time())).strftime('%Y-%m-%d %H:%M:%S')
            subject_m = subject_tpl.render(compliance=compliance, _time=_time)
            text_m = text_tpl.render(compliance=compliance, _time=_time)
            addr_from = self.get_parameter('addr_from')
            msg = self.__get_msg(addr_from, subject_m, text_m)

            self.__send_email(addr_from, msg, 'compliance rule state change')
        except Exception:
            self.logger.error(
                'Cannot send mail for compliance modification: %s' %
                traceback.format_exc())

    def handle(self, obj, event):
        enabled = self.get_parameter('enabled')
        if not enabled:
            self.logger.debug(
                'Mail module is not enabled, skipping check alert sent')
            return

        self.logger.debug('Manage an obj event: %s (event=%s)' % (obj, event))

        evt_type = event['evt_type']

        # Checks: only notify about changes
        if evt_type == 'check_execution':
            evt_data = event['evt_data']
            check_did_change = evt_data['check_did_change']
            if check_did_change:
                self.send_mail_check(obj)

        # We are launched only if the group did change
        if evt_type == 'group_change':
            evt_data = event['evt_data']
            group_modification = evt_data['modification']
            self.send_mail_group(obj, group_modification)

        # Compliance: only when change, and only some switch cases should be
        # notify (drop useless changes)
        if evt_type == 'compliance_execution':
            evt_data = event['evt_data']
            compliance_did_change = evt_data['compliance_did_change']
            if compliance_did_change:
                self.send_mail_compliance(obj)
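
The templates themselves live under the pack's templates/ directory and are rendered with the objects shown above (check plus a preformatted _time, group plus group_modification, and so on). A hypothetical minimal pair for the check case, assuming the check dict exposes name, state and output as it does elsewhere in these examples:

mail-check-subject.tpl:
[{{ check.state }}] {{ check.name }}

mail-check-text.tpl:
At {{ _time }}, the check {{ check.name }} switched to {{ check.state }}.
Output:
{{ check.output }}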
Example no. 9
class Nginx(Collector):
    parameters = {
        'uri': StringParameter(default='http://localhost/nginx_status'),
    }
    
    def __init__(self):
        super(Nginx, self).__init__()
        self.nginxRequestsStore = None
        
    
    def launch(self):
        logger = self.logger
        
        if not self.is_in_group('nginx'):
            self.set_not_eligible('Please add the nginx group to enable this collector.')
            return
        
        logger.debug('getNginxStatus: start')
        
        logger.debug('getNginxStatus: config set')
        
        try:
            response = httper.get(self.get_parameter('uri'), timeout=3)
        except get_http_exceptions() as exp:
            self.set_error('Unable to get Nginx status - HTTPError = %s' % exp)
            return False
            
        logger.debug('getNginxStatus: urlopen success, start parsing')
        
        # Thanks to http://hostingfu.com/files/nginx/nginxstats.py for this code
        
        logger.debug('getNginxStatus: parsing connections')
        
        try:
            # Connections
            parsed = re.search(r'Active connections:\s+(\d+)', response)
            connections = int(parsed.group(1))
            
            logger.debug('getNginxStatus: parsed connections')
            logger.debug('getNginxStatus: parsing reqs')
            
            # Requests per second
            parsed = re.search(r'\s*(\d+)\s+(\d+)\s+(\d+)', response)
            
            if not parsed:
                logger.debug('getNginxStatus: could not parse response')
                return False
            
            requests = int(parsed.group(3))
            
            logger.debug('getNginxStatus: parsed reqs')
            
            if self.nginxRequestsStore is None or self.nginxRequestsStore < 0:
                logger.debug('getNginxStatus: no reqs so storing for first time')
                self.nginxRequestsStore = requests
                requestsPerSecond = 0
            else:
                logger.debug('getNginxStatus: reqs stored so calculating')
                logger.debug('getNginxStatus: self.nginxRequestsStore = %s', self.nginxRequestsStore)
                logger.debug('getNginxStatus: requests = %s', requests)
                
                # Delta divided by 60: assumes the collector runs once per minute
                requestsPerSecond = float(requests - self.nginxRequestsStore) / 60
                logger.debug('getNginxStatus: requestsPerSecond = %s', requestsPerSecond)
                self.nginxRequestsStore = requests
            
            if connections is not None and requestsPerSecond is not None:
                logger.debug('getNginxStatus: returning with data')
                return {'connections': connections, 'reqPerSec': requestsPerSecond}
            else:
                logger.debug('getNginxStatus: returning without data')
                return False
        
        except Exception:
            self.set_error('Unable to get Nginx status - %s - Exception = %s' % (response, traceback.format_exc()))
            return False
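
For reference, the two regexes above parse the standard stub_status payload, where the third number on the counters line is the cumulative request count:

Active connections: 291
server accepts handled requests
 16630948 16630948 31070465
Reading: 6 Writing: 179 Waiting: 106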
Example no. 10
class ImraneModule(ListenerModule):
    implement = 'imrane'

    parameters = {
        'enabled': BoolParameter(default=False),
        'collector-group': StringParameter(default='imrane-collector'),
        'agregator-group': StringParameter(default='imrane-agregator'),
        'database-path': StringParameter(default='/tmp/agregator.db'),
    }

    def __init__(self):
        ListenerModule.__init__(self)

        # reaping queue
        self.queue = []

        self.enabled = False

        self.database = None
        self.cursor = None

    # Read the module configuration
    def prepare(self):
        self.logger.debug('IMRANE: prepare phase')

        self.enabled = self.get_parameter('enabled')

        if self.enabled:
            self.logger.info("IMRANE: starting")
        else:
            self.logger.info('IMRANE is not enabled, skipping it')

    def get_info(self):
        state = 'STARTED' if self.enabled else 'DISABLED'
        log = ''
        return {'configuration': self.get_config(), 'state': state, 'log': log}

    def launch(self):
        threader.create_and_launch(self.launch_database_thread,
                                   name='Database thread',
                                   essential=True,
                                   part='imrane')
        threader.create_and_launch(self.launch_collector_thread,
                                   name='Collector thread',
                                   essential=True,
                                   part='imrane')

    def _import_data(self, data):
        results = data['results']
        from_name = data['from']
        self.queue.append((from_name, results))

    def stopping_agent(self):
        if self.database:
            self.logger.info('Closing database')
            self.database.commit()
            self.database.close()

    # Database thread: flush the reaping queue into sqlite
    # TODO: use a real daemon part for this, this is not ok for fast receive
    def launch_database_thread(self):
        while not stopper.is_stop():
            agregator_group = self.get_parameter('agregator-group')
            database_enabled = gossiper.is_in_group(agregator_group)

            if not database_enabled:
                self.logger.debug('IMRANE: not a database thread')
                time.sleep(1)
                continue

            if self.database is None:
                database_path = self.get_parameter('database-path')
                self.database = sqlite3.connect(database_path)

                self.cursor = self.database.cursor()
                # Create the Data table if it does not already exist
                tb_exists = "SELECT name FROM sqlite_master WHERE type='table' AND name='Data'"
                if not self.cursor.execute(tb_exists).fetchone():
                    self.cursor.execute(
                        "CREATE TABLE Data(id INTEGER PRIMARY KEY, Epoch INTEGER, HostName TEXT, KeyName TEXT, Value TEXT)"
                    )

            self.logger.info('IMRANE: database loop')
            self.logger.info('IMRANE: manage: %s' % self.queue)

            # Switch to avoid locking
            queue = self.queue
            self.queue = []

            now = int(time.time())
            for (from_name, results) in queue:
                self.logger.info('SAVING INTO DATABASE: %s => %s' %
                                 (from_name, results))
                # Use a parameterized query so values containing quotes cannot break the SQL
                for (key, value) in results.items():
                    q = 'INSERT INTO Data(Epoch, HostName, KeyName, Value) VALUES (?, ?, ?, ?)'
                    self.logger.info('EXECUTING: %s with %s' % (q, (now, from_name, key, value)))
                    self.cursor.execute(q, (now, from_name, key, str(value)))
            self.database.commit()

            time.sleep(1)

    # Collector thread: read the imrane collector results and ship them to an aggregator node
    # TODO: use a real daemon part for this, this is not ok for fast receive
    def launch_collector_thread(self):
        last_collector_check = 0
        while not stopper.is_stop():
            collector_group = self.get_parameter('collector-group')
            collector_enabled = gossiper.is_in_group(collector_group)

            if not collector_enabled:
                self.logger.debug('IMRANE: not a collector thread')
                time.sleep(1)
                continue
            self.logger.debug('IMRANE: collector loop')
            self.logger.debug('IMRANE: manage: %s' % self.queue)
            imrane_collector = None
            for collector in collectormgr.collectors.values():
                name = collector['name']
                if name == 'imrane':
                    imrane_collector = collector
                    break
            if imrane_collector is None:
                self.logger.error(
                    'IMRANE: cannot find the imrane collector, skipping this loop'
                )
                time.sleep(1)
                continue

            # Maybe this collector did not run since we last look at it, if so, skip it
            last_check = imrane_collector['last_check']
            if last_check == last_collector_check:
                self.logger.debug(
                    'IMRANE: the collector did not run since the last loop, skipping this turn'
                )
                time.sleep(1)
                continue
            last_collector_check = last_check

            results = imrane_collector['results']
            self.logger.info('IMRANE: collector result: %s' % results)

            our_node = gossiper.get(gossiper.uuid)
            our_node_name = our_node['name']

            agregator_group = self.get_parameter('agregator-group')
            agregator_nodes = gossiper.find_group_nodes(agregator_group)
            if len(agregator_nodes) == 0:
                self.logger.error(
                    'IMRANE ERROR: there are no agregator nodes, skipping data sending'
                )
                time.sleep(1)
                continue

            agregator_node_uuid = random.choice(agregator_nodes)
            agregator_node = gossiper.get(agregator_node_uuid)
            if agregator_node is None:  # oops: the node vanished between listing and lookup (thread race)
                time.sleep(1)
                continue

            address = agregator_node['addr']
            port = agregator_node['port']
            display_name = agregator_node['display_name']
            self.logger.info('IMRANE: did choose %s (%s:%s) for sending' %
                             (display_name, address, port))

            uri = 'http://%s:%s/imrane' % (address, port)
            try:
                r = httper.post(
                    uri,
                    params={
                        'results': results,
                        'from': our_node_name
                    },
                    headers={'Content-Type': 'application/json;charset=UTF-8'})
                self.logger.debug('Result insert: %s' % r)
            except get_http_exceptions() as exp:
                self.logger.error('Cannot connect to agregator: %s' % exp)

            # always sleep to not hammer the CPU
            time.sleep(1)

    # Export end points to get/list TimeSeries
    def export_http(self):
        @http_export('/imrane', method='POST')
        @http_export('/imrane/', method='POST')
        def get_ts_values():
            self.logger.info('CALLING /imrane POST')
            try:
                data_raw = request.body.getvalue()
                self.logger.info('POST: get body value: %s' % data_raw)
                data = jsoner.loads(data_raw)
                self.logger.info('POST: get results: %s' % data)
                self._import_data(data)
            except Exception:
                self.logger.error('IMRANE: ERROR %s' % traceback.format_exc())
            return None
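
Given the Data schema created above, the aggregated values can be inspected directly with the sqlite3 standard library; a minimal read-back sketch (the database path is the module's default parameter):

import sqlite3

conn = sqlite3.connect('/tmp/agregator.db')
# Show the ten most recent values reaped from collector nodes
for epoch, host, key, value in conn.execute(
        'SELECT Epoch, HostName, KeyName, Value FROM Data ORDER BY Epoch DESC LIMIT 10'):
    print(epoch, host, key, value)
conn.close()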
Example no. 11
class SlackHandlerModule(HandlerModule):
    implement = 'slack'

    parameters = {
        'enabled_if_group':
        StringParameter(default='slack'),
        'severities':
        StringListParameter(default=['ok', 'warning', 'critical', 'unknown']),
        'token':
        StringParameter(default=''),
        'channel':
        StringParameter(default='#alerts'),
    }

    def __init__(self):
        super(SlackHandlerModule, self).__init__()
        self.enabled = False

    def prepare(self):
        if_group = self.get_parameter('enabled_if_group')
        self.enabled = gossiper.is_in_group(if_group)

    def get_info(self):
        state = 'STARTED' if self.enabled else 'DISABLED'
        log = ''
        return {'configuration': self.get_config(), 'state': state, 'log': log}

    def __try_to_send_message(self, slack, attachments, channel):
        r = slack.chat.post_message(channel=channel,
                                    text='',
                                    as_user=True,
                                    attachments=attachments)
        self.logger.debug('[SLACK] return of the send: %s %s %s' %
                          (r.successful, r.__dict__['body']['channel'],
                           r.__dict__['body']['ts']))

    def __get_token(self):
        token = self.get_parameter('token')
        if not token:
            token = os.environ.get('SLACK_TOKEN', '')
        return token

    def __send_slack_check(self, check):
        token = self.__get_token()

        if not token:
            self.logger.error(
                '[SLACK] token is not configured on the slack module. skipping slack messages.'
            )
            return
        slack = Slacker(token)
        # title = '{date_num} {time_secs} [node:`%s`][addr:`%s`] Check `%s` is going %s' % (gossiper.display_name, gossiper.addr, check['name'], check['state'])
        content = check['output']
        channel = self.get_parameter('channel')
        colors = {'ok': 'good', 'warning': 'warning', 'critical': 'danger'}
        node_name = '%s (%s)' % (gossiper.name, gossiper.addr)
        if gossiper.display_name:
            node_name = '%s [%s]' % (node_name, gossiper.display_name)
        attachment = {
            "pretext": ' ',
            "text": content,
            'color': colors.get(check['state'], '#764FA5'),
            'author_name': node_name,
            'footer': 'Sent by OpsBro on %s' % node_name,
            'ts': int(time.time())
        }
        fields = [
            {
                "title": "Node",
                "value": node_name,
                "short": True
            },
            {
                "title": "Check",
                "value": check['name'],
                "short": True
            },
        ]
        attachment['fields'] = fields
        attachments = [attachment]
        self.__do_send_message(slack, attachments, channel)

    def __send_slack_group(self, group, group_modification):
        token = self.__get_token()

        if not token:
            self.logger.error(
                '[SLACK] token is not configured on the slack module. skipping slack messages.'
            )
            return
        slack = Slacker(token)
        # title = '{date_num} {time_secs} [node:`%s`][addr:`%s`] Check `%s` is going %s' % (gossiper.display_name, gossiper.addr, check['name'], check['state'])
        content = 'The group %s was %s' % (group, group_modification)
        channel = self.get_parameter('channel')
        colors = {'remove': 'danger', 'add': 'good'}
        node_name = '%s (%s)' % (gossiper.name, gossiper.addr)
        if gossiper.display_name:
            node_name = '%s [%s]' % (node_name, gossiper.display_name)
        attachment = {
            "pretext": ' ',
            "text": content,
            'color': colors.get(group_modification, '#764FA5'),
            'author_name': node_name,
            'footer': 'Sent by OpsBro on %s' % node_name,
            'ts': int(time.time())
        }
        fields = [
            {
                "title": "Node",
                "value": node_name,
                "short": True
            },
            {
                "title": "Group:%s" % group_modification,
                "value": group,
                "short": True
            },
        ]
        attachment['fields'] = fields
        attachments = [attachment]
        self.__do_send_message(slack, attachments, channel)

    def __send_slack_compliance(self, compliance):
        token = self.__get_token()

        if not token:
            self.logger.error(
                '[SLACK] token is not configured on the slack module. skipping slack messages.'
            )
            return
        slack = Slacker(token)
        # title = '{date_num} {time_secs} [node:`%s`][addr:`%s`] Check `%s` is going %s' % (gossiper.display_name, gossiper.addr, check['name'], check['state'])
        content = 'The compliance %s changed from %s to %s' % (
            compliance.get_name(), compliance.get_old_state(),
            compliance.get_state())
        channel = self.get_parameter('channel')
        state_color = COMPLIANCE_STATE_COLORS.get(compliance.get_state())
        color = {
            'magenta': '#221220',
            'green': 'good',
            'cyan': '#cde6ff',
            'red': 'danger',
            'grey': '#cccccc'
        }.get(state_color, '#cccccc')
        node_name = '%s (%s)' % (gossiper.name, gossiper.addr)
        if gossiper.display_name:
            node_name = '%s [%s]' % (node_name, gossiper.display_name)
        attachment = {
            "pretext": ' ',
            "text": content,
            'color': color,
            'author_name': node_name,
            'footer': 'Sent by OpsBro on %s' % node_name,
            'ts': int(time.time())
        }
        fields = [
            {
                "title": "Node",
                "value": node_name,
                "short": True
            },
            {
                "title": "Compliance:%s" % compliance.get_name(),
                "value": compliance.get_state(),
                "short": True
            },
        ]
        attachment['fields'] = fields
        attachments = [attachment]
        self.__do_send_message(slack, attachments, channel)

    def __do_send_message(self, slack, attachments, channel):
        try:
            self.__try_to_send_message(slack, attachments, channel)
        except Exception as exp:
            self.logger.error('[SLACK] Cannot send alert: %s (%s)' %
                              (exp, type(exp)))
            # If the channel simply does not exist, try to create it
            if str(exp) == 'channel_not_found':
                try:
                    self.logger.info(
                        '[SLACK] Channel %s does not exist. Trying to create it.'
                        % channel)
                    slack.channels.create(channel)
                except Exception as exp:
                    self.logger.error('[SLACK] Cannot create channel %s: %s' %
                                      (channel, exp))
                    return
                # Now try to resend the message
                try:
                    self.__try_to_send_message(slack, attachments, channel)
                except Exception as exp:
                    self.logger.error(
                        '[SLACK] Created channel %s but we still cannot send the message: %s'
                        % (channel, exp))

    def handle(self, obj, event):
        if_group = self.get_parameter('enabled_if_group')
        self.enabled = gossiper.is_in_group(if_group)
        if not self.enabled:
            self.logger.debug(
                'Slack module is not enabled, skipping check alert sent')
            return

        self.logger.debug('Manage an obj event: %s (event=%s)' % (obj, event))

        evt_type = event['evt_type']
        if evt_type == 'check_execution':
            evt_data = event['evt_data']
            check_did_change = evt_data['check_did_change']
            if check_did_change:
                self.__send_slack_check(obj)

        if evt_type == 'group_change':
            evt_data = event['evt_data']
            group_modification = evt_data['modification']
            self.__send_slack_group(obj, group_modification)

        # Compliance: only when change, and only some switch cases should be
        # notify (drop useless changes)
        if evt_type == 'compliance_execution':
            evt_data = event['evt_data']
            compliance_did_change = evt_data['compliance_did_change']
            if compliance_did_change:
                self.__send_slack_compliance(obj)
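
To sanity-check a token outside the agent, the same Slacker calls can be driven by hand; token and channel values below are placeholders:

from slacker import Slacker

slack = Slacker('xoxb-REPLACE-ME')  # placeholder token
# Post a test message; this raises on an invalid token or an unknown channel
slack.chat.post_message(channel='#alerts', text='opsbro slack handler test', as_user=True)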
Example no. 12
class SynologyModule(ListenerModule):
    implement = 'synology'

    parameters = {
        'enabled': BoolParameter(default=False),
        'export_uri': StringParameter(default='http://92.222.35.193:8080/synology'),
        'customer_key': StringParameter(default=''),
        'inventory_number': StringParameter(default=''),
    }

    def __init__(self):
        ListenerModule.__init__(self)

        # Reaping queue (not used in this snippet)
        self.graphite_queue = []

        self.enabled = False
        self.export_uri = ''
        self.customer_key = ''
        self.inventory_number = ''

    # Read the module configuration
    def prepare(self):
        self.logger.debug('Synology: prepare phase')

        self.enabled = self.get_parameter('enabled')
        self.export_uri = self.get_parameter('export_uri')

    def get_info(self):
        state = 'STARTED' if self.enabled else 'DISABLED'
        log = ''
        return {'configuration': self.get_config(), 'state': state, 'log': log}

    def launch(self):
        threader.create_and_launch(self.launch_main,
                                   name='Synology',
                                   essential=True,
                                   part='synology')

    # Main loop: poll the synology collector and push its results to the export URI
    def launch_main(self):
        while not stopper.is_stop():
            self.enabled = self.get_parameter('enabled')
            if not self.enabled:
                time.sleep(1)
                continue
            self.export_uri = self.get_parameter('export_uri')
            self.customer_key = self.get_parameter('customer_key')
            self.inventory_number = self.get_parameter('inventory_number')
            if not self.customer_key:
                self.logger.warning('You must have a customer key')
                time.sleep(1)
                continue

            syno_collector = collectormgr.collectors.get('synology', None)
            if syno_collector is None:
                self.logger.error('The synology collector is missing')
                time.sleep(1)
                continue

            results = syno_collector.get('results', None)
            if results is None:
                self.logger.warning('The synology collector did not run')
                time.sleep(1)
                continue

            try:
                r = httper.post(self.export_uri,
                                params={
                                    'uuid': gossiper.uuid,
                                    'customer_key': self.customer_key,
                                    'inventory_number': self.inventory_number,
                                    'results': results
                                },
                                headers={})
                self.logger.debug('Result insert: %s' % r)
            except get_http_exceptions() as exp:
                self.logger.error(
                    'Cannot connect to export uri datasources: %s' % exp)
            time.sleep(1)
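
The POST above ships a flat document; an illustrative shape of what gets sent, where the keys mirror the params dict and the fields inside results are invented for illustration:

{
    "uuid": "<agent uuid>",
    "customer_key": "<customer key>",
    "inventory_number": "INV-0042",
    "results": {"temperature": 41, "disk_status": "normal"}
}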
Example no. 13
class GrafanaModule(ConnectorModule):
    implement = 'grafana'

    parameters = {
        'enabled_if_group': StringParameter(default='grafana-connector'),
        'uri': StringParameter(default='http://localhost:3000'),
        'api_key': StringParameter(default=''),
    }

    def __init__(self):
        super(GrafanaModule, self).__init__()
        self.enabled = False
        self.enabled_if_group = 'grafana-connector'
        self.uri = 'http://localhost:3000'
        self.api_key = ''

    def prepare(self):
        self.logger.info('Grafana: prepare phase')
        self.uri = self.get_parameter('uri')
        self.api_key = self.get_parameter('api_key')

    def __get_headers(self):
        return {
            'Content-Type': 'application/json;charset=UTF-8',
            'Authorization': 'Bearer %s' % self.api_key
        }

    def insert_node_into_grafana(self, nuuid):
        node = gossiper.get(nuuid)
        if node is None:
            return
        name = node['name']
        addr = node['addr']
        port = node['port']
        data_source_name = "%s--opsbro--%s" % (name, nuuid)
        entry = {
            "name": data_source_name,
            "type": "graphite",
            "url": "http://%s:%d" % (addr, port),
            "access": "proxy"
        }
        uri = '%s/api/datasources' % (self.uri)
        try:
            r = httper.post(uri, params=entry, headers=self.__get_headers())
            self.logger.debug('Result insert: %s' % r)
        except get_http_exceptions() as exp:
            self.logger.error('Cannot connect to grafana datasources: %s' %
                              exp)
            return

    def remove_data_source(self, data_source_id):
        self.logger.info(
            'Cleaning data source %d from grafana because the node is no more'
            % data_source_id)
        uri = '%s/api/datasources/%d' % (self.uri, data_source_id)
        try:
            r = httper.delete(uri, headers=self.__get_headers())
            self.logger.debug('Result delete: %s' % r)
        except get_http_exceptions() as exp:
            self.logger.error('Cannot connect to grafana datasources: %s' %
                              exp)
            return

    def get_data_sources_from_grafana(self):
        uri = '%s/api/datasources' % (self.uri)
        our_data_sources = {}
        try:
            api_return = httper.get(uri, headers=self.__get_headers())
            try:
                all_data_sources = jsoner.loads(api_return)
            except (ValueError, TypeError) as exp:
                self.logger.error(
                    'Cannot load json from grafana datasources: %s' % exp)
                return None
        except get_http_exceptions() as exp:
            self.logger.error('Cannot connect to grafana datasources: %s' %
                              exp)
            return None
        self.logger.debug("All data sources")
        self.logger.debug(str(all_data_sources))
        # Error message is a dict with just a key: message
        if isinstance(all_data_sources, dict):
            error_message = all_data_sources.get('message', '')
            if error_message:
                if error_message == 'Unauthorized':
                    self.logger.error(
                        'Your API key is not authorized to list data sources.')
                    return None
                self.logger.error('Unknown error from grafana API: %s' %
                                  error_message)
                return None

        # A data source entry looks like this:
        # [{u'name': u'SuperBla',
        #   u'database': u'',
        #   u'url': u'http://super:6768',
        #   u'basicAuth': False,
        #   u'jsonData': {},
        #   u'access': u'proxy',
        #   u'typeLogoUrl': u'public/app/plugins/datasource/graphite/img/graphite_logo.png',
        #   u'orgId': 1,
        #   u'user': u'',
        #   u'password': u'',
        #   u'type': u'graphite',
        #   u'id': 1,
        #   u'isDefault': False}]
        for data_source in all_data_sources:
            if data_source.get('type', '') != 'graphite':
                continue
            src_name = data_source.get('name', '')
            if '--opsbro--' in src_name:
                elts = src_name.split('--opsbro--')
                if len(elts) == 2:
                    nuuid = elts[1]
                    our_data_sources[nuuid] = data_source
        return our_data_sources

    def launch(self):
        threader.create_and_launch(
            self.do_launch,
            name='Grafana module data sources synchronizer',
            essential=True,
            part='grafana')

    def do_launch(self):
        while not stopper.is_stop():
            self.logger.debug('Grafana loop')

            # We go in enabled when, and only when our group is matching what we do expect
            if_group = self.get_parameter('enabled_if_group')
            self.enabled = gossiper.is_in_group(if_group)

            # Ok, if we are not enabled, so not even talk to grafana
            if not self.enabled:
                time.sleep(1)
                continue

            # Ok now time to work
            nodes_in_grafana = self.get_data_sources_from_grafana()

            # If we have an issue to grafana, skip this loop
            if nodes_in_grafana is None:
                time.sleep(1)
                continue
            nodes_in_grafana_set = set(nodes_in_grafana.keys())

            # note: gossiper.nodes is a static dict, no need to lock it
            gossip_nodes_uuids = set(gossiper.nodes.keys())

            self.logger.debug("Nodes in grafana: %s" % nodes_in_grafana_set)
            self.logger.debug("Nodes in gossip: %s" % gossip_nodes_uuids)
            nodes_that_must_be_clean = nodes_in_grafana_set - gossip_nodes_uuids
            nodes_to_insert = gossip_nodes_uuids - nodes_in_grafana_set
            self.logger.debug("Nodes that must be cleaned: %s" % nodes_that_must_be_clean)
            self.logger.debug("Nodes to insert into grafana: %s" % nodes_to_insert)
            for nuuid in nodes_to_insert:
                self.logger.debug("Node %s must be inserted into grafana" % nuuid)
                self.insert_node_into_grafana(nuuid)

            for nuuid in nodes_that_must_be_clean:
                node_data_source_id = nodes_in_grafana[nuuid]['id']
                self.logger.debug(
                    "Node %s is no longer needed in grafana. Removing its data source"
                    % nuuid)
                self.remove_data_source(node_data_source_id)

            # Do not hammer the cpu
            time.sleep(1)
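
The same data sources API can be queried by hand to verify an API key; a stdlib-only sketch, assuming Grafana's default port and a valid key:

import json
try:
    from urllib.request import Request, urlopen  # Python 3
except ImportError:
    from urllib2 import Request, urlopen  # Python 2

req = Request('http://localhost:3000/api/datasources',
              headers={'Authorization': 'Bearer REPLACE-WITH-API-KEY'})
data_sources = json.loads(urlopen(req, timeout=3).read())
print([ds['name'] for ds in data_sources])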
Example no. 14
class Mysql(Collector):
    parameters = {
        'server'             : StringParameter(default='127.0.0.1'),
        'user'               : StringParameter(default='root'),
        'password'           : StringParameter(default=''),
        'port'               : IntParameter(default=3306),
        'socket'             : StringParameter(default='/var/lib/mysql/mysql.sock'),
        'replication_enabled': BoolParameter(default=False)
    }
    
    
    def __init__(self):
        super(Mysql, self).__init__()
        self.MySQLdb = None
        self.mysqlVersion = None
        self.mysqlConnectionsStore = None
        self.mysqlSlowQueriesStore = None
    
    
    def launch(self):
        
        logger = self.logger
        logger.debug('getMySQLStatus: start')
        
        if not self.is_in_group('mysql'):
            self.set_not_eligible('Please add the mysql group to enable this collector.')
            return
        
        if self.MySQLdb is None:
            # Try import MySQLdb, if installed on the system
            try:
                import MySQLdb
                self.MySQLdb = MySQLdb
            except ImportError as exp1:
                try:
                    mydir = os.path.dirname(__file__)
                    sys.path.insert(0, mydir)
                    import pymysql as MySQLdb
                    self.MySQLdb = MySQLdb
                    sys.path = sys.path[1:]
                except ImportError as exp2:
                    sys.path = sys.path[1:]
                    self.set_error('Unable to import MySQLdb (%s) or embedded pymysql (%s)' % (exp1, exp2))
                    return False
        
        host = self.get_parameter('server')
        user = self.get_parameter('user')
        password = self.get_parameter('password')
        port = self.get_parameter('port')
        mysql_socket = self.get_parameter('socket')
        
        # You can connect with socket or TCP
        if not mysql_socket:
            try:
                db = self.MySQLdb.connect(host=host, user=user, passwd=password, port=port)
            except self.MySQLdb.OperationalError as exp:  # ooooups
                self.set_error('MySQL connection error (server): %s' % exp)
                return False
        elif hasattr(socket, 'AF_UNIX'):
            try:
                db = self.MySQLdb.connect(host='localhost', user=user, passwd=password, port=port, unix_socket=mysql_socket)
            except self.MySQLdb.OperationalError as exp:
                self.set_error('MySQL connection error (socket): %s' % exp)
                return False
        else:
            self.set_error('MySQL is set to connect with unix socket but it is not available for windows.')
            return False
        
        logger.debug('getMySQLStatus: connected')
        
        # Get MySQL version
        if self.mysqlVersion is None:
            logger.debug('getMySQLStatus: mysqlVersion unset storing for first time')
            try:
                cursor = db.cursor()
                cursor.execute('SELECT VERSION()')
                result = cursor.fetchone()
            except self.MySQLdb.OperationalError as message:
                logger.error('getMySQLStatus: MySQL query error when getting version: %s', message)
                return False  # without the version we cannot build the version-dependent queries below
            
            version = result[0].split('-')  # Might include a description e.g. 4.1.26-log. See http://dev.mysql.com/doc/refman/4.1/en/information-functions.html#function_version
            version = version[0].split('.')
            self.mysqlVersion = []
            
            for string in version:
                number = re.match('([0-9]+)', string)
                number = number.group(0)
                self.mysqlVersion.append(number)
        
        logger.debug('getMySQLStatus: getting Connections')
        
        # Connections
        try:
            cursor = db.cursor()
            cursor.execute('SHOW STATUS LIKE "Connections"')
            result = cursor.fetchone()
        except self.MySQLdb.OperationalError as message:
            logger.error('getMySQLStatus: MySQL query error when getting Connections = %s', message)
        
        if self.mysqlConnectionsStore is None:
            logger.debug('getMySQLStatus: mysqlConnectionsStore unset storing for first time')
            self.mysqlConnectionsStore = result[1]
            connections = 0
        else:
            logger.debug('getMySQLStatus: mysqlConnectionsStore set so calculating')
            logger.debug('getMySQLStatus: self.mysqlConnectionsStore = %s', self.mysqlConnectionsStore)
            logger.debug('getMySQLStatus: result = %s', result[1])
            # Delta divided by 60: assumes the collector runs once per minute
            connections = float(float(result[1]) - float(self.mysqlConnectionsStore)) / 60
            self.mysqlConnectionsStore = result[1]
        
        logger.debug('getMySQLStatus: connections  = %s', connections)
        logger.debug('getMySQLStatus: getting Connections - done')
        logger.debug('getMySQLStatus: getting Created_tmp_disk_tables')
        
        # Created_tmp_disk_tables
        
        # Determine query depending on version. For 5.0.2 and above we need the GLOBAL keyword.
        # Compare the whole version tuple (a major/patch-only test wrongly excludes e.g. 5.1.0)
        if tuple(int(v) for v in self.mysqlVersion[:3]) >= (5, 0, 2):
            query = 'SHOW GLOBAL STATUS LIKE "Created_tmp_disk_tables"'
        else:
            query = 'SHOW STATUS LIKE "Created_tmp_disk_tables"'
        
        try:
            cursor = db.cursor()
            cursor.execute(query)
            result = cursor.fetchone()
        except self.MySQLdb.OperationalError as message:
            logger.error('getMySQLStatus: MySQL query error when getting Created_tmp_disk_tables = %s', message)
        
        createdTmpDiskTables = float(result[1])
        
        logger.debug('getMySQLStatus: createdTmpDiskTables = %s', createdTmpDiskTables)
        logger.debug('getMySQLStatus: getting Created_tmp_disk_tables - done')
        logger.debug('getMySQLStatus: getting Max_used_connections')
        
        # Max_used_connections
        try:
            cursor = db.cursor()
            cursor.execute('SHOW STATUS LIKE "Max_used_connections"')
            result = cursor.fetchone()
        except self.MySQLdb.OperationalError as message:
            logger.error('getMySQLStatus: MySQL query error when getting Max_used_connections = %s', message)
        
        maxUsedConnections = int(result[1])
        logger.debug('getMySQLStatus: maxUsedConnections = %s', maxUsedConnections)
        logger.debug('getMySQLStatus: getting Max_used_connections - done')
        logger.debug('getMySQLStatus: getting Open_files')
        
        # Open_files
        try:
            cursor = db.cursor()
            cursor.execute('SHOW STATUS LIKE "Open_files"')
            result = cursor.fetchone()
        except self.MySQLdb.OperationalError as message:
            logger.error('getMySQLStatus: MySQL query error when getting Open_files = %s', message)
        
        openFiles = int(result[1])
        
        logger.debug('getMySQLStatus: openFiles = %s', openFiles)
        logger.debug('getMySQLStatus: getting Open_files - done')
        
        # Slow_queries
        logger.debug('getMySQLStatus: getting Slow_queries')
        
        # Determine query depending on version. For 5.0.2 and above we need the GLOBAL keyword (case 31015)
        if tuple(int(v) for v in self.mysqlVersion[:3]) >= (5, 0, 2):
            query = 'SHOW GLOBAL STATUS LIKE "Slow_queries"'
        else:
            query = 'SHOW STATUS LIKE "Slow_queries"'
        try:
            cursor = db.cursor()
            cursor.execute(query)
            result = cursor.fetchone()
        except self.MySQLdb.OperationalError as message:
            logger.error('getMySQLStatus: MySQL query error when getting Slow_queries = %s', message)
        
        if self.mysqlSlowQueriesStore is None:
            logger.debug('getMySQLStatus: mysqlSlowQueriesStore unset so storing for first time')
            self.mysqlSlowQueriesStore = result[1]
            slowQueries = 0
        
        else:
            logger.debug('getMySQLStatus: mysqlSlowQueriesStore set so calculating')
            logger.debug('getMySQLStatus: self.mysqlSlowQueriesStore = %s', self.mysqlSlowQueriesStore)
            logger.debug('getMySQLStatus: result = %s', result[1])
            
            slowQueries = float(float(result[1]) - float(self.mysqlSlowQueriesStore)) / 60
            
            self.mysqlSlowQueriesStore = result[1]
        
        logger.debug('getMySQLStatus: slowQueries = %s', slowQueries)
        logger.debug('getMySQLStatus: getting Slow_queries - done')
        logger.debug('getMySQLStatus: getting Table_locks_waited')
        
        # Table_locks_waited
        try:
            cursor = db.cursor()
            cursor.execute('SHOW STATUS LIKE "Table_locks_waited"')
            result = cursor.fetchone()
        except self.MySQLdb.OperationalError as message:
            logger.error('getMySQLStatus: MySQL query error when getting Table_locks_waited = %s', message)
        
        tableLocksWaited = float(result[1])
        
        logger.debug('getMySQLStatus: tableLocksWaited  = %s', tableLocksWaited)
        logger.debug('getMySQLStatus: getting Table_locks_waited - done')
        logger.debug('getMySQLStatus: getting Threads_connected')
        
        # Threads_connected
        try:
            cursor = db.cursor()
            cursor.execute('SHOW STATUS LIKE "Threads_connected"')
            result = cursor.fetchone()
        except self.MySQLdb.OperationalError as message:
            logger.error('getMySQLStatus: MySQL query error when getting Threads_connected = %s', message)
        
        threadsConnected = int(result[1])
        
        logger.debug('getMySQLStatus: threadsConnected = %s', threadsConnected)
        logger.debug('getMySQLStatus: getting Threads_connected - done')
        logger.debug('getMySQLStatus: getting Seconds_Behind_Master')
        secondsBehindMaster = 0
        if self.get_parameter('replication_enabled'):
            # Seconds_Behind_Master
            try:
                cursor = db.cursor(self.MySQLdb.cursors.DictCursor)
                cursor.execute('SHOW SLAVE STATUS')
                result = cursor.fetchone()
            except self.MySQLdb.OperationalError as message:
                self.set_error('getMySQLStatus: MySQL query error when getting SHOW SLAVE STATUS = %s' % message)
                result = None
            
            if result is not None:
                try:
                    secondsBehindMaster = result['Seconds_Behind_Master']
                    logger.debug('getMySQLStatus: secondsBehindMaster = %s' % secondsBehindMaster)
                except KeyError as exp:  # DictCursor rows raise KeyError, not IndexError
                    secondsBehindMaster = None
                    logger.debug('getMySQLStatus: secondsBehindMaster empty. %s' % exp)
            else:
                secondsBehindMaster = None
                logger.debug('getMySQLStatus: secondsBehindMaster empty. Result = None.')
            
            logger.debug('getMySQLStatus: getting Seconds_Behind_Master - done')
        
        return {'connections'          : connections, 'created_tmp_disk_tables': createdTmpDiskTables,
                'max_used_connections' : maxUsedConnections, 'open_files': openFiles, 'slow_queries': slowQueries,
                'table_locks_waited'   : tableLocksWaited, 'threads_connected': threadsConnected,
                'seconds_behind_master': secondsBehindMaster}
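
For reference, each SHOW STATUS query above returns a (name, value) row where the value is always a string, hence the float()/int() casts. A tiny standalone check, assuming pymysql is installed and the default connection parameters above:

import pymysql

db = pymysql.connect(host='127.0.0.1', user='root', password='', port=3306)
cursor = db.cursor()
cursor.execute('SHOW STATUS LIKE "Connections"')
print(cursor.fetchone())  # e.g. ('Connections', '158')
db.close()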
Example no. 15
class Apache(Collector):
    parameters = {
        'hostname': StringParameter(default='localhost'),
        'user': StringParameter(default=''),
        'password': StringParameter(default=''),
    }

    def __init__(self):
        super(Apache, self).__init__()
        self.apacheTotalAccesses = None

    def launch(self):

        if not self.is_in_group('apache'):
            self.set_not_eligible(
                'Please add the apache group to enable this collector.')
            return

        logger = self.logger
        logger.debug('getApacheStatus: start')
        # Legacy urllib2-based basic-auth setup, kept commented for reference:
        '''
                    passwordMgr = urllib2.HTTPPasswordMgrWithDefaultRealm()
                    passwordMgr.add_password(None, self.config['apacheStatusUrl'], self.config['apacheStatusUser'],
                                             self.config['apacheStatusPass'])

                    handler = urllib2.HTTPBasicAuthHandler(passwordMgr)

                    # create "opener" (OpenerDirector instance)
                    opener = urllib2.build_opener(handler)

                    # use the opener to fetch a URL
                    opener.open(self.config['apacheStatusUrl'])

                    # Install the opener.
                    # Now all calls to urllib2.urlopen use our opener.
                    urllib2.install_opener(opener)
        '''
        try:
            uri = 'http://%s/server-status/?auto' % self.get_parameter(
                'hostname')
            user = self.get_parameter('user')
            password = self.get_parameter('password')
            response = httper.get(uri, timeout=3, user=user, password=password)
        except get_http_exceptions() as exp:
            stack = traceback.format_exc()
            self.log = stack
            self.set_error('Unable to get Apache status - Exception = %s' %
                           exp)
            return False

        logger.debug('getApacheStatus: urlopen success, start parsing')
        # Split out each line
        lines = response.split('\n')

        # Loop over each line and get the values
        apacheStatus = {}

        logger.debug('getApacheStatus: parsing, loop')

        # Loop through and extract the numerical values
        for line in lines:
            values = line.split(': ')
            try:
                apacheStatus[str(values[0])] = values[1]
            except IndexError:
                # Stop at the first line without a ': ' separator (typically the trailing empty line)
                break

        logger.debug('getApacheStatus: parsed')

        res = {}

        try:
            # mod_status values are strings, so the original "!= False" test was
            # always true; test for key presence instead
            if 'Total Accesses' in apacheStatus:
                logger.debug('getApacheStatus: processing total accesses')
                totalAccesses = float(apacheStatus['Total Accesses'])
                if self.apacheTotalAccesses is None or self.apacheTotalAccesses <= 0 or totalAccesses <= 0:
                    res['req/s'] = 0.0
                    self.apacheTotalAccesses = totalAccesses
                    logger.debug(
                        'getApacheStatus: no cached total accesses (or totalAccesses == 0), so storing for first time / resetting stored value'
                    )
                else:
                    logger.debug(
                        'getApacheStatus: cached data exists, so calculating per sec metrics'
                    )
                    # Delta divided by 60: assumes the collector runs once per minute
                    res['req/s'] = (totalAccesses -
                                    self.apacheTotalAccesses) / 60
                    self.apacheTotalAccesses = totalAccesses
            else:
                self.set_error(
                    'getApacheStatus: Total Accesses not present in mod_status output. Is ExtendedStatus enabled?'
                )
        except (ValueError, KeyError):
            self.set_error(
                'getApacheStatus: could not process Total Accesses from mod_status output. Is ExtendedStatus enabled?'
            )

        try:
            if 'BusyWorkers' in apacheStatus and 'IdleWorkers' in apacheStatus:
                res['busy_workers'] = int(apacheStatus['BusyWorkers'])
                res['idle_workers'] = int(apacheStatus['IdleWorkers'])
            else:
                self.set_error(
                    'getApacheStatus: BusyWorkers/IdleWorkers not present in mod_status output. Is the URL correct (must have ?auto at the end)?'
                )
        except (ValueError, KeyError):
            self.set_error(
                'getApacheStatus: could not process BusyWorkers/IdleWorkers from mod_status output. Is the URL correct (must have ?auto at the end)?'
            )

        return res
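
For reference, with ?auto the mod_status output is a plain 'key: value' listing, which is exactly what the split(': ') loop above consumes:

Total Accesses: 92433
Total kBytes: 484387
Uptime: 11664
ReqPerSec: 7.92456
BytesPerSec: 42523.2
BusyWorkers: 3
IdleWorkers: 7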