Пример #1
0
 def get_last_measure(self, store_key):
     """
     Load the measure previously stored under ``store_key``.

     A message is shifted from the queue file ``<self.home>/<store_key>.q``
     (opened with max_length=1) and its content is JSON-decoded.

     Args:
         store_key (str): name of the queue file, without the ``.q`` suffix

     Returns:
         the decoded message content, or None when nothing was shifted.
     """
     queue_path = "%s/%s.q" % (self.home, store_key)
     measure_queue = Queue(queue_path, max_length=1, overflow_mode='slide')
     stored = measure_queue.shift()
     if not stored:
         return None
     return json.loads(stored.content)
Пример #2
0
def get_metrics_queue(config, _=None):
    """
    Return the last dashboard message, completed with notifications.

    The message is read from ``<home>/dashboard.q``; its 'notifications'
    entry is set from ``DashboardMetrics.get_notifications(config)``.
    The second positional argument is ignored.
    """
    queue_path = '%s/dashboard.q' % (config.temboard['home'])
    history_length = config.plugins['dashboard']['history_length']
    dashboard_queue = Queue(queue_path,
                            max_length=(history_length + 1),
                            overflow_mode='slide')
    dashboard_metrics = DashboardMetrics()
    latest = dashboard_queue.get_last_message()
    latest['notifications'] = dashboard_metrics.get_notifications(config)
    return latest
Пример #3
0
 def get_last_n(self, config, n):
     """
     Return the last ``n`` messages of the notifications queue.

     Args:
         config: configuration object; ``config.temboard['home']`` is the
             directory holding ``notifications.q``
         n (int): number of messages requested

     Returns:
         whatever ``Queue.get_last_n_messages(n)`` returns.

     Raises:
         NotificationError: when the queue cannot be opened or read.
     """
     try:
         # NOTE(review): the limit is passed as ``max_length`` although the
         # value looks like a byte size (10 * 1024 * 1024) -- confirm
         # against the Queue API whether ``max_size`` was intended.
         q_last = Queue(file_path='%s/notifications.q' %
                        (config.temboard['home']),
                        max_length=10 * 1024 * 1024,
                        overflow_mode='slide')
         return q_last.get_last_n_messages(n)
     except Exception as e:
         # Format the exception itself: the ``message`` attribute does not
         # exist on Python 3 exceptions, and accessing it here would raise
         # AttributeError instead of the intended NotificationError.
         raise NotificationError('Can not get last notifications: %s' % e)
Пример #4
0
def monitoring_sender_worker(app):
    """
    Send queued metrics messages to the temBoard collector.

    Messages are read one at a time from ``<home>/metrics.q`` without being
    removed, posted with ``send_output`` and only then deleted from the
    queue. The loop stops when the queue yields nothing or once the
    iteration counter exceeds 60.

    Args:
        app: application object exposing the configuration as ``app.config``

    Raises:
        Exception: when the collector answers with an HTTP error other
            than 409.
    """
    config = app.config
    c = 0
    logger.debug("Starting sender")
    q = Queue(os.path.join(config.temboard.home, 'metrics.q'),
              max_size=1024 * 1024 * 10, overflow_mode='slide')
    while True:
        # Let's do it smoothly..
        time.sleep(0.5)
        msg = q.shift(delete=False)

        if msg is None:
            # If we get nothing from the queue then we get out from this while
            # loop.
            break
        try:
            # Try to send data to temboard collector API
            logger.debug("Trying to send data to collector")
            logger.debug(config.monitoring.collector_url)
            logger.debug(msg.content)
            send_output(
                config.monitoring.ssl_ca_cert_file,
                config.monitoring.collector_url,
                config.temboard.key,
                msg.content
            )
        except HTTPError as e:
            if int(e.code) != 409:
                try:
                    message = json.loads(e.read())['error']
                except Exception as parse_error:
                    # A distinct name keeps the HTTP error available: fall
                    # back to it when the response body has no usable
                    # 'error' field.
                    logger.debug("Can't get error details: %s", parse_error)
                    message = str(e)

                logger.error("Failed to send data to collector: %s", message)
                logger.error("You should find details in temBoard UI logs.")

                raise Exception("Failed to send data to collector.")

            # On error 409 (DB Integrity) we just drop the message and move to
            # the next one. The message must really be removed below:
            # skipping the deletion would make the next iteration read and
            # send the very same message again, forever.
            logger.debug("Collector answered 409, dropping the message")

        # Remove current msg from the queue (sent, or rejected with 409).
        # Integrity check is made using check_msg
        q.shift(delete=True, check_msg=msg)

        if c > 60:
            break
        c += 1

    logger.debug("Done")
Пример #5
0
def supervision_sender_worker(commands, command, config):
    """
    Send queued supervision metrics to the collector.

    The command is first flagged as started (state, time, pid) and saved
    with ``commands.update``. Messages are then read from
    ``<home>/metrics.q``, sent with ``send_output`` and removed from the
    queue once sent. An HTTP 409 answer also removes the message; any
    other failure ends the worker. The loop also ends when the queue
    yields nothing or once the iteration counter exceeds 60.
    """
    signal.signal(signal.SIGTERM, supervision_worker_sigterm_handler)
    started_ms = time.time() * 1000
    set_logger_name("supervision_sender_worker")
    logger = get_logger(config)
    # TODO: logging methods in supervision plugin must be aligned.
    logging.root = logger
    logger.info("Start pid=%s id=%s" % (os.getpid(), command.commandid))
    command.state = COMMAND_START
    command.time = time.time()
    command.pid = os.getpid()
    commands.update(command)

    def log_end():
        # Log the time elapsed, in milliseconds, since the worker started.
        logger.info("End. Duration: %s." %
                    (str(time.time() * 1000 - started_ms)))

    iterations = 0
    while True:
        # Let's do it smoothly..
        time.sleep(0.5)

        metrics_queue = Queue('%s/metrics.q' % (config.temboard['home']),
                              max_size=1024 * 1024 * 10,
                              overflow_mode='slide')
        pending = metrics_queue.shift(delete=False)
        if pending is None:
            break

        supervision = config.plugins['supervision']
        try:
            send_output(supervision['ssl_ca_cert_file'],
                        supervision['collector_url'],
                        supervision['agent_key'],
                        pending.content)
        except urllib2.HTTPError as e:
            logger.error("Failed to send data.")
            logger.debug(e.message)
            log_end()
            # On an error 409 (DB Integrity) we need to remove the message.
            if int(e.code) != 409:
                return
        except Exception as e:
            logger.error("Failed to send data.")
            logger.debug(str(e))
            log_end()
            return

        metrics_queue.shift(delete=True, check_msg=pending)
        if iterations > 60:
            break
        iterations += 1
    log_end()
Пример #6
0
def dashboard_collector_worker(app):
    """
    Collect dashboard metrics and append them to ``<home>/dashboard.q``.

    Args:
        app: application object; its ``config`` provides ``temboard.home``
            and ``dashboard.history_length``
    """
    logger.debug("Starting to collect dashboard data")
    collected = metrics.get_metrics(app)

    # We don't want to store notifications in the history.
    collected.pop('notifications', None)

    queue_path = os.path.join(app.config.temboard.home, 'dashboard.q')
    history_length = app.config.dashboard.history_length
    history_queue = Queue(queue_path,
                          max_length=(history_length + 1),
                          overflow_mode='slide')
    history_queue.push(Message(content=json.dumps(collected)))

    logger.debug(collected)
    logger.debug("End")
Пример #7
0
    def push(self, config, notification):
        """
        Store a notification in the notifications queue.

        Args:
            config: configuration object; ``config.temboard['home']`` is
                the directory holding ``notifications.q``
            notification: object providing ``date``, ``username`` and
                ``message`` attributes

        Raises:
            NotificationError: when the notification cannot be pushed.
        """
        try:
            # Notifications are stored in a "sliding" queue.
            q = Queue(file_path='%s/notifications.q' % (
                                config.temboard['home']),
                      max_size=10 * 1024 * 1024,  # 10MB
                      overflow_mode='slide')

            # Push the notification in the queue.
            q.push(Message(content=json.dumps({
                'date': notification.date,
                'username': notification.username,
                'message': notification.message})))
        except Exception as e:
            # Format the exception itself: the ``message`` attribute does
            # not exist on Python 3 exceptions, and accessing it here would
            # raise AttributeError instead of the intended
            # NotificationError.
            raise NotificationError('Can not push new notification: %s' % e)
Пример #8
0
def monitoring_sender_worker(config):
    """
    Send queued monitoring metrics to the collector.

    Messages are read from ``<home>/metrics.q``, sent with ``send_output``
    and removed from the queue once sent or when the collector answers
    with HTTP 409. Any other failure exits the process with status 1. The
    loop ends when the queue yields nothing or once the iteration counter
    exceeds 60.

    Args:
        config (dict): configuration holding the 'temboard', 'plugins',
            'postgresql' and 'logging' sections
    """
    signal.signal(signal.SIGTERM, monitoring_worker_sigterm_handler)
    # convert config dict to namedtuple
    config_type = collections.namedtuple(
        '__config', ['temboard', 'plugins', 'postgresql', 'logging'])
    config = config_type(temboard=config['temboard'],
                         plugins=config['plugins'],
                         postgresql=config['postgresql'],
                         logging=config['logging'])

    iterations = 0
    logger.debug("Starting sender")
    while True:
        # Let's do it smoothly..
        time.sleep(0.5)

        metrics_queue = Queue('%s/metrics.q' % (config.temboard['home']),
                              max_size=1024 * 1024 * 10,
                              overflow_mode='slide')
        pending = metrics_queue.shift(delete=False)
        if pending is None:
            break

        try:
            send_output(config.plugins['monitoring']['ssl_ca_cert_file'],
                        config.plugins['monitoring']['collector_url'],
                        config.temboard['key'], pending.content)
        except urllib2.HTTPError as e:
            logger.exception(e)
            # On an error 409 (DB Integrity) we need to remove the message.
            if int(e.code) != 409:
                logger.error("Failed with code=%s message=%s" %
                             (e.code, e.msg))
                sys.exit(1)
        except Exception as e:
            logger.exception(e)
            logger.error("Failed")
            sys.exit(1)

        # If everything's fine then remove current msg from the queue
        metrics_queue.shift(delete=True, check_msg=pending)

        if iterations > 60:
            break
        iterations += 1
    logger.debug("Done")
Пример #9
0
    def delta(self, key, current_values):
        """
        Compute a delta between measures of two runs.

        The current values are then stored (with the current time) as the
        new last measure; a failure to store them is only logged.

        Args:
            key (str): identify the values
            current_values (dict): mapping of latest measures

        Returns:
            a tuple of the time interval of the delta in seconds and a
            dict of deltas with the same keys as the input. The tuple is
            (None, None) when there is no previous measure, when a delta
            is negative or when the computation fails.
        """
        now_ts = time.time()
        store_key = self.get_name() + key
        previous = self.get_last_measure(store_key)

        result = (None, None)
        if previous:
            try:
                elapsed = now_ts - previous['measure_time']
                differences = {}
                for name in current_values.keys():
                    difference = current_values[name] - \
                        previous['measure'][name]
                    if difference < 0:
                        raise Exception('Negative delta value.')
                    differences[name] = difference
                result = (elapsed, differences)
            except Exception:
                result = (None, None)

        # Store the current values for the next run.
        try:
            measure_queue = Queue("%s/%s.q" % (self.home, store_key),
                                  max_length=1,
                                  overflow_mode='slide')
            payload = json.dumps({
                'measure_time': now_ts,
                'measure': dict(current_values)
            })
            measure_queue.push(Message(content=payload))
        except Exception as e:
            logger.error(str(e))
        return result
Пример #10
0
def dashboard_collector_worker(config):
    """
    Collect dashboard metrics from PostgreSQL and push them to
    ``<home>/dashboard.q``.

    On any failure the error is logged, the connection is closed on a
    best-effort basis and the process exits with status 1.

    Args:
        config (dict): configuration holding the 'temboard', 'plugins',
            'postgresql' and 'logging' sections
    """
    try:
        signal.signal(signal.SIGTERM, dashboard_worker_sigterm_handler)
        logger.debug("Collecting data")
        conn = connector(
            host=config['postgresql']['host'],
            port=config['postgresql']['port'],
            user=config['postgresql']['user'],
            password=config['postgresql']['password'],
            database=config['postgresql']['dbname']
        )
        conn.connect()

        # convert config dict to namedtuple
        config_type = collections.namedtuple(
            '__config', ['temboard', 'plugins', 'postgresql', 'logging'])
        config_nt = config_type(temboard=config['temboard'],
                                plugins=config['plugins'],
                                postgresql=config['postgresql'],
                                logging=config['logging'])

        # Collect data
        collected = metrics.get_metrics(conn, config_nt)
        conn.close()

        # We don't want to store notifications in the history.
        collected.pop('notifications', None)

        queue_path = '%s/dashboard.q' % (config['temboard']['home'])
        history_length = config['plugins']['dashboard']['history_length']
        history_queue = Queue(queue_path,
                              max_length=(history_length + 1),
                              overflow_mode='slide')
        history_queue.push(Message(content=json.dumps(collected)))
        logger.debug(collected)
        logger.debug("End")
    except (error, Exception) as e:
        logger.error("Could not collect data")
        logger.exception(e)
        # Best-effort close: the connection may not exist or may already
        # be closed at this point.
        try:
            conn.close()
        except Exception:
            pass
        sys.exit(1)
0
def monitoring_collector_worker(app):
    """
    Run the monitoring probes and push the collected metrics, as one JSON
    message, in ``<home>/metrics.q``.

    Args:
        app: application object exposing the configuration as ``app.config``
    """
    logger.debug("Starting monitoring collector")
    config = app.config
    pg = config.postgresql
    conninfo = {
        'host': pg.host,
        'port': pg.port,
        'user': pg.user,
        'database': pg.dbname,
        'password': pg.password,
        'dbnames': config.monitoring.dbnames,
        'instance': pg.instance,
    }

    system_info = host_info(config.temboard.hostname)
    # Load the probes to run
    probes = load_probes(config.monitoring, config.temboard.home)

    instance = instance_info(conninfo, system_info['hostname'])

    logger.debug("Running probes")
    # Gather the data from probes
    probes_data = run_probes(probes, [instance])

    # Prepare and send output
    output = {
        'datetime': now(),
        'hostinfo': system_info,
        'instances': remove_passwords([instance]),
        'data': probes_data,
        'version': __VERSION__,
    }
    logger.debug(output)

    queue_path = os.path.join(config.temboard.home, 'metrics.q')
    metrics_queue = Queue(queue_path,
                          max_size=1024 * 1024 * 10,
                          overflow_mode='slide')
    metrics_queue.push(Message(content=json.dumps(output)))
    logger.debug("Done")
Пример #12
0
def dashboard_collector_worker(commands, command, config):
    """
    Collect dashboard metrics from PostgreSQL and push them to
    ``<home>/dashboard.q``.

    The command is first flagged as started (state, time, pid) and saved
    with ``commands.update``. On any failure the error is logged, the
    connection is closed on a best-effort basis and the process exits
    with status 1.
    """
    try:
        signal.signal(signal.SIGTERM, dashboard_worker_sigterm_handler)
        started_ms = time.time() * 1000
        set_logger_name("dashboard_collector")
        logger = get_logger(config)
        logger.debug("Starting with pid=%s" % (getpid()))
        logger.debug("commandid=%s" % (command.commandid))

        command.state = COMMAND_START
        command.time = time.time()
        command.pid = getpid()
        commands.update(command)

        conn = connector(host=config.postgresql['host'],
                         port=config.postgresql['port'],
                         user=config.postgresql['user'],
                         password=config.postgresql['password'],
                         database=config.postgresql['dbname'])
        conn.connect()
        collected = metrics.get_metrics(conn, config)
        # We don't want to store notifications in the history.
        collected.pop('notifications', None)
        conn.close()

        queue_path = '%s/dashboard.q' % (config.temboard['home'])
        history_length = config.plugins['dashboard']['history_length']
        history_queue = Queue(queue_path,
                              max_length=(history_length + 1),
                              overflow_mode='slide')
        history_queue.push(Message(content=json.dumps(collected)))

        elapsed_ms = time.time() * 1000 - started_ms
        logger.debug("Duration: %s." % (str(elapsed_ms)))
        logger.debug("Done.")
    except (error, Exception) as e:
        logger.traceback(get_tb())
        logger.error(str(e))
        logger.debug("Failed.")
        # Best-effort close: the connection may not exist or may already
        # be closed at this point.
        try:
            conn.close()
        except Exception:
            pass
        sys.exit(1)
Пример #13
0
def supervision_collector_worker(commands, command, config):
    """
    Run the supervision probes and push the collected metrics, as one
    JSON message, in ``<home>/metrics.q``.

    The command is first flagged as started (state, time, pid) and saved
    with ``commands.update``. The process exits with status 1 when
    ``host_info`` raises ValueError.
    """
    signal.signal(signal.SIGTERM, supervision_worker_sigterm_handler)

    started_ms = time.time() * 1000
    set_logger_name("supervision_collector_worker")
    logger = get_logger(config)
    # TODO: logging methods in supervision plugin must be aligned.
    logging.root = logger
    logger.info("Start pid=%s id=%s" % (os.getpid(), command.commandid))

    command.state = COMMAND_START
    command.time = time.time()
    command.pid = os.getpid()
    commands.update(command)

    try:
        system_info = host_info(config.plugins['supervision'])
    except ValueError as e:
        logger.error(
            "supervision_collector_worker - unable to get system information: %s\n"
            % str(e))
        sys.exit(1)

    # Load the probes to run
    probes = load_probes(config.plugins['supervision'],
                         config.temboard['home'])

    # Connection information is stored in the plugin configuration as a
    # one-element list.
    local_conninfo = {
        'host': config.postgresql['host'],
        'port': config.postgresql['port'],
        'user': config.postgresql['user'],
        'database': config.postgresql['dbname'],
        'password': config.postgresql['password'],
        'dbnames': config.plugins['supervision']['dbnames'],
        'instance': config.postgresql['instance'],
    }
    config.plugins['supervision']['conninfo'] = [local_conninfo]

    # Validate connection information from the config, and ensure
    # the instance is available
    instances = []
    for conninfo in config.plugins['supervision']['conninfo']:
        logging.debug("Validate connection information on instance \"%s\"",
                      conninfo['instance'])
        instances.append(instance_info(conninfo, system_info['hostname']))

    # Gather the data from probes
    probes_data = run_probes(probes, system_info['hostname'], instances)

    # Prepare and send output
    output = dict(
        datetime=now(),
        hostinfo=system_info,
        instances=remove_passwords(instances),
        data=probes_data,
        version=__VERSION__,
    )
    metrics_queue = Queue('%s/metrics.q' % (config.temboard['home']),
                          max_size=1024 * 1024 * 10,
                          overflow_mode='slide')
    metrics_queue.push(Message(content=json.dumps(output)))

    elapsed_ms = time.time() * 1000 - started_ms
    logger.info("End. Duration: %s." % (str(elapsed_ms)))
Пример #14
0
def monitoring_collector_worker(config):
    """
    Run the monitoring probes and push the collected metrics, as one JSON
    message, in ``<home>/metrics.q``.

    Any failure is logged and exits the process with status 1.

    Args:
        config (dict): configuration holding the 'temboard', 'plugins',
            'postgresql' and 'logging' sections
    """
    signal.signal(signal.SIGTERM, monitoring_worker_sigterm_handler)
    # convert config dict to namedtuple
    config_type = collections.namedtuple(
        '__config', ['temboard', 'plugins', 'postgresql', 'logging'])
    config = config_type(temboard=config['temboard'],
                         plugins=config['plugins'],
                         postgresql=config['postgresql'],
                         logging=config['logging'])

    logger.debug("Starting collector")

    try:
        system_info = host_info(config.temboard['hostname'])
    except Exception as e:
        logger.exception(e)
        logger.debug("Failed")
        sys.exit(1)

    try:
        # Load the probes to run
        probes = load_probes(config.plugins['monitoring'],
                             config.temboard['home'])

        # Connection information is stored in the plugin configuration as
        # a one-element list.
        local_conninfo = {
            'host': config.postgresql['host'],
            'port': config.postgresql['port'],
            'user': config.postgresql['user'],
            'database': config.postgresql['dbname'],
            'password': config.postgresql['password'],
            'dbnames': config.plugins['monitoring']['dbnames'],
            'instance': config.postgresql['instance'],
        }
        config.plugins['monitoring']['conninfo'] = [local_conninfo]

        # Validate connection information from the config, and ensure
        # the instance is available
        instances = [
            instance_info(conninfo, system_info['hostname'])
            for conninfo in config.plugins['monitoring']['conninfo']
        ]

        logger.debug("Running probes")
        # Gather the data from probes
        probes_data = run_probes(probes, instances)

        # Prepare and send output
        output = dict(
            datetime=now(),
            hostinfo=system_info,
            instances=remove_passwords(instances),
            data=probes_data,
            version=__VERSION__,
        )
        logger.debug(output)
        metrics_queue = Queue('%s/metrics.q' % (config.temboard['home']),
                              max_size=1024 * 1024 * 10,
                              overflow_mode='slide')
        metrics_queue.push(Message(content=json.dumps(output)))
        logger.debug("Done")
    except Exception as e:
        logger.exception(e)
        logger.error("Could not collect data")
        sys.exit(1)
0
def get_history_metrics_queue(config, _=None):
    """
    Return the content of all messages stored in ``<home>/dashboard.q``.

    The second positional argument is ignored.
    """
    queue_path = '%s/dashboard.q' % (config.temboard['home'])
    history_length = config.plugins['dashboard']['history_length']
    history_queue = Queue(queue_path,
                          max_length=(history_length + 1),
                          overflow_mode='slide')
    return history_queue.get_content_all_messages()
Пример #16
0
def get_metrics_queue(config):
    """
    Return the last dashboard message, completed with notifications.

    The message is read from ``dashboard.q`` in ``config.temboard.home``;
    its 'notifications' entry is set from
    ``DashboardMetrics.get_notifications(config)``.
    """
    queue_path = os.path.join(config.temboard.home, 'dashboard.q')
    dashboard_queue = Queue(queue_path)
    dashboard_metrics = DashboardMetrics()
    latest = dashboard_queue.get_last_message()
    latest['notifications'] = dashboard_metrics.get_notifications(config)
    return latest
Пример #17
0
def get_history_metrics_queue(config):
    """
    Return the content of all messages stored in ``dashboard.q``, located
    in ``config.temboard.home``.
    """
    queue_path = os.path.join(config.temboard.home, 'dashboard.q')
    history_queue = Queue(queue_path)
    return history_queue.get_content_all_messages()
Пример #18
0
def get_history_metrics_queue(config, _=None):
    """
    Return the content of all messages stored in ``<home>/dashboard.q``.

    The second positional argument is ignored.
    """
    queue_path = '%s/dashboard.q' % (config.temboard['home'])
    history_queue = Queue(queue_path)
    return history_queue.get_content_all_messages()
Пример #19
0
def get_metrics_queue(config, _=None):
    """
    Return the last dashboard message, completed with notifications.

    The message is read from ``<home>/dashboard.q``; its 'notifications'
    entry is set from ``DashboardMetrics.get_notifications(config)``.
    The second positional argument is ignored.
    """
    queue_path = '%s/dashboard.q' % (config.temboard['home'])
    dashboard_queue = Queue(queue_path)
    dashboard_metrics = DashboardMetrics()
    latest = dashboard_queue.get_last_message()
    latest['notifications'] = dashboard_metrics.get_notifications(config)
    return latest