Пример #1
0
def add_machine(uuid, password, update_collectd=True):
    """Register a machine for monitoring, or refresh it if already known.

    The collectd password and the enable timestamp are stored on the
    machine record; the record is created when none exists for `uuid`,
    otherwise the existing one is updated under its lock. Afterwards the
    collectd configuration is optionally refreshed and the "nodata" rule
    is (re)installed for the machine.

    Raises RequiredParameterMissingError when `uuid` or `password` is empty.
    """

    for param_name, param_value in (("uuid", uuid), ("password", password)):
        if not param_value:
            raise RequiredParameterMissingError(param_name)

    machine = get_machine_from_uuid(uuid)
    if not machine:
        # unknown uuid: build and persist a brand new record
        machine = Machine()
        machine.uuid = uuid
        machine.collectd_password = password
        machine.enabled_time = time()
        machine.create()
    else:
        # already registered: overwrite password and enable time in place
        # instead of rejecting the request
        with machine.lock_n_load():
            machine.collectd_password = password
            machine.enabled_time = time()
            machine.save()

    if update_collectd:
        # make the uuid/passwd pair known to collectd (collectd.passwd)
        update_collectd_conf()

    # every monitored machine gets a no-data rule
    add_rule(machine.uuid, "nodata", "nodata", "gt", 0)
Пример #2
0
def add_rule(uuid, rule_id, metric, operator, value,
             aggregate="all", reminder_list=None, reminder_offset=0,
             active_after=30):
    """Attach a freshly created condition to a machine's rule.

    A new Condition is always created and stored. The rule `rule_id` is
    created on the machine if it is missing; if it already pointed to a
    condition, that older condition is deleted before the rule is pointed
    to the new one.

    `active_after` is added to the current time to compute the condition's
    `active_after` timestamp. `reminder_list` is only set on the condition
    when it is truthy.

    Raises BadRequestError for an unsupported `aggregate` and
    MachineNotFoundError when no machine matches `uuid`.
    """

    allowed_aggregates = ('all', 'any', 'avg')
    if aggregate not in allowed_aggregates:
        raise BadRequestError(
            "Param 'aggregate' must be in ('all', 'any', 'avg')."
        )

    machine = get_machine_from_uuid(uuid)
    if not machine:
        raise MachineNotFoundError(uuid)

    # build the replacement condition
    new_condition = Condition()
    new_condition.uuid = uuid
    new_condition.rule_id = rule_id
    new_condition.cond_id = get_rand_token()
    new_condition.active_after = time() + active_after
    new_condition.metric = metric
    new_condition.operator = operator
    new_condition.aggregate = aggregate
    new_condition.value = value
    # reminder_list is expected to hold integers (seconds after the rule
    # triggered at which to notify); when omitted the default is kept.
    if reminder_list:
        new_condition.reminder_list = reminder_list
    new_condition.reminder_offset = reminder_offset
    # start at notification level 1 so that a new, unsatisfied rule does not
    # immediately send an OK to core
    new_condition.notification_level = 1

    # TODO: verify target is valid

    new_condition.create()

    with machine.lock_n_load():
        # make sure the rule entry exists
        if rule_id not in machine.rules:
            machine.rules[rule_id] = Rule()
        rule = machine.rules[rule_id]
        # drop the condition the rule previously referred to, if any
        if rule.warning:
            machine.get_condition(rule_id).delete()
        # point the rule at the new condition and persist
        rule.warning = new_condition.cond_id
        machine.save()
Пример #3
0
def remove_rule(uuid, rule_id):
    """Delete a machine's rule together with the condition tied to it.

    Raises MachineNotFoundError when no machine matches `uuid` and
    RuleNotFoundError when the machine has no rule named `rule_id`.
    """

    machine = get_machine_from_uuid(uuid)
    if not machine:
        raise MachineNotFoundError(uuid)

    with machine.lock_n_load():
        if rule_id not in machine.rules:
            raise RuleNotFoundError(rule_id)

        # the condition goes first, then the rule entry itself
        machine.get_condition(rule_id).delete()
        del machine.rules[rule_id]

        machine.save()
Пример #4
0
 def dispatch(self, host, names):
     """Report newly seen metrics of `host` to core and record them on disk.

     For every entry in `names` a target is built with `statname` and
     decorated through a `MultiHandler`; metrics whose plugin (the first
     dot-separated part of the alias) is listed in `self.ignore_plugins`
     are dropped. The remaining metrics are POSTed as JSON to core's
     `/new_metrics` endpoint and, if `self.fh` is set, every
     `host name` pair is appended to that file.

     Returns None. Nothing is written to the file when the machine is
     unknown, when no metric survives filtering, or when the POST raises.
     """
     machine = get_machine_from_uuid(host)
     if not machine:
         log.error("machine not found, wtf!")
         return
     multihandler = MultiHandler(host)
     # Placeholder prefix that is stripped from the alias when present.
     head_prefix = "%(head)s."
     metrics = []
     for name in names:
         target = statname(host, name)
         metric = multihandler.decorate_target(target)
         # Use a real prefix test: `rfind(...) == 0` would miss the prefix
         # whenever the same token also occurs later in the alias.
         if metric['alias'].startswith(head_prefix):
             metric['alias'] = metric['alias'][len(head_prefix):]
         plugin = metric['alias'].split('.')[0]
         if plugin not in self.ignore_plugins:
             metrics.append(metric)
     if not metrics:
         return
     log.info("New metrics for host %s, notifying core: %s", host, metrics)
     payload = {
         'uuid': host,
         'collectd_password': machine.collectd_password,
         'metrics': metrics,
     }
     try:
         resp = requests.post(
             "%s/new_metrics" % mon_config.CORE_URI,
             data=json.dumps(payload),
             verify=mon_config.SSL_VERIFY
         )
     except Exception as exc:
         # Best effort: a failed notification is logged, never raised.
         log.error("Error notifying core: %r", exc)
         return
     if not resp.ok:
         log.error("Bad response from core: %s", resp.text)
     # also save to file in disk
     if self.fh is not None:
         try:
             for name in names:
                 self.fh.write("%s %s\n" % (host, name))
             self.fh.flush()
         except IOError as exc:
             log.error("Error writing to metrics file: %s", exc)
Пример #5
0
def remove_machine(uuid):
    """Remove a machine from the monitored list and from collectd's conf.

    All rules of the machine are removed first (best effort: a failure to
    remove one rule is logged and the remaining rules are still processed),
    then the machine itself is deleted and the collectd configuration is
    rebuilt.

    Raises RequiredParameterMissingError when `uuid` is empty and
    MachineNotFoundError when no machine matches `uuid`.
    """

    if not uuid:
        raise RequiredParameterMissingError("uuid")

    machine = get_machine_from_uuid(uuid)
    if not machine:
        raise MachineNotFoundError(uuid)

    # Iterate over a snapshot of the rule ids: remove_rule() deletes entries
    # from a machine's rules, and if that is the same mapping as the one
    # being iterated, mutating it mid-iteration would raise.
    for rule_id in list(machine.rules):
        try:
            remove_rule(uuid, rule_id)
        except Exception as exc:
            # Catch Exception rather than everything so that
            # KeyboardInterrupt/SystemExit still propagate, and keep the
            # cause in the log instead of discarding it.
            log.error("Error removing rule '%s': %r", rule_id, exc)

    machine.delete()

    # reconstruct collectd passwords file to remove uuid/passwd
    update_collectd_conf()