Пример #1
0
    def run(self):
        """Run the check against every configured instance.

        In developer mode (and unless this check is the one named by
        ``AGENT_METRICS_CHECK_NAME``) a snapshot of internal agent stats is
        taken before the instances are processed.

        Each instance is skipped when it last ran less than its
        ``min_collection_interval`` seconds ago; otherwise ``self.check`` is
        called with a deep copy of the instance and an ``InstanceStatus``
        (OK, WARNING or ERROR) is built for it.

        NOTE(review): this excerpt ends at the ``finally:`` keyword; the
        clause body and the remainder of the method are not visible here.
        """

        # Store run statistics if needed
        before, after = None, None
        if self.in_developer_mode and self.name != AGENT_METRICS_CHECK_NAME:
            try:
                before = AgentCheck._collect_internal_stats()
            except Exception:  # It's fine if we can't collect stats for the run, just log and proceed
                self.log.debug(
                    "Failed to collect Agent Stats before check {0}".format(
                        self.name))

        instance_statuses = []
        for i, instance in enumerate(self.instances):
            try:
                # Interval lookup order: instance config, then init_config,
                # then the class-level default.
                min_collection_interval = instance.get(
                    'min_collection_interval',
                    self.init_config.get('min_collection_interval',
                                         self.DEFAULT_MIN_COLLECTION_INTERVAL))
                now = time.time()
                # Throttle: skip this instance if it ran too recently.
                if now - self.last_collection_time[i] < min_collection_interval:
                    self.log.debug(
                        "Not running instance #{0} of check {1} as it ran less than {2}s ago"
                        .format(i, self.name, min_collection_interval))
                    continue

                self.last_collection_time[i] = now

                # The check is only timed in developer mode.
                check_start_time = None
                if self.in_developer_mode:
                    check_start_time = timeit.default_timer()
                # The check receives a deep copy, so it cannot mutate the
                # instance stored in self.instances.
                self.check(copy.deepcopy(instance))

                instance_check_stats = None
                if check_start_time is not None:
                    instance_check_stats = {
                        'run_time': timeit.default_timer() - check_start_time
                    }

                # A check that completed but emitted warnings is reported as
                # WARNING rather than OK.
                if self.has_warnings():
                    instance_status = check_status.InstanceStatus(
                        i,
                        check_status.STATUS_WARNING,
                        warnings=self.get_warnings(),
                        instance_check_stats=instance_check_stats)
                else:
                    instance_status = check_status.InstanceStatus(
                        i,
                        check_status.STATUS_OK,
                        instance_check_stats=instance_check_stats)
            except Exception, e:
                # Any failure in one instance is logged and turned into an
                # ERROR status that carries the message and traceback.
                self.log.exception("Check '%s' instance #%s failed" %
                                   (self.name, i))
                instance_status = check_status.InstanceStatus(
                    i,
                    check_status.STATUS_ERROR,
                    error=str(e),
                    tb=traceback.format_exc())
            finally:
                # NOTE(review): the body of this clause is cut off in this excerpt.
Пример #2
0
    def run(self):
        """Run the check once against every configured instance.

        For each entry of ``self.instances``:

        * ``min_collection_interval`` is resolved from the instance, then
          from ``self.init_config``, then from
          ``self.DEFAULT_MIN_COLLECTION_INTERVAL``;
        * the instance is skipped (no status recorded) when it last ran less
          than that many seconds ago;
        * otherwise ``self.check`` is called with a deep copy of the instance
          and an ``InstanceStatus`` (OK, WARNING or ERROR) is appended to the
          local ``instance_statuses`` list.

        NOTE(review): no ``return`` statement is visible in this excerpt, so
        the collected statuses are only accumulated locally here.
        """
        instance_statuses = []
        for i, instance in enumerate(self.instances):
            try:
                min_collection_interval = instance.get(
                    'min_collection_interval',
                    self.init_config.get('min_collection_interval',
                                         self.DEFAULT_MIN_COLLECTION_INTERVAL))
                now = time.time()
                # Throttle: skip this instance if it ran too recently.
                if now - self.last_collection_time[i] < min_collection_interval:
                    self.log.debug("Not running instance #{0} of check {1} as it ran less than {2}s ago".format(i, self.name, min_collection_interval))
                    continue

                self.last_collection_time[i] = now
                # The check receives a deep copy, so it cannot mutate the
                # instance stored in self.instances.
                self.check(copy.deepcopy(instance))
                if self.has_warnings():
                    instance_status = check_status.InstanceStatus(
                        i,
                        check_status.STATUS_WARNING,
                        warnings=self.get_warnings())
                else:
                    instance_status = check_status.InstanceStatus(
                        i, check_status.STATUS_OK)
            # "except X as e" is valid on Python 2.6+ and Python 3, whereas the
            # comma form is a SyntaxError on Python 3.
            except Exception as e:
                self.log.exception("Check '%s' instance #%s failed" % (self.name, i))
                instance_status = check_status.InstanceStatus(
                    i,
                    check_status.STATUS_ERROR,
                    error=e,
                    tb=traceback.format_exc())
            instance_statuses.append(instance_status)
Пример #3
0
 def run(self):
     """Run the check once against every configured instance.

     ``self.check`` is called for each entry of ``self.instances``. A
     successful call yields an OK ``InstanceStatus``; any exception is
     logged and yields an ERROR status that carries the exception.

     NOTE(review): no ``return`` statement is visible in this excerpt, so
     the collected statuses are only accumulated locally here.
     """
     instance_statuses = []
     for i, instance in enumerate(self.instances):
         try:
             self.check(instance)
             instance_status = check_status.InstanceStatus(
                 i, check_status.STATUS_OK)
         # "except X as e" is valid on Python 2.6+ and Python 3, whereas the
         # comma form is a SyntaxError on Python 3.
         except Exception as e:
             self.log.exception("Check '%s' instance #%s failed" %
                                (self.name, i))
             instance_status = check_status.InstanceStatus(
                 i, check_status.STATUS_ERROR, e)
         instance_statuses.append(instance_status)
Пример #4
0
 def run(self):
     """Run the check once against every configured instance.

     ``self.check`` is called for each entry of ``self.instances``. The
     resulting ``InstanceStatus`` is WARNING when the check left warnings
     behind, OK otherwise, and ERROR (with the exception and its
     traceback) when the check raised.

     NOTE(review): no ``return`` statement is visible in this excerpt, so
     the collected statuses are only accumulated locally here.
     """
     instance_statuses = []
     for i, instance in enumerate(self.instances):
         try:
             self.check(instance)
             if self.has_warnings():
                 instance_status = check_status.InstanceStatus(
                     i,
                     check_status.STATUS_WARNING,
                     warnings=self.get_warnings())
             else:
                 instance_status = check_status.InstanceStatus(
                     i, check_status.STATUS_OK)
         # "except X as e" is valid on Python 2.6+ and Python 3, whereas the
         # comma form is a SyntaxError on Python 3.
         except Exception as e:
             self.log.exception("Check '%s' instance #%s failed" %
                                (self.name, i))
             instance_status = check_status.InstanceStatus(
                 i,
                 check_status.STATUS_ERROR,
                 error=e,
                 tb=traceback.format_exc())
         instance_statuses.append(instance_status)
Пример #5
0
    def run(self):
        """Execute the check for each configured instance and report statuses.

        In developer mode (and unless this check is the one named by
        ``AGENT_METRICS_CHECK_NAME``) internal agent stats are sampled before
        and after the run; when ``self.allow_profiling`` is set, the
        difference is stored and logged.

        An instance is skipped when it last ran less than its
        ``min_collection_interval`` seconds ago (instance value, falling back
        to ``self.min_collection_interval``).

        Returns:
            list of ``check_status.InstanceStatus``, one per instance that
            was actually run (throttled instances are omitted).
        """
        stats_before, stats_after = None, None
        if self.in_developer_mode and self.name != AGENT_METRICS_CHECK_NAME:
            try:
                stats_before = AgentCheck._collect_internal_stats()
            except Exception:
                # Best effort: failing to sample stats must not stop the run.
                self.log.debug(
                    "Failed to collect Agent Stats before check {0}".format(self.name))

        statuses = []
        for index, instance in enumerate(self.instances):
            try:
                interval = instance.get('min_collection_interval',
                                        self.min_collection_interval)

                current_time = time.time()
                ran_recently = (
                    current_time - self.last_collection_time[index] < interval)
                if ran_recently:
                    self.log.debug(
                        "Not running instance #{0} of check {1} as it ran less than {2}s ago"
                        .format(index, self.name, interval))
                    continue

                self.last_collection_time[index] = current_time

                # Timing is only recorded in developer mode.
                timer_start = (
                    timeit.default_timer() if self.in_developer_mode else None)
                self.check(copy.deepcopy(instance))
                run_stats = (
                    None if timer_start is None
                    else {'run_time': timeit.default_timer() - timer_start})

                # Build the status arguments first so that InstanceStatus is
                # constructed in a single place.
                status_kwargs = {'instance_check_stats': run_stats}
                if self.has_warnings():
                    level = check_status.STATUS_WARNING
                    status_kwargs['warnings'] = self.get_warnings()
                else:
                    level = check_status.STATUS_OK
                current_status = check_status.InstanceStatus(
                    index, level, **status_kwargs)
            except Exception as exc:
                self.log.exception(
                    "Check '%s' instance #%s failed" % (self.name, index))
                current_status = check_status.InstanceStatus(
                    index,
                    check_status.STATUS_ERROR,
                    error=str(exc),
                    tb=traceback.format_exc())
            finally:
                self._roll_up_instance_metadata()
                # Drain leftover warnings so the next instance starts clean.
                self.get_warnings()

            statuses.append(current_status)

        if self.in_developer_mode and self.name != AGENT_METRICS_CHECK_NAME:
            try:
                stats_after = AgentCheck._collect_internal_stats()
                if self.allow_profiling:
                    self._set_internal_profiling_stats(stats_before, stats_after)
                    log.info(
                        "\n \t %s %s" %
                        (self.name,
                         pretty_statistics(self._internal_profiling_stats)))
            except Exception:
                # Best effort: failing to sample stats must not stop the run.
                self.log.debug(
                    "Failed to collect Agent Stats after check {0}".format(self.name))

        return statuses
Пример #6
0
    def run(self):
        """Ping every configured address with fping and report statuses.

        All instance addresses (``instance['addr']``) are handed to a single
        ``Fping`` object, which is run repeatedly until the elapsed time
        reaches ``self._last_check_time``. Every round updates the loss/total
        counters and the RTT histogram; only the first round determines the
        per-instance status (WARNING when the address returned ``None``, OK
        otherwise). After the loop, one event is emitted per address that
        failed at least once.

        Returns:
            list with one slot per configured instance, holding the
            ``check_status.InstanceStatus`` from the first round, or ``None``
            for slots that received no first-round result.
        """
        inst = {}
        hosts = []
        # Address -> position of its first occurrence. This replaces repeated
        # linear hosts.index(addr) scans and keeps the same first-match
        # semantics for duplicate addresses.
        host_index = {}
        for position, instance in enumerate(self.instances):
            addr = instance['addr']
            inst[addr] = instance
            hosts.append(addr)
            host_index.setdefault(addr, position)
        instance_statuses = [None] * len(hosts)

        fping = Fping(hosts, self._ping_timeout)

        # record elapsed time for fping
        check_start_time = timeit.default_timer()
        elapsed_time = 0
        num = 0
        failures = {}
        while elapsed_time < self._last_check_time:
            result = fping.run()
            exec_time = timeit.default_timer()
            elapsed_time = exec_time - check_start_time
            num += 1
            # Statuses are only recorded for the first fping round.
            first_round = num == 1

            instance_check_stats = {
                'run_time': timeit.default_timer() - check_start_time
            }
            for addr, v in result.items():
                instance = inst[addr]
                if v is None:
                    self._increment_with_tags('loss_cnt', instance)
                    failures[addr] = failures.get(addr, 0) + 1
                    if first_round:
                        instance_status = check_status.InstanceStatus(
                            host_index[addr],
                            check_status.STATUS_WARNING,
                            warnings=self.get_warnings(),
                            instance_check_stats=instance_check_stats)
                else:
                    self.histogram('%s.rtt' % self._basename,
                                   v,
                                   tags=self._instance_tags(instance))
                    if first_round:
                        instance_status = check_status.InstanceStatus(
                            host_index[addr],
                            check_status.STATUS_OK,
                            instance_check_stats=instance_check_stats)
                self._increment_with_tags('total_cnt', instance)
                self._roll_up_instance_metadata()
                if first_round:
                    instance_statuses[host_index[addr]] = instance_status

        # exec_time is always bound here: failures is only populated inside
        # the loop above, after exec_time has been assigned.
        for addr, failure_count in failures.items():
            self.event({
                'timestamp':
                int(exec_time),
                'event_type':
                self._basename,
                'msg_title':
                'fping timeout',
                'msg_text':
                'ICMP Network Unreachable for ICMP Echo sent to %s %d times' %
                (addr, failure_count),
                'aggregation_key':
                md5(addr).hexdigest()
            })
        elapsed_time = timeit.default_timer() - check_start_time
        self.log.info("elapsed_time:%s[sec] check_times: %d" %
                      (round(elapsed_time, 2), num))
        return instance_statuses