Пример #1
0
    def reload_configs(self, checks_to_reload=set()):
        """Reload the agent configuration and checksd configurations.
           Can also reload only an explicit set of checks."""
        log.info("Attempting a configuration reload...")
        hostname = get_hostname(self._agentConfig)
        jmx_sd_configs = None

        # if no check was given, reload them all
        if not checks_to_reload:
            log.debug("No check list was passed, reloading every check")
            # stop checks
            for check in self._checksd.get('initialized_checks', []):
                check.stop()

            self._checksd = load_check_directory(self._agentConfig, hostname)
            if self._jmx_service_discovery_enabled:
                jmx_sd_configs = generate_jmx_configs(self._agentConfig,
                                                      hostname)
        else:
            new_checksd = copy(self._checksd)
            all_jmx_checks = get_jmx_checks(auto_conf=True)
            jmx_checks = [
                check for check in checks_to_reload if check in all_jmx_checks
            ]
            py_checks = set(checks_to_reload) - set(jmx_checks)
            self.refresh_specific_checks(hostname, new_checksd, py_checks)
            if self._jmx_service_discovery_enabled:
                jmx_sd_configs = generate_jmx_configs(self._agentConfig,
                                                      hostname, jmx_checks)

            # once the reload is done, replace existing checks with the new ones
            self._checksd = new_checksd

        if jmx_sd_configs:
            self._submit_jmx_service_discovery(jmx_sd_configs)

        # Logging
        num_checks = len(self._checksd['initialized_checks'])
        if num_checks > 0:
            opt_msg = " (refreshed %s checks)" % len(
                checks_to_reload) if checks_to_reload else ''

            msg = "Check reload was successful. Running {num_checks} checks{opt_msg}.".format(
                num_checks=num_checks, opt_msg=opt_msg)
            log.info(msg)
        else:
            log.info("No checksd configs found")
Пример #2
0
    def reload_configs(self, checks_to_reload=set()):
        """Reload the agent configuration and checksd configurations.
           Can also reload only an explicit set of checks."""
        log.info("Attempting a configuration reload...")
        hostname = get_hostname(self._agentConfig)
        jmx_sd_configs = None

        # if no check was given, reload them all
        if not checks_to_reload:
            log.debug("No check list was passed, reloading every check")
            # stop checks
            for check in self._checksd.get('initialized_checks', []):
                check.stop()

            self._checksd = load_check_directory(self._agentConfig, hostname)
            if self._jmx_service_discovery_enabled:
                jmx_sd_configs = generate_jmx_configs(self._agentConfig, hostname)
        else:
            new_checksd = copy(self._checksd)

            jmx_checks = [check for check in checks_to_reload if check in JMX_CHECKS]
            py_checks = set(checks_to_reload) - set(jmx_checks)
            self.refresh_specific_checks(hostname, new_checksd, py_checks)
            if self._jmx_service_discovery_enabled:
                jmx_sd_configs = generate_jmx_configs(self._agentConfig, hostname, jmx_checks)

            # once the reload is done, replace existing checks with the new ones
            self._checksd = new_checksd

        if jmx_sd_configs:
            self._submit_jmx_service_discovery(jmx_sd_configs)

        # Logging
        num_checks = len(self._checksd['initialized_checks'])
        if num_checks > 0:
            opt_msg = " (refreshed %s checks)" % len(checks_to_reload) if checks_to_reload else ''

            msg = "Check reload was successful. Running {num_checks} checks{opt_msg}.".format(
                num_checks=num_checks, opt_msg=opt_msg)
            log.info(msg)
        else:
            log.info("No checksd configs found")
 def test_read_jmx_config_from_store(self, *args):
     """Test JMX configs are read and converted to YAML"""
     jmx_configs = generate_jmx_configs(self.auto_conf_agentConfig, "jmxhost")
     valid_configs = {
         'solr_0': "init_config: {}\ninstances:\n- host: localhost\n  password: bar\n  "
         "port: '9999'\n  username: foo\n- host: remotehost\n  password: bar\n  "
         "port: '5555'\n  username: haz\n",
         'tomcat_0': "init_config: {}\ninstances:\n- host: localhost\n  port: '9012'\n"
     }
     for check in self.jmx_sd_configs:
         key = '{}_0'.format(check)
         self.assertEquals(jmx_configs[key], valid_configs[key])
Пример #4
0
 def test_read_jmx_config_from_store(self, *args):
     """Test JMX configs are read and converted to YAML"""
     jmx_configs = generate_jmx_configs(self.auto_conf_agentConfig, "jmxhost")
     valid_configs = {
         'solr_0': "init_config: {}\ninstances:\n- host: localhost\n  password: bar\n  "
         "port: '9999'\n  username: foo\n- host: remotehost\n  password: bar\n  "
         "port: '5555'\n  username: haz\n",
         'tomcat_0': "init_config: {}\ninstances:\n- host: localhost\n  port: '9012'\n"
     }
     for check in self.jmx_sd_configs:
         key = '{}_0'.format(check)
         self.assertEquals(jmx_configs[key], valid_configs[key])
Пример #5
0
    def run(self, config=None):
        """Main loop of the collector"""

        # Gracefully exit on sigterm.
        signal.signal(signal.SIGTERM, self._handle_sigterm)

        if not Platform.is_windows():
            # A SIGUSR1 signals an exit with an autorestart
            signal.signal(signal.SIGUSR1, self._handle_sigusr1)

            # Handle Keyboard Interrupt
            signal.signal(signal.SIGINT, self._handle_sigterm)

            # A SIGHUP signals a configuration reload
            signal.signal(signal.SIGHUP, self._handle_sighup)

        # Save the agent start-up stats.
        CollectorStatus().persist()

        # Intialize the collector.
        if not config:
            config = get_config(parse_args=True)

        self._agentConfig = self._set_agent_config_hostname(config)
        hostname = get_hostname(self._agentConfig)
        systemStats = get_system_stats(proc_path=self._agentConfig.get(
            'procfs_path', '/proc').rstrip('/'))
        emitters = self._get_emitters()

        # Initialize service discovery
        if self._agentConfig.get('service_discovery'):
            self.sd_backend = get_sd_backend(self._agentConfig)

        if _is_affirmative(self._agentConfig.get('sd_jmx_enable', False)):
            pipe_path = get_jmx_pipe_path()
            if Platform.is_windows():
                pipe_name = pipe_path.format(pipename=SD_PIPE_NAME)
            else:
                pipe_name = os.path.join(pipe_path, SD_PIPE_NAME)

            if os.access(pipe_path, os.W_OK):
                if not os.path.exists(pipe_name):
                    os.mkfifo(pipe_name)
                self.sd_pipe = os.open(
                    pipe_name, os.O_RDWR)  # RW to avoid blocking (will only W)

                # Initialize Supervisor proxy
                self.supervisor_proxy = self._get_supervisor_socket(
                    self._agentConfig)
            else:
                log.debug(
                    'Unable to create pipe in temporary directory. JMX service discovery disabled.'
                )

        # Load the checks.d checks
        self._checksd = load_check_directory(self._agentConfig, hostname)

        # Load JMX configs if available
        if self._jmx_service_discovery_enabled:
            jmx_sd_configs = generate_jmx_configs(self._agentConfig, hostname)
            if jmx_sd_configs:
                self._submit_jmx_service_discovery(jmx_sd_configs)

        # Initialize the Collector
        self.collector = Collector(self._agentConfig, emitters, systemStats,
                                   hostname)

        # In developer mode, the number of runs to be included in a single collector profile
        try:
            self.collector_profile_interval = int(
                self._agentConfig.get('collector_profile_interval',
                                      DEFAULT_COLLECTOR_PROFILE_INTERVAL))
        except ValueError:
            log.warn('collector_profile_interval is invalid. '
                     'Using default value instead (%s).' %
                     DEFAULT_COLLECTOR_PROFILE_INTERVAL)
            self.collector_profile_interval = DEFAULT_COLLECTOR_PROFILE_INTERVAL

        # Configure the watchdog.
        self.check_frequency = int(self._agentConfig['check_freq'])
        watchdog = self._get_watchdog(self.check_frequency)

        # Initialize the auto-restarter
        self.restart_interval = int(
            self._agentConfig.get('restart_interval', RESTART_INTERVAL))
        self.agent_start = time.time()

        self.allow_profiling = self._agentConfig.get('allow_profiling', True)

        profiled = False
        collector_profiled_runs = 0

        # Run the main loop.
        while self.run_forever:
            # Setup profiling if necessary
            if self.allow_profiling and self.in_developer_mode and not profiled:
                try:
                    profiler = AgentProfiler()
                    profiler.enable_profiling()
                    profiled = True
                except Exception as e:
                    log.warn("Cannot enable profiler: %s" % str(e))

            if self.reload_configs_flag:
                if isinstance(self.reload_configs_flag, set):
                    self.reload_configs(
                        checks_to_reload=self.reload_configs_flag)
                else:
                    self.reload_configs()

            # Do the work. Pass `configs_reloaded` to let the collector know if it needs to
            # look for the AgentMetrics check and pop it out.
            self.collector.run(
                checksd=self._checksd,
                start_event=self.start_event,
                configs_reloaded=True if self.reload_configs_flag else False)

            self.reload_configs_flag = False

            # Look for change in the config template store.
            # The self.sd_backend.reload_check_configs flag is set
            # to True if a config reload is needed.
            if self._agentConfig.get('service_discovery') and self.sd_backend and \
               not self.sd_backend.reload_check_configs:
                try:
                    self.sd_backend.reload_check_configs = get_config_store(
                        self._agentConfig).crawl_config_template()
                except Exception as e:
                    log.warn(
                        'Something went wrong while looking for config template changes: %s'
                        % str(e))

            # Check if we should run service discovery
            # The `reload_check_configs` flag can be set through the docker_daemon check or
            # using ConfigStore.crawl_config_template
            if self._agentConfig.get('service_discovery') and self.sd_backend and \
               self.sd_backend.reload_check_configs:
                self.reload_configs_flag = self.sd_backend.reload_check_configs
                self.sd_backend.reload_check_configs = False

            if profiled:
                if collector_profiled_runs >= self.collector_profile_interval:
                    try:
                        profiler.disable_profiling()
                        profiled = False
                        collector_profiled_runs = 0
                    except Exception as e:
                        log.warn("Cannot disable profiler: %s" % str(e))

            # Check if we should restart.
            if self.autorestart and self._should_restart():
                self._do_restart()

            # Only plan for next loop if we will continue, otherwise exit quickly.
            if self.run_forever:
                if watchdog:
                    watchdog.reset()
                if profiled:
                    collector_profiled_runs += 1
                log.debug("Sleeping for {0} seconds".format(
                    self.check_frequency))
                time.sleep(self.check_frequency)

        # Now clean-up.
        try:
            CollectorStatus.remove_latest_status()
        except Exception:
            pass

        # Explicitly kill the process, because it might be running as a daemon.
        log.info("Exiting. Bye bye.")
        sys.exit(0)
Пример #6
0
    def run(self, config=None):
        """Main loop of the collector"""

        # Gracefully exit on sigterm.
        signal.signal(signal.SIGTERM, self._handle_sigterm)

        # A SIGUSR1 signals an exit with an autorestart
        signal.signal(signal.SIGUSR1, self._handle_sigusr1)

        # Handle Keyboard Interrupt
        signal.signal(signal.SIGINT, self._handle_sigterm)

        # A SIGHUP signals a configuration reload
        signal.signal(signal.SIGHUP, self._handle_sighup)

        # Save the agent start-up stats.
        CollectorStatus().persist()

        # Intialize the collector.
        if not config:
            config = get_config(parse_args=True)

        self._agentConfig = self._set_agent_config_hostname(config)
        hostname = get_hostname(self._agentConfig)
        systemStats = get_system_stats(
            proc_path=self._agentConfig.get('procfs_path', '/proc').rstrip('/')
        )
        emitters = self._get_emitters()

        # Initialize service discovery
        if self._agentConfig.get('service_discovery'):
            self.sd_backend = get_sd_backend(self._agentConfig)

        if _is_affirmative(self._agentConfig.get('sd_jmx_enable')):
            pipe_path = get_jmx_pipe_path()
            if Platform.is_windows():
                pipe_name = pipe_path.format(pipename=SD_PIPE_NAME)
            else:
                pipe_name = os.path.join(pipe_path, SD_PIPE_NAME)

            if os.access(pipe_path, os.W_OK):
                if not os.path.exists(pipe_name):
                    os.mkfifo(pipe_name)
                self.sd_pipe = os.open(pipe_name, os.O_RDWR) # RW to avoid blocking (will only W)

                # Initialize Supervisor proxy
                self.supervisor_proxy = self._get_supervisor_socket(self._agentConfig)
            else:
                log.debug('Unable to create pipe in temporary directory. JMX service discovery disabled.')

        # Load the checks.d checks
        self._checksd = load_check_directory(self._agentConfig, hostname)

        # Load JMX configs if available
        if self._jmx_service_discovery_enabled:
            jmx_sd_configs = generate_jmx_configs(self._agentConfig, hostname)
            if jmx_sd_configs:
                self._submit_jmx_service_discovery(jmx_sd_configs)

        # Initialize the Collector
        self.collector = Collector(self._agentConfig, emitters, systemStats, hostname)

        # In developer mode, the number of runs to be included in a single collector profile
        try:
            self.collector_profile_interval = int(
                self._agentConfig.get('collector_profile_interval', DEFAULT_COLLECTOR_PROFILE_INTERVAL))
        except ValueError:
            log.warn('collector_profile_interval is invalid. '
                     'Using default value instead (%s).' % DEFAULT_COLLECTOR_PROFILE_INTERVAL)
            self.collector_profile_interval = DEFAULT_COLLECTOR_PROFILE_INTERVAL

        # Configure the watchdog.
        self.check_frequency = int(self._agentConfig['check_freq'])
        watchdog = self._get_watchdog(self.check_frequency)

        # Initialize the auto-restarter
        self.restart_interval = int(self._agentConfig.get('restart_interval', RESTART_INTERVAL))
        self.agent_start = time.time()

        self.allow_profiling = self._agentConfig.get('allow_profiling', True)

        profiled = False
        collector_profiled_runs = 0

        # Run the main loop.
        while self.run_forever:
            # Setup profiling if necessary
            if self.allow_profiling and self.in_developer_mode and not profiled:
                try:
                    profiler = AgentProfiler()
                    profiler.enable_profiling()
                    profiled = True
                except Exception as e:
                    log.warn("Cannot enable profiler: %s" % str(e))

            if self.reload_configs_flag:
                if isinstance(self.reload_configs_flag, set):
                    self.reload_configs(checks_to_reload=self.reload_configs_flag)
                else:
                    self.reload_configs()

            # Do the work. Pass `configs_reloaded` to let the collector know if it needs to
            # look for the AgentMetrics check and pop it out.
            self.collector.run(checksd=self._checksd,
                               start_event=self.start_event,
                               configs_reloaded=True if self.reload_configs_flag else False)

            self.reload_configs_flag = False

            # Look for change in the config template store.
            # The self.sd_backend.reload_check_configs flag is set
            # to True if a config reload is needed.
            if self._agentConfig.get('service_discovery') and self.sd_backend and \
               not self.sd_backend.reload_check_configs:
                try:
                    self.sd_backend.reload_check_configs = get_config_store(
                        self._agentConfig).crawl_config_template()
                except Exception as e:
                    log.warn('Something went wrong while looking for config template changes: %s' % str(e))

            # Check if we should run service discovery
            # The `reload_check_configs` flag can be set through the docker_daemon check or
            # using ConfigStore.crawl_config_template
            if self._agentConfig.get('service_discovery') and self.sd_backend and \
               self.sd_backend.reload_check_configs:
                self.reload_configs_flag = self.sd_backend.reload_check_configs
                self.sd_backend.reload_check_configs = False

            if profiled:
                if collector_profiled_runs >= self.collector_profile_interval:
                    try:
                        profiler.disable_profiling()
                        profiled = False
                        collector_profiled_runs = 0
                    except Exception as e:
                        log.warn("Cannot disable profiler: %s" % str(e))

            # Check if we should restart.
            if self.autorestart and self._should_restart():
                self._do_restart()

            # Only plan for next loop if we will continue, otherwise exit quickly.
            if self.run_forever:
                if watchdog:
                    watchdog.reset()
                if profiled:
                    collector_profiled_runs += 1
                log.debug("Sleeping for {0} seconds".format(self.check_frequency))
                time.sleep(self.check_frequency)

        # Now clean-up.
        try:
            CollectorStatus.remove_latest_status()
        except Exception:
            pass

        # Explicitly kill the process, because it might be running as a daemon.
        log.info("Exiting. Bye bye.")
        sys.exit(0)
Пример #7
0
 def sd_pipe_jmx_configs(self, hostname):
     jmx_sd_configs = generate_jmx_configs(self._agentConfig, hostname)
     if jmx_sd_configs:
         self._submit_jmx_service_discovery(jmx_sd_configs)