def get_hostname(config=None):
    """Return the canonical host name this agent should identify as.

    Sources are tried in order and the first valid one wins:

    * the ``hostname`` entry of the agent config (agent.conf, "hostname:")
    * the output of ``/bin/hostname -f`` (mac, freebsd, linux, solaris only)
    * ``socket.gethostname()``

    :param config: agent configuration mapping; when ``None`` it is loaded
        through ``get_config(parse_args=True)``.
    :returns: the first candidate accepted by ``is_valid_hostname``.
    :raises Exception: when no source yields a valid host name (a critical
        log entry with the same message is written first).
    """
    def _fqdn_from_hostname_binary():
        # Best effort: any failure, or a non-zero exit status, yields None.
        try:
            proc = subprocess.Popen(['/bin/hostname', '-f'],
                                    stdout=subprocess.PIPE)
            stdout_data, _ = proc.communicate()
            if proc.returncode == 0:
                return stdout_data.strip()
        except Exception:
            return None
        return None

    # First, try the config.
    if config is None:
        from monagent.common.config import get_config
        config = get_config(parse_args=True)

    configured = config.get('hostname')
    if configured and is_valid_hostname(configured):
        return configured

    # Then move on to OS-specific detection.
    if get_os() in ['mac', 'freebsd', 'linux', 'solaris']:
        detected = _fqdn_from_hostname_binary()
        if detected and is_valid_hostname(detected):
            return detected

    # Fall back on socket.gethostname(); socket.getfqdn() is too unreliable.
    try:
        fallback = socket.gethostname()
    except socket.error:
        fallback = None
    if fallback and is_valid_hostname(fallback):
        return fallback

    failure = ('Unable to reliably determine host name. You can define one '
               'in agent.conf or in your hosts file')
    log.critical(failure)
    raise Exception(failure)
def get_hostname(config=None):
    """Work out the authoritative host name for the agent.

    Lookup order:

    * agent config (agent.conf, "hostname:")
    * 'hostname -f' (on unix: mac, freebsd, linux, solaris)
    * socket.gethostname()

    Every candidate must pass ``is_valid_hostname`` to be accepted. If none
    does, a critical message is logged and a plain ``Exception`` carrying the
    same text is raised.

    :param config: configuration mapping; loaded with
        ``get_config(parse_args=True)`` when omitted.
    :returns: the selected host name.
    """
    if config is None:
        from monagent.common.config import get_config
        config = get_config(parse_args=True)

    # A valid configured name short-circuits all detection.
    from_config = config.get('hostname')
    if from_config and is_valid_hostname(from_config):
        return from_config

    resolved = None

    # OS-specific detection: ask the hostname binary for the FQDN.
    unix_like = ('mac', 'freebsd', 'linux', 'solaris')
    if get_os() in unix_like:
        candidate = None
        try:
            child = subprocess.Popen(['/bin/hostname', '-f'],
                                     stdout=subprocess.PIPE)
            output = child.communicate()[0]
            if child.returncode == 0:
                candidate = output.strip()
        except Exception:
            # The probe is best effort; any failure means "no candidate".
            candidate = None
        if candidate and is_valid_hostname(candidate):
            resolved = candidate

    # Fall back on socket.gethostname(), socket.getfqdn() is too unreliable.
    if resolved is None:
        try:
            candidate = socket.gethostname()
        except socket.error:
            candidate = None
        if candidate and is_valid_hostname(candidate):
            resolved = candidate

    if resolved is not None:
        return resolved

    message = 'Unable to reliably determine host name. You can define one in agent.conf or in your hosts file'
    log.critical(message)
    raise Exception(message)
def run(self):
    """Build the forwarder application and run it.

    The listen port is read from ``self.config['listen_port']``; a missing
    or ``None`` value falls back to 17123, anything else is converted with
    ``int``. The created ``Application`` is kept on ``self.forwarder``.
    """
    log.debug("Windows Service - Starting forwarder")
    set_win32_cert_path()

    configured_port = self.config.get('listen_port', 17123)
    listen_port = 17123 if configured_port is None else int(configured_port)

    # The application receives a freshly loaded config, not self.config.
    self.forwarder = Application(listen_port,
                                 get_config(parse_args=False),
                                 watchdog=False)
    self.forwarder.run()
def __init__(self, args):
    """Initialise the Windows service and create its child process wrappers.

    :param args: service arguments, forwarded unchanged to
        ``win32serviceutil.ServiceFramework.__init__``.

    Sets ``self.hWaitStop`` (a win32 event handle), ``self.restart_interval``
    (seconds, from the ``autorestart_interval`` config entry or
    ``RESTART_INTERVAL``) and ``self.procs`` (process wrappers keyed by name).
    """
    win32serviceutil.ServiceFramework.__init__(self, args)
    # NOTE(review): presumably the event used to signal a service stop --
    # confirm against the stop handler, which is not visible here.
    self.hWaitStop = win32event.CreateEvent(None, 0, 0, None)
    config = get_config(parse_args=False)

    # Setup the correct options so the agent will use the forwarder
    opts = Values({
        'clean': False,
        'disabled_dd': False,
    })
    agentConfig = get_config(parse_args=False, options=opts)
    self.restart_interval = int(
        agentConfig.get('autorestart_interval', RESTART_INTERVAL))
    log.info("Autorestarting the collector every %s seconds" %
             self.restart_interval)

    # Keep a list of running processes so we can start/end as needed.
    # Processes are meant to be started in order and stopped in reverse
    # order.
    # NOTE(review): a plain dict does not guarantee iteration order on
    # Python 2 -- confirm how the start/stop code orders these entries.
    self.procs = {
        'forwarder': DDForwarder(config),
        'collector': DDAgent(agentConfig),
        'dogstatsd': DogstatsdProcess(config),
        'pup': PupProcess(config),
    }
def init_monstatsd(config_path=None, use_watchdog=False):
    """Configure the server and the reporting thread.

    :param config_path: passed to ``get_config`` as ``cfg_path``.
    :param use_watchdog: forwarded to the ``Reporter`` constructor.
    :returns: a ``(reporter, server, config)`` tuple.
    :raises KeyError: if a required config entry is missing.
    :raises AssertionError: if the configured interval is not positive.
    """
    cfg = get_config(parse_args=False, cfg_path=config_path)
    log.debug("Configuration monstatsd")

    # Required settings are read with [] so a missing key fails loudly;
    # optional ones use .get() and default to None.
    listen_port = cfg['monstatsd_port']
    flush_interval = int(cfg['monstatsd_interval'])
    bucket_size = int(cfg['monstatsd_agregator_bucket_size'])
    accept_remote = cfg['non_local_traffic']
    relay_host = cfg.get('statsd_forward_host')
    relay_port = cfg.get('statsd_forward_port')
    chunk_size = cfg.get('event_chunk_size')
    forwarder_url = cfg['forwarder_url']
    agent_hostname = get_hostname(cfg)

    # Create the aggregator, which is the point of communication between
    # the server and reporting threads.
    assert flush_interval > 0
    recent_threshold = cfg.get('recent_point_threshold', None)
    aggregator = MetricsBucketAggregator(
        agent_hostname,
        bucket_size,
        recent_point_threshold=recent_threshold)

    # Start the reporting thread.
    reporter = Reporter(flush_interval, aggregator, forwarder_url,
                        use_watchdog, chunk_size)

    # Start the server on an IPv4 stack: loopback by default, every
    # address when non-local traffic is allowed.
    bind_host = '' if accept_remote else 'localhost'
    server = Server(aggregator, bind_host, listen_port,
                    forward_to_host=relay_host,
                    forward_to_port=relay_port)

    return reporter, server, cfg
def init_monstatsd(config_path=None, use_watchdog=False):
    """Build the monstatsd reporter and server from the agent configuration.

    The configuration is loaded with ``get_config(parse_args=False,
    cfg_path=config_path)``. Returns the tuple ``(reporter, server, c)``
    where ``c`` is that loaded configuration.
    """
    settings = get_config(parse_args=False, cfg_path=config_path)
    log.debug("Configuration monstatsd")

    port = settings['monstatsd_port']
    interval = int(settings['monstatsd_interval'])
    aggregator_interval = int(settings['monstatsd_agregator_bucket_size'])
    non_local_traffic = settings['non_local_traffic']

    # Optional forwarding / chunking settings; None when not configured.
    forwarding = {
        'forward_to_host': settings.get('statsd_forward_host'),
        'forward_to_port': settings.get('statsd_forward_port'),
    }
    event_chunk_size = settings.get('event_chunk_size')

    target = settings['forwarder_url']
    hostname = get_hostname(settings)

    # The aggregator is the point of communication between the server and
    # the reporting thread.
    assert 0 < interval
    aggregator = MetricsBucketAggregator(
        hostname,
        aggregator_interval,
        recent_point_threshold=settings.get('recent_point_threshold', None),
    )

    # Reporting thread.
    reporter = Reporter(interval, aggregator, target, use_watchdog,
                        event_chunk_size)

    # Server on an IPv4 stack. Default to loopback; if non-local traffic
    # is enabled, bind to all addresses.
    if non_local_traffic:
        server_host = ''
    else:
        server_host = 'localhost'
    server = Server(aggregator, server_host, port, **forwarding)

    return reporter, server, settings
def init_forwarder(skip_ssl_validation=False, use_simple_http_client=False):
    """Create the forwarder and hook SIGTERM/SIGINT to stop it.

    The listen port comes from the ``listen_port`` config entry; a missing
    or ``None`` value means 17123, anything else is converted with ``int``.

    :param skip_ssl_validation: forwarded to the ``Forwarder`` constructor.
    :param use_simple_http_client: forwarded to the ``Forwarder`` constructor.
    :returns: the created ``Forwarder`` instance (not yet started here).
    """
    agent_config = get_config(parse_args=False)

    configured_port = agent_config.get('listen_port', 17123)
    listen_port = 17123 if configured_port is None else int(configured_port)

    app = Forwarder(listen_port,
                    agent_config,
                    skip_ssl_validation=skip_ssl_validation,
                    use_simple_http_client=use_simple_http_client)

    def _stop_on_signal(signum, frame):
        log.info("caught sigterm. stopping")
        app.stop()

    # The same handler serves both termination and keyboard interrupt.
    for signum in (signal.SIGTERM, signal.SIGINT):
        signal.signal(signum, _stop_on_signal)

    return app
def run(self, config=None):
    """Main loop of the collector.

    Installs the signal handlers, persists the start-up status, builds the
    ``Collector`` and then runs it repeatedly while ``self.run_forever`` is
    true, sleeping ``check_freq`` seconds between runs. When the loop ends
    the latest status is removed (best effort) and the process exits with
    status 0.

    :param config: agent configuration mapping; loaded with
        ``get_config(parse_args=True)`` when ``None``.
    """
    # Gracefully exit on sigterm.
    signal.signal(signal.SIGTERM, self._handle_sigterm)
    # A SIGUSR1 signals an exit with an autorestart.
    signal.signal(signal.SIGUSR1, self._handle_sigusr1)
    # Handle Keyboard Interrupt.
    signal.signal(signal.SIGINT, self._handle_sigterm)

    # Save the agent start-up stats.
    CollectorStatus().persist()

    # Initialize the collector.
    if config is None:
        config = get_config(parse_args=True)

    # Load the checks_d checks.
    checksd = load_check_directory(config)
    self.collector = Collector(config, http_emitter, checksd)

    # Configure the watchdog.
    check_frequency = int(config['check_freq'])
    watchdog = self._get_watchdog(check_frequency, config)

    # Initialize the auto-restarter.
    self.restart_interval = int(
        config.get('restart_interval', RESTART_INTERVAL))
    self.agent_start = time.time()

    def _profiling_requested():
        # Re-read on every call, so the config is consulted both before
        # and after each collector run.
        return (config.get('profile', False) and
                config.get('profile').lower() == 'yes')

    # Run the main loop.
    while self.run_forever:
        # Enable the profiler if needed. ``active_profiler`` stays None
        # when profiling is off or the profiler could not be created.
        active_profiler = None
        if _profiling_requested():
            try:
                import cProfile
                active_profiler = cProfile.Profile()
                active_profiler.enable()
                log.debug("Agent profiling is enabled")
            except Exception:
                log.warn("Cannot enable profiler")

        # Do the work.
        self.collector.run()

        # Disable the profiler and send its stats to the debug log.
        if _profiling_requested() and active_profiler is not None:
            try:
                active_profiler.disable()
                import pstats
                from cStringIO import StringIO
                report = StringIO()
                stats = pstats.Stats(active_profiler, stream=report)
                stats = stats.sort_stats("cumulative")
                stats.print_stats()
                log.debug(report.getvalue())
            except Exception:
                log.warn("Cannot disable profiler")

        # Check if we should restart.
        if self.autorestart and self._should_restart():
            self._do_restart()

        # Only plan for the next loop if we will continue,
        # otherwise just exit quickly.
        if not self.run_forever:
            continue
        if watchdog:
            watchdog.reset()
        time.sleep(check_frequency)

    # Now clean-up.
    try:
        CollectorStatus.remove_latest_status()
    except Exception:
        pass

    # Explicitly kill the process, because it might be running
    # as a daemon.
    log.info("Exiting. Bye bye.")
    sys.exit(0)
def main():
    """Command-line entry point of the collector agent.

    Parses the command line, loads the agent configuration and dispatches
    on the first positional argument (one of ``COMMANDS``).

    :returns: a process exit status:
        2 when no command is given (or ``check`` lacks a check name),
        3 for an unknown command, the result of ``agent.info`` for ``info``,
        0/1 for ``configcheck``/``configtest`` depending on whether every
        yaml file validated, and 0 otherwise.
    """
    options, args = get_parsed_args()
    agentConfig = get_config(options=options)
    # todo autorestart isn't used remove
    autorestart = agentConfig.get('autorestart', False)

    # 'configtest' is an alias of 'configcheck'; it must be listed here or
    # the membership test below rejects it before its branch is reached.
    COMMANDS = [
        'start',
        'stop',
        'restart',
        'foreground',
        'status',
        'info',
        'check',
        'configcheck',
        'configtest',
        'jmx',
    ]

    if len(args) < 1:
        sys.stderr.write("Usage: %s %s\n" % (sys.argv[0], "|".join(COMMANDS)))
        return 2

    command = args[0]
    if command not in COMMANDS:
        sys.stderr.write("Unknown command: %s\n" % command)
        return 3

    pid_file = PidFile('mon-agent')

    if options.clean:
        pid_file.clean()

    agent = CollectorDaemon(pid_file.get_path(), autorestart)

    if command in START_COMMANDS:
        log.info('Agent version %s' % get_version())

    if 'start' == command:
        log.info('Start daemon')
        agent.start()

    elif 'stop' == command:
        log.info('Stop daemon')
        agent.stop()

    elif 'restart' == command:
        log.info('Restart daemon')
        agent.restart()

    elif 'status' == command:
        agent.status()

    elif 'info' == command:
        return agent.info(verbose=options.verbose)

    elif 'foreground' == command:
        logging.info('Running in foreground')
        if autorestart:
            # Set-up the supervisor callbacks and fork it.
            logging.info('Running Agent with auto-restart ON')

            def child_func():
                agent.run()

            def parent_func():
                agent.start_event = False

            AgentSupervisor.start(parent_func, child_func)
        else:
            # Run in the standard foreground.
            agent.run(config=agentConfig)

    elif 'check' == command:
        # The check name is mandatory; fail with a usage message instead
        # of an IndexError traceback.
        if len(args) < 2:
            sys.stderr.write(
                "Usage: %s check <check_name> [check_rate]\n" % sys.argv[0])
            return 2
        check_name = args[1]
        try:
            # Try the old-style check first
            print(getattr(collector.checks.collector, check_name)(log).check(agentConfig))
        except Exception:
            # If not an old-style check, try checks_d
            checks = load_check_directory(agentConfig)
            for check in checks['initialized_checks']:
                if check.name == check_name:
                    check.run()
                    print(check.get_metrics())
                    print(check.get_events())
                    if len(args) == 3 and args[2] == 'check_rate':
                        print("Running 2nd iteration to capture rate metrics")
                        time.sleep(1)
                        check.run()
                        print(check.get_metrics())
                        print(check.get_events())

    elif 'configcheck' == command or 'configtest' == command:
        osname = get_os()
        all_valid = True
        for conf_path in glob.glob(os.path.join(get_confd_path(osname), "*.yaml")):
            basename = os.path.basename(conf_path)
            try:
                check_yaml(conf_path)
            except Exception as e:
                all_valid = False
                print("%s contains errors:\n %s" % (basename, e))
            else:
                print("%s is valid" % basename)
        if all_valid:
            print("All yaml files passed. You can now run the Monitoring agent.")
            return 0
        else:
            print("Fix the invalid yaml files above in order to start the Monitoring agent. "
                  "A useful external tool for yaml parsing can be found at "
                  "http://yaml-online-parser.appspot.com/")
            return 1

    elif 'jmx' == command:
        from collector.jmxfetch import JMX_LIST_COMMANDS, JMXFetch

        if len(args) < 2 or args[1] not in JMX_LIST_COMMANDS:
            print("#" * 80)
            print("JMX tool to be used to help configuring your JMX checks.")
            print("See http://docs.datadoghq.com/integrations/java/ for more information")
            print("#" * 80)
            print("\n")
            print("You have to specify one of the following command:")
            # Distinct loop names so the dispatched ``command`` is not
            # rebound while listing the JMX sub-commands.
            for jmx_name, desc in JMX_LIST_COMMANDS.items():
                print(" - %s [OPTIONAL: LIST OF CHECKS]: %s" % (jmx_name, desc))
            print("Example: sudo /etc/init.d/mon-agent jmx list_matching_attributes tomcat jmx solr")
            print("\n")

        else:
            jmx_command = args[1]
            checks_list = args[2:]
            confd_directory = get_confd_path(get_os())
            should_run = JMXFetch.init(
                confd_directory,
                agentConfig,
                get_logging_config(),
                15,
                jmx_command,
                checks_list,
                reporter="console")
            if not should_run:
                print("Couldn't find any valid JMX configuration in your conf.d directory: %s" % confd_directory)
                print("Have you enabled any JMX check ?")

    return 0
def testWhiteSpaceConfig(self):
    """Leading whitespace confuse ConfigParser

    Loads ``badconfig.conf`` from the directory of this test file and
    checks that ``api_key`` is read as "1234".
    """
    here = os.path.dirname(os.path.realpath(__file__))
    bad_config_path = os.path.join(here, "badconfig.conf")
    agent_config = get_config(cfg_path=bad_config_path)
    self.assertEqual(agent_config["api_key"], "1234")
def testWhiteSpaceConfig(self):
    """Leading whitespace confuse ConfigParser

    Loads ``badconfig.conf`` from the directory of this test file and
    checks that ``api_key`` is read as "1234".
    """
    agent_config = get_config(
        cfg_path=os.path.join(os.path.dirname(os.path.realpath(__file__)),
                              "badconfig.conf"))
    # assertEqual rather than the deprecated assertEquals alias.
    self.assertEqual(agent_config["api_key"], "1234")