def run(self, command=None, checks_list=None, reporter=None, redirect_std_streams=False):
    """
    Launch JMXFetch, (re)configuring it beforehand when required.

    command: JMXFetch command to run; JMX_COLLECT_COMMAND is used when empty.
    checks_list: optional checks forwarded to `configure`; when provided the
        configuration is always recomputed.
    reporter: reporter handed over to `_start`.
    redirect_std_streams: if left to False, the stdout and stderr of JMXFetch
        are streamed directly to the environment's stdout and stderr and
        cannot be retrieved via python's sys.stdout and sys.stderr. Set to
        True to redirect these streams to python's sys.stdout and sys.stderr.

    Returns the value of `_start`, or None when there is nothing to run.
    Exceptions are logged and re-raised.
    """
    if not command:
        command = JMX_COLLECT_COMMAND

    # (Re)set/(re)configure JMXFetch parameters when `checks_list` is
    # specified or no configuration was found
    must_configure = checks_list or self.jmx_checks is None
    if must_configure:
        self.configure(checks_list)

    try:
        if len(self.invalid_checks) > 0:
            # Failing to write the status file must not prevent the launch.
            try:
                JMXFiles.write_status_file(self.invalid_checks)
            except Exception:
                log.exception("Error while writing JMX status file")

        nothing_to_run = not (len(self.jmx_checks) > 0 or self.service_discovery)
        if nothing_to_run:
            # We're exiting purposefully, so exit with zero (supervisor's
            # expected code). HACK: Sleep a little bit so supervisor thinks
            # we've started cleanly and thus can exit cleanly.
            time.sleep(4)
            log.info("No valid JMX integration was found. Exiting ...")
            return None

        return self._start(
            self.java_bin_path,
            self.java_options,
            self.jmx_checks,
            command,
            reporter,
            self.tools_jar_path,
            self.custom_jar_paths,
            redirect_std_streams,
        )
    except Exception:
        log.exception("Error while initiating JMXFetch")
        raise
def _add_jmxinfo_tar(self): _, _, should_run_jmx = self._capture_output(self._should_run_jmx) if should_run_jmx: # status files (before listing beans because executing jmxfetch overwrites status files) for file_name, file_path in [ (JMXFiles._STATUS_FILE, JMXFiles.get_status_file_path()), (JMXFiles._PYTHON_STATUS_FILE, JMXFiles.get_python_status_file_path()) ]: if self._can_read(file_path, warn=False): self._add_file_tar( file_path, os.path.join('jmxinfo', file_name) ) # beans lists for command in ['list_matching_attributes', 'list_everything']: log.info(" * datadog-agent jmx {0} output".format(command)) self._add_command_output_tar( os.path.join('jmxinfo', '{0}.log'.format(command)), partial(self._jmx_command_call, command) ) # java version log.info(" * java -version output") _, _, java_bin_path = self._capture_output( lambda: JMXFetch.get_configuration(get_confd_path())[2] or 'java') self._add_command_output_tar( os.path.join('jmxinfo', 'java_version.log'), lambda: self._java_version(java_bin_path), command_desc="{0} -version".format(java_bin_path) )
def run(self):
    """
    Entry point of the JMXFetch Windows service process.

    Initializes the `jmxfetch` logging, then runs the JMX daemon when
    `is_enabled` is truthy; otherwise only logs that JMXFetch is not started.
    """
    from config import initialize_logging
    initialize_logging('jmxfetch')

    if not self.is_enabled:
        log.info("Windows Service - Not starting JMXFetch: no valid configuration found")
        return

    log.debug("Windows Service - Starting JMXFetch")
    # Clear the exit file before the daemon starts.
    JMXFiles.clean_exit_file()
    self.jmx_daemon.run()
def configure(self, checks_list=None, clean_status_file=True):
    """
    Compute JMXFetch parameters and optionally remove the previous status file.

    checks_list: forwarded to `get_configuration`.
    clean_status_file: when truthy, `JMXFiles.clean_status_file()` is called
        first to clean potential previous run leftovers.

    Sets `jmx_checks`, `invalid_checks`, `java_bin_path`, `java_options` and
    `tools_jar_path` on the instance from the 5-tuple returned by
    `get_configuration`.
    """
    if clean_status_file:
        JMXFiles.clean_status_file()

    configuration = self.get_configuration(self.confd_path, checks_list=checks_list)
    (self.jmx_checks,
     self.invalid_checks,
     self.java_bin_path,
     self.java_options,
     self.tools_jar_path) = configuration
def stop(self):
    """
    Override `stop` so JMXFetch gets a chance to exit by itself.

    When the process is running, the exit file is written and the process is
    given `_JMX_STOP_TIMEOUT` seconds to terminate; the parent `stop` is then
    called.
    """
    proc = self._proc
    if proc is not None and proc.is_running():
        # Ask JMXFetch to exit through the exit file, then wait for it.
        JMXFiles.write_exit_file()
        try:
            proc.wait(timeout=self._JMX_STOP_TIMEOUT)
        except psutil.TimeoutExpired:
            log.debug("JMXFetch process didn't stop after %ss, killing it", self._JMX_STOP_TIMEOUT)

    super(JMXFetchProcess, self).stop()
def run(self, command=None, checks_list=None, reporter=None, redirect_std_streams=False):
    """
    Launch JMXFetch, (re)configuring it beforehand when required.

    command: JMXFetch command to run; JMX_COLLECT_COMMAND is used when empty.
    checks_list: optional checks forwarded to `configure`; when provided the
        configuration is always recomputed.
    reporter: reporter handed over to `_start`.
    redirect_std_streams: if left to False, the stdout and stderr of JMXFetch
        are streamed directly to the environment's stdout and stderr and
        cannot be retrieved via python's sys.stdout and sys.stderr. Set to
        True to redirect these streams to python's sys.stdout and sys.stderr.

    Returns the value of `_start`, or None when no valid check was found.
    Exceptions are logged and re-raised.
    """
    # (Re)set/(re)configure JMXFetch parameters when `checks_list` is
    # specified or no configuration was found
    must_configure = checks_list or self.jmx_checks is None
    if must_configure:
        self.configure(checks_list)

    try:
        if not command:
            command = JMX_COLLECT_COMMAND

        if len(self.invalid_checks) > 0:
            # Failing to write the status file must not prevent the launch.
            try:
                JMXFiles.write_status_file(self.invalid_checks)
            except Exception:
                log.exception("Error while writing JMX status file")

        if not len(self.jmx_checks) > 0:
            # We're exiting purposefully, so exit with zero (supervisor's
            # expected code). HACK: Sleep a little bit so supervisor thinks
            # we've started cleanly and thus can exit cleanly.
            time.sleep(4)
            log.info("No valid JMX integration was found. Exiting ...")
            return None

        return self._start(
            self.java_bin_path,
            self.java_options,
            self.jmx_checks,
            command,
            reporter,
            self.tools_jar_path,
            self.custom_jar_paths,
            redirect_std_streams,
        )
    except Exception:
        log.exception("Error while initiating JMXFetch")
        raise
def configure(self, command=None, checks_list=None, clean_status_file=True):
    """
    Compute JMXFetch parameters and optionally remove the previous status file.

    command: JMXFetch command about to be run; JMX_LIST_JVMS forces the use
        of the JDK `tools.jar`.
    checks_list: forwarded to `get_configuration`.
    clean_status_file: when truthy, `JMXFiles.clean_status_file()` is called
        first to clean potential previous run leftovers.

    Raises InvalidJMXConfiguration when `command` is JMX_LIST_JVMS and no
    `tools.jar` path is configured.
    """
    if clean_status_file:
        JMXFiles.clean_status_file()

    configuration = self.get_configuration(self.confd_path, checks_list=checks_list)
    (self.jmx_checks,
     self.invalid_checks,
     self.java_bin_path,
     self.java_options,
     tools_jar_path,
     self.custom_jar_paths,
     use_attach_api) = configuration

    # Setup the JDK `tool.jar`
    listing_jvms = command == JMX_LIST_JVMS
    if listing_jvms and not tools_jar_path:
        raise InvalidJMXConfiguration(
            u"Command `{}` requires access to the JDK `tools.jar` file. "
            u"See `tools_jar_path` parameter in JMX YAML configuration files.".format(
                JMX_LIST_JVMS
            )
        )

    # `tools.jar` is only kept when it is going to be used: either the
    # configuration asked for it or the command requires it.
    if listing_jvms or use_attach_api:
        self.tools_jar_path = tools_jar_path
    else:
        self.tools_jar_path = None
def start(self):
    """
    Start the JMXFetch process when it is enabled.

    Does nothing when `is_enabled()` is falsy; otherwise clears the exit file
    and delegates to the parent `start`.
    """
    if not self.is_enabled():
        return

    # Clear the exit file before starting the process.
    JMXFiles.clean_exit_file()
    super(JMXFetchProcess, self).start()
def terminate(self):
    """
    Override `terminate` so that JMXFetch exits properly.

    Writes the exit file, then blocks on `join()`.
    """
    # Signal the exit through the exit file...
    JMXFiles.write_exit_file()
    # ...and wait for the process to finish.
    self.join()
def run(self):
    """
    Run the JMX daemon when `is_enabled` is truthy; do nothing otherwise.
    """
    if not self.is_enabled:
        return

    # Clear the exit file before the daemon starts.
    JMXFiles.clean_exit_file()
    self.jmx_daemon.run()
def _start(self, path_to_java, java_run_opts, jmx_checks, command, reporter,
           tools_jar_path, custom_jar_paths, redirect_std_streams):
    """
    Build the JMXFetch command line, launch it and wait for it to exit.

    path_to_java: java binary to run; "java" when empty.
    java_run_opts: whitespace-separated JVM options; default max/initial heap
        options are appended when none are present.
    jmx_checks: check entries passed after `--check` (the part before the
        first '.' is used as the check name when reporting a launch failure).
    command: JMXFetch command, passed as the last argument.
    reporter: reporter string; when None a statsd reporter is built from the
        agent configuration (`bind_host` / `monitorstatsd_port`).
    tools_jar_path: optional path prepended to the classpath.
    custom_jar_paths: optional list of jar paths prepended to the classpath.
    redirect_std_streams: when True, JMXFetch output is captured in temporary
        files and replayed on sys.stdout / sys.stderr once the process exits.

    Returns the JMXFetch process return code. On OSError, a status file
    marking every check as invalid is written before the error is re-raised.
    """
    # Local import: classpath entries must be joined with the platform
    # separator (`;` on Windows, `:` elsewhere), not a hard-coded `:`.
    import os

    if reporter is None:
        statsd_host = self.agentConfig.get('bind_host', 'localhost')
        statsd_port = self.agentConfig.get('monitorstatsd_port', "8125")
        reporter = "statsd:%s:%s" % (statsd_host, statsd_port)

    log.info("Starting jmxfetch:")
    try:
        path_to_java = path_to_java or "java"
        java_run_opts = java_run_opts or ""
        path_to_jmxfetch = self._get_path_to_jmxfetch()
        path_to_status_file = JMXFiles.get_status_file_path()

        classpath = path_to_jmxfetch
        if tools_jar_path is not None:
            classpath = r"%s%s%s" % (tools_jar_path, os.pathsep, classpath)
        if custom_jar_paths:
            classpath = r"%s%s%s" % (os.pathsep.join(custom_jar_paths), os.pathsep, classpath)

        subprocess_args = [
            path_to_java,  # Path to the java bin
            '-classpath',
            classpath,
            JMXFETCH_MAIN_CLASS,
            '--check_period', str(self.check_frequency * 1000),  # Period of the main loop in ms
            '--conf_directory', r"%s" % self.confd_path,  # conf.d directory read by jmxfetch
            '--log_level', JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"), "INFO"),
            '--log_location', r"%s" % self.logging_config.get('jmxfetch_log_file'),
            '--reporter', reporter,
            '--status_location', r"%s" % path_to_status_file,
            command,  # Name of the command, must stay last
        ]

        if Platform.is_windows():
            # Use a file to trigger JMXFetch exit on Windows; both arguments
            # are inserted right before the trailing command.
            path_to_exit_file = JMXFiles.get_python_exit_file_path()
            subprocess_args.insert(len(subprocess_args) - 1, '--exit_file_location')
            subprocess_args.insert(len(subprocess_args) - 1, path_to_exit_file)

        # `--check <check>...` goes right after the main class.
        subprocess_args.insert(4, '--check')
        for check in jmx_checks:
            subprocess_args.insert(5, check)

        # Specify a maximum memory allocation pool for the JVM
        if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
        # Specify the initial memory allocation pool for the JVM
        if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

        # JVM options are placed between the java binary and `-classpath`.
        for opt in java_run_opts.split():
            subprocess_args.insert(1, opt)

        log.info("Running %s" % " ".join(subprocess_args))

        with nested(tempfile.TemporaryFile(), tempfile.TemporaryFile()) as (stdout_f, stderr_f):
            jmx_process = subprocess.Popen(
                subprocess_args,
                # close_fds is disabled when the streams are redirected
                close_fds=not redirect_std_streams,
                stdout=stdout_f if redirect_std_streams else None,
                stderr=stderr_f if redirect_std_streams else None
            )
            self.jmx_process = jmx_process

            self.register_signal_handlers()

            # Wait for JMXFetch to return
            jmx_process.wait()

            if redirect_std_streams:
                # Replay the captured output on python's own streams
                stderr_f.seek(0)
                err = stderr_f.read()
                stdout_f.seek(0)
                out = stdout_f.read()
                sys.stdout.write(out)
                sys.stderr.write(err)

            return jmx_process.returncode

    except OSError:
        java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
        log.exception(java_path_msg)
        invalid_checks = {}
        for check in jmx_checks:
            check_name = check.split('.')[0]
            check_name = check_name.encode('ascii', 'ignore')
            invalid_checks[check_name] = java_path_msg
        JMXFiles.write_status_file(invalid_checks)
        raise
    except Exception:
        log.exception("Couldn't launch JMXFetch")
        raise
def terminate(self):
    """
    Write the JMXFetch exit file, then block on `join()`.
    """
    # Signal the exit through the exit file...
    JMXFiles.write_exit_file()
    # ...and wait for the process to finish.
    self.join()
def _start(self, path_to_java, java_run_opts, jmx_checks, command, reporter,
           tools_jar_path, custom_jar_paths, redirect_std_streams):
    """
    Build the JMXFetch command line and run it through `self.execute`.

    path_to_java: java binary to run; "java" when empty.
    java_run_opts: whitespace-separated JVM options; default max/initial heap
        options are appended when none are present.
    jmx_checks: check entries passed after `--check` (skipped when empty).
    command: JMXFetch command, passed as the last argument.
    reporter: reporter string; when None a statsd reporter is built from the
        agent configuration (`bind_host` / `dogstatsd_port`).
    tools_jar_path: optional path prepended to the classpath.
    custom_jar_paths: optional list of jar paths prepended to the classpath.
    redirect_std_streams: forwarded to `self.execute`.

    Returns the value of `self.execute`. On OSError, a status file marking
    every check as invalid is written before the error is re-raised.
    """
    # Local import: classpath entries must be joined with the platform
    # separator (`;` on Windows, `:` elsewhere), not a hard-coded `:`.
    import os

    if reporter is None:
        statsd_host = self.agent_config.get('bind_host', 'localhost')
        if statsd_host == "0.0.0.0":
            # If statsd is bound to all interfaces, just use localhost for clients
            statsd_host = "localhost"
        statsd_port = self.agent_config.get('dogstatsd_port', "8125")
        reporter = "statsd:%s:%s" % (statsd_host, statsd_port)

    log.info("Starting jmxfetch:")
    try:
        path_to_java = path_to_java or "java"
        java_run_opts = java_run_opts or ""
        path_to_jmxfetch = self._get_path_to_jmxfetch()
        path_to_status_file = JMXFiles.get_status_file_path()

        classpath = path_to_jmxfetch
        if tools_jar_path is not None:
            classpath = r"%s%s%s" % (tools_jar_path, os.pathsep, classpath)
        if custom_jar_paths:
            classpath = r"%s%s%s" % (os.pathsep.join(custom_jar_paths), os.pathsep, classpath)
        if self.config_jar_path:
            classpath = r"%s%s%s" % (self.config_jar_path, os.pathsep, classpath)

        subprocess_args = [
            path_to_java,  # Path to the java bin
            '-classpath',
            classpath,
            JMXFETCH_MAIN_CLASS,
            '--check_period', str(self.check_frequency * 1000),  # Period of the main loop of jmxfetch in ms
            '--conf_directory', r"%s" % self.confd_path,  # Path of the conf.d directory that will be read by jmxfetch,
            '--log_level', JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"), "INFO"),  # Log Level: Mapping from Python log level to log4j log levels
            '--log_location', r"%s" % self.logging_config.get('jmxfetch_log_file'),  # Path of the log file
            '--reporter', reporter,  # Reporter to use
            '--status_location', r"%s" % path_to_status_file,  # Path to the status file to write
            command,  # Name of the command, must stay last
        ]

        if Platform.is_windows():
            # Signal handlers are not supported on Windows:
            # use a file to trigger JMXFetch exit instead
            path_to_exit_file = JMXFiles.get_python_exit_file_path()
            subprocess_args.insert(len(subprocess_args) - 1, '--exit_file_location')
            subprocess_args.insert(len(subprocess_args) - 1, path_to_exit_file)

        if self.service_discovery:
            # Each option is inserted right after the main class, so the
            # final order is: --sd_enabled, --sd_pipe, --tmp_directory.
            pipe_path = get_jmx_pipe_path()
            subprocess_args.insert(4, '--tmp_directory')
            subprocess_args.insert(5, pipe_path)
            subprocess_args.insert(4, '--sd_pipe')
            subprocess_args.insert(5, SD_PIPE_NAME)
            subprocess_args.insert(4, '--sd_enabled')

        if jmx_checks:
            subprocess_args.insert(4, '--check')
            for check in jmx_checks:
                subprocess_args.insert(5, check)

        # Specify a maximum memory allocation pool for the JVM
        if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_SD_MAX_MEMORY_ALLOCATION if self.service_discovery else _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
        # Specify the initial memory allocation pool for the JVM
        if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

        # JVM options are placed between the java binary and `-classpath`.
        for opt in java_run_opts.split():
            subprocess_args.insert(1, opt)

        log.info("Running %s" % " ".join(subprocess_args))

        return self.execute(subprocess_args, redirect_std_streams)

    except OSError:
        java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
        log.exception(java_path_msg)
        invalid_checks = {}
        for check in jmx_checks:
            check_name = check.split('.')[0]
            check_name = check_name.encode('ascii', 'ignore')
            invalid_checks[check_name] = java_path_msg
        JMXFiles.write_status_file(invalid_checks)
        raise
    except Exception:
        log.info("unable to launch JMXFetch")
        raise
def get_jmx_status():
    """This function tries to read the 2 jmxfetch status files, which are
    yaml files located in the temp directory.

    There are 2 files:

    - One generated by the Agent itself, for jmx checks that can't be
      initialized because something is missing. Its format is as follows:

        ###
        invalid_checks:
              jmx: !!python/object/apply:jmxfetch.InvalidJMXConfiguration [You need to have at
                     least one instance defined in the YAML file for this check]
        timestamp: 1391040927.136523
        ###

    - One generated by jmxfetch that returns information about the collection
      of metrics. Its format is as follows:

        ###
        timestamp: 1391037347435
        checks:
          failed_checks:
            jmx:
            - {message: Unable to create instance. Please check your yaml file, status: ERROR}
          initialized_checks:
            tomcat:
            - {message: null, status: OK, metric_count: 7, instance_name: jmx-remihakim.fr-3000}
        ###

    Returns a list of CheckStatus objects; an empty list when no status file
    exists or when the files can't be loaded (the error is logged).
    """
    check_statuses = []
    java_status_path = JMXFiles.get_status_file_path()
    python_status_path = JMXFiles.get_python_status_file_path()
    if not os.path.exists(java_status_path) and not os.path.exists(python_status_path):
        log.debug("There is no jmx_status file at: %s or at: %s" % (java_status_path, python_status_path))
        return []

    check_data = defaultdict(lambda: defaultdict(list))
    try:
        if os.path.exists(java_status_path):
            # NOTE(review): `yaml.load` can instantiate arbitrary python
            # objects; acceptable only as long as the status files are
            # written by the agent / jmxfetch themselves.
            # The file is closed as soon as it is parsed.
            with open(java_status_path) as java_status_file:
                java_jmx_stats = yaml.load(java_status_file)

            status_age = time.time() - java_jmx_stats.get('timestamp')/1000  # JMX timestamp is saved in milliseconds
            jmx_checks = java_jmx_stats.get('checks', {})

            if status_age > 60:
                check_statuses.append(
                    CheckStatus("jmx", [
                        InstanceStatus(
                            0,
                            STATUS_ERROR,
                            error="JMXfetch didn't return any metrics during the last minute"
                        )
                    ])
                )
            else:
                # Failed and initialized checks share the same entry format
                # and are aggregated per check name.
                for section in ('failed_checks', 'initialized_checks'):
                    for check_name, instances in jmx_checks.get(section, {}).iteritems():
                        for info in instances:
                            message = info.get('message', None)
                            metric_count = info.get('metric_count', 0)
                            service_check_count = info.get('service_check_count', 0)
                            status = info.get('status')
                            instance_name = info.get('instance_name', None)
                            check_data[check_name]['statuses'].append(
                                get_jmx_instance_status(instance_name, status, message, metric_count))
                            check_data[check_name]['metric_count'].append(metric_count)
                            check_data[check_name]['service_check_count'].append(service_check_count)

                for check_name, data in check_data.iteritems():
                    check_status = CheckStatus(
                        check_name, data['statuses'],
                        metric_count=sum(data['metric_count']),
                        service_check_count=sum(data['service_check_count']))
                    check_statuses.append(check_status)

        if os.path.exists(python_status_path):
            # NOTE(review): this file stores python exception objects (see the
            # docstring example), so it needs the full `yaml.load` loader.
            with open(python_status_path) as python_status_file:
                python_jmx_stats = yaml.load(python_status_file)
            jmx_checks = python_jmx_stats.get('invalid_checks', {})
            for check_name, excep in jmx_checks.iteritems():
                check_statuses.append(CheckStatus(check_name, [], init_failed_error=excep))

        return check_statuses

    except Exception:
        # Best effort: a malformed or partially written status file must not
        # break the caller.
        log.exception("Couldn't load latest jmx status")
        return []
def _start(self, path_to_java, java_run_opts, jmx_checks, command, reporter,
           tools_jar_path, custom_jar_paths, redirect_std_streams):
    """
    Build the JMXFetch command line, launch it and wait for it to exit.

    path_to_java: java binary to run; "java" when empty.
    java_run_opts: whitespace-separated JVM options; default max/initial heap
        options are appended when none are present.
    jmx_checks: check entries passed after `--check` (the part before the
        first '.' is used as the check name when reporting a launch failure).
    command: JMXFetch command, passed as the last argument.
    reporter: reporter string; when None, "statsd:<dogstatsd_port>" is used.
    tools_jar_path: optional path prepended to the classpath.
    custom_jar_paths: optional list of jar paths prepended to the classpath.
    redirect_std_streams: when True, JMXFetch output is captured in temporary
        files and replayed on sys.stdout / sys.stderr once the process exits.

    Returns the JMXFetch process return code. On OSError, a status file
    marking every check as invalid is written before the error is re-raised.
    """
    # Local import: classpath entries must be joined with the platform
    # separator (`;` on Windows, `:` elsewhere), not a hard-coded `:`.
    import os

    statsd_port = self.agentConfig.get('dogstatsd_port', "8125")
    if reporter is None:
        reporter = "statsd:%s" % str(statsd_port)

    log.info("Starting jmxfetch:")
    try:
        path_to_java = path_to_java or "java"
        java_run_opts = java_run_opts or ""
        path_to_jmxfetch = self._get_path_to_jmxfetch()
        path_to_status_file = JMXFiles.get_status_file_path()

        classpath = path_to_jmxfetch
        if tools_jar_path is not None:
            classpath = r"%s%s%s" % (tools_jar_path, os.pathsep, classpath)
        if custom_jar_paths:
            classpath = r"%s%s%s" % (os.pathsep.join(custom_jar_paths), os.pathsep, classpath)

        subprocess_args = [
            path_to_java,  # Path to the java bin
            '-classpath',
            classpath,
            JMXFETCH_MAIN_CLASS,
            '--check_period', str(self.check_frequency * 1000),  # Period of the main loop of jmxfetch in ms
            '--conf_directory', r"%s" % self.confd_path,  # Path of the conf.d directory that will be read by jmxfetch,
            '--log_level', JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"), "INFO"),  # Log Level: Mapping from Python log level to log4j log levels
            '--log_location', r"%s" % self.logging_config.get('jmxfetch_log_file'),  # Path of the log file
            '--reporter', reporter,  # Reporter to use
            '--status_location', r"%s" % path_to_status_file,  # Path to the status file to write
            command,  # Name of the command, must stay last
        ]

        if Platform.is_windows():
            # Signal handlers are not supported on Windows:
            # use a file to trigger JMXFetch exit instead
            path_to_exit_file = JMXFiles.get_python_exit_file_path()
            subprocess_args.insert(len(subprocess_args) - 1, '--exit_file_location')
            subprocess_args.insert(len(subprocess_args) - 1, path_to_exit_file)

        # `--check <check>...` goes right after the main class.
        subprocess_args.insert(4, '--check')
        for check in jmx_checks:
            subprocess_args.insert(5, check)

        # Specify a maximum memory allocation pool for the JVM
        if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
        # Specify the initial memory allocation pool for the JVM
        if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

        # JVM options are placed between the java binary and `-classpath`.
        for opt in java_run_opts.split():
            subprocess_args.insert(1, opt)

        log.info("Running %s" % " ".join(subprocess_args))

        # Launch JMXfetch subprocess manually, w/o get_subprocess_output(), since it's a special case.
        # The temporary files use the default read/write mode ('rw' is not a valid mode).
        with nested(tempfile.TemporaryFile(), tempfile.TemporaryFile()) as (stdout_f, stderr_f):
            jmx_process = subprocess.Popen(
                subprocess_args,
                close_fds=not redirect_std_streams,  # only set to True when the streams are not redirected, for WIN compatibility
                stdout=stdout_f if redirect_std_streams else None,
                stderr=stderr_f if redirect_std_streams else None
            )
            self.jmx_process = jmx_process

            # Register SIGINT and SIGTERM signal handlers
            self.register_signal_handlers()

            # Wait for JMXFetch to return
            jmx_process.wait()

            if redirect_std_streams:
                # The streams are redirected to files, not pipes, so
                # `communicate()` would return (None, None): read the
                # captured output back from the temporary files instead.
                stderr_f.seek(0)
                err = stderr_f.read()
                stdout_f.seek(0)
                out = stdout_f.read()
                sys.stdout.write(out)
                sys.stderr.write(err)

            return jmx_process.returncode

    except OSError:
        java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
        log.exception(java_path_msg)
        invalid_checks = {}
        for check in jmx_checks:
            check_name = check.split('.')[0]
            check_name = check_name.encode('ascii', 'ignore')
            invalid_checks[check_name] = java_path_msg
        JMXFiles.write_status_file(invalid_checks)
        raise
    except Exception:
        log.exception("Couldn't launch JMXFetch")
        raise
def _start(self, path_to_java, java_run_opts, jmx_checks, command, reporter,
           tools_jar_path, custom_jar_paths, redirect_std_streams):
    """
    Build the JMXFetch command line and run it through `self.execute`.

    path_to_java: java binary to run; "java" when empty.
    java_run_opts: whitespace-separated JVM options; default max/initial heap
        options are appended when none are present.
    jmx_checks: check entries passed after `--check` (skipped when empty).
    command: JMXFetch command, passed as the last argument.
    reporter: reporter string; when None a statsd reporter is built from the
        agent configuration (`bind_host` / `dogstatsd_port`).
    tools_jar_path: optional path prepended to the classpath.
    custom_jar_paths: optional list of jar paths prepended to the classpath.
    redirect_std_streams: forwarded to `self.execute`.

    Returns the value of `self.execute`. On OSError, a status file marking
    every check as invalid is written before the error is re-raised.
    """
    # Local import: classpath entries must be joined with the platform
    # separator (`;` on Windows, `:` elsewhere), not a hard-coded `:`.
    import os

    if reporter is None:
        statsd_host = self.agent_config.get('bind_host', 'localhost')
        if statsd_host == "0.0.0.0":
            # If statsd is bound to all interfaces, just use localhost for clients
            statsd_host = "localhost"
        statsd_port = self.agent_config.get('dogstatsd_port', "8125")
        reporter = "statsd:%s:%s" % (statsd_host, statsd_port)

    log.info("Starting jmxfetch:")
    try:
        path_to_java = path_to_java or "java"
        java_run_opts = java_run_opts or ""
        path_to_jmxfetch = self._get_path_to_jmxfetch()
        path_to_status_file = JMXFiles.get_status_file_path()

        classpath = path_to_jmxfetch
        if tools_jar_path is not None:
            classpath = r"%s%s%s" % (tools_jar_path, os.pathsep, classpath)
        if custom_jar_paths:
            classpath = r"%s%s%s" % (os.pathsep.join(custom_jar_paths), os.pathsep, classpath)

        subprocess_args = [
            path_to_java,  # Path to the java bin
            '-classpath',
            classpath,
            JMXFETCH_MAIN_CLASS,
            '--check_period', str(self.check_frequency * 1000),  # Period of the main loop of jmxfetch in ms
            '--conf_directory', r"%s" % self.confd_path,  # Path of the conf.d directory that will be read by jmxfetch,
            '--log_level', JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"), "INFO"),  # Log Level: Mapping from Python log level to log4j log levels
            '--log_location', r"%s" % self.logging_config.get('jmxfetch_log_file'),  # Path of the log file
            '--reporter', reporter,  # Reporter to use
            '--status_location', r"%s" % path_to_status_file,  # Path to the status file to write
            command,  # Name of the command, must stay last
        ]

        if Platform.is_windows():
            # Signal handlers are not supported on Windows:
            # use a file to trigger JMXFetch exit instead
            path_to_exit_file = JMXFiles.get_python_exit_file_path()
            subprocess_args.insert(len(subprocess_args) - 1, '--exit_file_location')
            subprocess_args.insert(len(subprocess_args) - 1, path_to_exit_file)

        if self.service_discovery:
            # Each option is inserted right after the main class, so the
            # final order is: --sd_enabled, --sd_pipe, --tmp_directory.
            pipe_path = get_jmx_pipe_path()
            subprocess_args.insert(4, '--tmp_directory')
            subprocess_args.insert(5, pipe_path)
            subprocess_args.insert(4, '--sd_pipe')
            subprocess_args.insert(5, SD_PIPE_NAME)
            subprocess_args.insert(4, '--sd_enabled')

        if jmx_checks:
            subprocess_args.insert(4, '--check')
            for check in jmx_checks:
                subprocess_args.insert(5, check)

        # Specify a maximum memory allocation pool for the JVM
        if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_SD_MAX_MEMORY_ALLOCATION if self.service_discovery else _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
        # Specify the initial memory allocation pool for the JVM
        if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

        # JVM options are placed between the java binary and `-classpath`.
        for opt in java_run_opts.split():
            subprocess_args.insert(1, opt)

        log.info("Running %s" % " ".join(subprocess_args))

        return self.execute(subprocess_args, redirect_std_streams)

    except OSError:
        java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
        log.exception(java_path_msg)
        invalid_checks = {}
        for check in jmx_checks:
            check_name = check.split('.')[0]
            check_name = check_name.encode('ascii', 'ignore')
            invalid_checks[check_name] = java_path_msg
        JMXFiles.write_status_file(invalid_checks)
        raise
    except Exception:
        log.info("unable to launch JMXFetch")
        raise
def _populate_payload_metadata(self, payload, check_statuses, start_event=True): """ Periodically populate the payload with metadata related to the system, host, and/or checks. """ now = time.time() # Include system stats on first postback if start_event and self._is_first_run(): payload["systemStats"] = self.agentConfig.get("system_stats", {}) # Also post an event in the newsfeed payload["events"]["System"] = [ { "api_key": self.agentConfig["api_key"], "host": payload["internalHostname"], "timestamp": now, "event_type": "Agent Startup", "msg_text": "Version %s" % get_version(), } ] # Periodically send the host metadata. if self._should_send_additional_data("host_metadata"): # gather metadata with gohai try: if not Platform.is_windows(): command = "gohai" else: command = "gohai\gohai.exe" gohai_metadata, gohai_err, _ = get_subprocess_output([command], log) payload["gohai"] = gohai_metadata if gohai_err: log.warning("GOHAI LOG | {0}".format(gohai_err)) except OSError as e: if e.errno == 2: # file not found, expected when install from source log.info("gohai file not found") else: raise e except Exception as e: log.warning("gohai command failed with error %s" % str(e)) payload["systemStats"] = get_system_stats() payload["meta"] = self._get_hostname_metadata() self.hostname_metadata_cache = payload["meta"] # Add static tags from the configuration file host_tags = [] if self.agentConfig["tags"] is not None: host_tags.extend([unicode(tag.strip()) for tag in self.agentConfig["tags"].split(",")]) if self.agentConfig["collect_ec2_tags"]: host_tags.extend(EC2.get_tags(self.agentConfig)) if host_tags: payload["host-tags"]["system"] = host_tags # If required by the user, let's create the dd_check:xxx host tags if self.agentConfig["create_dd_check_tags"]: app_tags_list = [DD_CHECK_TAG.format(c.name) for c in self.initialized_checks_d] app_tags_list.extend([DD_CHECK_TAG.format(cname) for cname in JMXFiles.get_jmx_appnames()]) if "system" not in payload["host-tags"]: 
payload["host-tags"]["system"] = [] payload["host-tags"]["system"].extend(app_tags_list) GCE_tags = GCE.get_tags(self.agentConfig) if GCE_tags is not None: payload["host-tags"][GCE.SOURCE_TYPE_NAME] = GCE_tags # Log the metadata on the first run if self._is_first_run(): log.info("Hostnames: %s, tags: %s" % (repr(self.hostname_metadata_cache), payload["host-tags"])) # Periodically send extra hosts metadata (vsphere) # Metadata of hosts that are not the host where the agent runs, not all the checks use # that external_host_tags = [] if self._should_send_additional_data("external_host_tags"): for check in self.initialized_checks_d: try: getter = getattr(check, "get_external_host_tags") check_tags = getter() external_host_tags.extend(check_tags) except AttributeError: pass if external_host_tags: payload["external_host_tags"] = external_host_tags # Periodically send agent_checks metadata if self._should_send_additional_data("agent_checks"): # Add agent checks statuses and error/warning messages agent_checks = [] for check in check_statuses: if check.instance_statuses is not None: for i, instance_status in enumerate(check.instance_statuses): agent_checks.append( ( check.name, check.source_type_name, instance_status.instance_id, instance_status.status, # put error message or list of warning messages in the same field # it will be handled by the UI instance_status.error or instance_status.warnings or "", check.service_metadata[i], ) ) else: agent_checks.append( ( check.name, check.source_type_name, "initialization", check.status, repr(check.init_failed_error), ) ) payload["agent_checks"] = agent_checks payload["meta"] = self.hostname_metadata_cache # add hostname metadata
def _start(self, path_to_java, java_run_opts, jmx_checks, command, reporter,
           tools_jar_path, redirect_std_streams):
    """
    Build the JMXFetch command line, launch it and wait for it to exit.

    path_to_java: java binary to run; "java" when empty.
    java_run_opts: whitespace-separated JVM options; default max/initial heap
        options are appended when none are present.
    jmx_checks: check entries passed after `--check` (the part before the
        first '.' is used as the check name when reporting a launch failure).
    command: JMXFetch command, passed as the last argument.
    reporter: reporter string; when None, "statsd:<dogstatsd_port>" is used.
    tools_jar_path: optional path prepended to the classpath.
    redirect_std_streams: when True, JMXFetch output is piped and replayed on
        sys.stdout / sys.stderr once the process exits.

    Returns the JMXFetch process return code. On OSError, a status file
    marking every check as invalid is written before the error is re-raised.
    """
    # Local import: classpath entries must be joined with the platform
    # separator (`;` on Windows, `:` elsewhere), not a hard-coded `:`.
    import os

    statsd_port = self.agentConfig.get('dogstatsd_port', "8125")
    if reporter is None:
        reporter = "statsd:%s" % str(statsd_port)

    log.info("Starting jmxfetch:")
    try:
        path_to_java = path_to_java or "java"
        java_run_opts = java_run_opts or ""
        path_to_jmxfetch = self._get_path_to_jmxfetch()
        path_to_status_file = JMXFiles.get_status_file_path()

        if tools_jar_path is None:
            classpath = path_to_jmxfetch
        else:
            classpath = r"%s%s%s" % (tools_jar_path, os.pathsep, path_to_jmxfetch)

        subprocess_args = [
            path_to_java,  # Path to the java bin
            '-classpath',
            classpath,
            JMXFETCH_MAIN_CLASS,
            '--check_period', str(self.check_frequency * 1000),  # Period of the main loop of jmxfetch in ms
            '--conf_directory', r"%s" % self.confd_path,  # Path of the conf.d directory that will be read by jmxfetch,
            '--log_level', JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"), "INFO"),  # Log Level: Mapping from Python log level to log4j log levels
            '--log_location', r"%s" % self.logging_config.get('jmxfetch_log_file'),  # Path of the log file
            '--reporter', reporter,  # Reporter to use
            '--status_location', r"%s" % path_to_status_file,  # Path to the status file to write
            command,  # Name of the command, must stay last
        ]

        if Platform.is_windows():
            # Signal handlers are not supported on Windows:
            # use a file to trigger JMXFetch exit instead
            path_to_exit_file = JMXFiles.get_python_exit_file_path()
            subprocess_args.insert(len(subprocess_args) - 1, '--exit_file_location')
            subprocess_args.insert(len(subprocess_args) - 1, path_to_exit_file)

        # `--check <check>...` goes right after the main class.
        subprocess_args.insert(4, '--check')
        for check in jmx_checks:
            subprocess_args.insert(5, check)

        # Specify a maximum memory allocation pool for the JVM
        if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
        # Specify the initial memory allocation pool for the JVM
        if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

        # JVM options are placed between the java binary and `-classpath`.
        for opt in java_run_opts.split():
            subprocess_args.insert(1, opt)

        log.info("Running %s" % " ".join(subprocess_args))

        # Launch JMXfetch subprocess
        jmx_process = subprocess.Popen(
            subprocess_args,
            # close_fds is only True when the streams are NOT redirected
            # (kept this way for Windows compatibility)
            close_fds=not redirect_std_streams,
            stdout=subprocess.PIPE if redirect_std_streams else None,
            stderr=subprocess.PIPE if redirect_std_streams else None
        )
        self.jmx_process = jmx_process

        # Register SIGINT and SIGTERM signal handlers
        self.register_signal_handlers()

        if redirect_std_streams:
            # Wait for JMXFetch to return, and write out the stdout and stderr
            # of JMXFetch to sys.stdout and sys.stderr
            out, err = jmx_process.communicate()
            sys.stdout.write(out)
            sys.stderr.write(err)
        else:
            # Wait for JMXFetch to return
            jmx_process.wait()

        return jmx_process.returncode

    except OSError:
        java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
        log.exception(java_path_msg)
        invalid_checks = {}
        for check in jmx_checks:
            check_name = check.split('.')[0]
            check_name = check_name.encode('ascii', 'ignore')
            invalid_checks[check_name] = java_path_msg
        JMXFiles.write_status_file(invalid_checks)
        raise
    except Exception:
        log.exception("Couldn't launch JMXFetch")
        raise
def _populate_payload_metadata(self, payload, check_statuses, start_event=True): now = time.time() if start_event and self._is_first_run(): payload['systemStats'] = self.agentConfig.get('system_stats', {}) payload['events']['System'] = [{ 'api_key': self.agentConfig['api_key'], 'host': payload['internalHostname'], 'timestamp': now, 'event_type': 'Agent Startup', 'msg_text': 'Version %s' % get_version() }] if self._should_send_additional_data('host_metadata'): payload['gohai'] = get_gohai_data() payload['systemStats'] = get_system_stats() payload['meta'] = self._get_hostname_metadata() log.info('GOHAI data: {0}'.format(payload['gohai'])) self.hostname_metadata_cache = payload['meta'] host_tags = [] if self.agentConfig['tags'] is not None: host_tags.extend([tag.strip() for tag in self.agentConfig['tags'].split(",")]) if self.agentConfig['collect_ec2_tags']: host_tags.extend(EC2.get_tags(self.agentConfig)) if host_tags: payload['host-tags']['system'] = host_tags if self.agentConfig['create_dd_check_tags']: app_tags_list = [DD_CHECK_TAG.format(c.name) for c in self.initialized_checks_d] app_tags_list.extend([DD_CHECK_TAG.format(cname) for cname in JMXFiles.get_jmx_appnames()]) if 'system' not in payload['host-tags']: payload['host-tags']['system'] = [] payload['host-tags']['system'].extend(app_tags_list) GCE_tags = GCE.get_tags(self.agentConfig) if GCE_tags is not None: payload['host-tags'][GCE.SOURCE_TYPE_NAME] = GCE_tags if self._is_first_run(): log.info("Hostnames: %s, tags: %s" % (repr(self.hostname_metadata_cache), payload['host-tags'])) external_host_tags = [] if self._should_send_additional_data('external_host_tags'): for check in self.initialized_checks_d: try: getter = getattr(check, 'get_external_host_tags') check_tags = getter() external_host_tags.extend(check_tags) except AttributeError: pass if external_host_tags: payload['external_host_tags'] = external_host_tags if self._should_send_additional_data('agent_checks'): agent_checks = [] for check in 
check_statuses: if check.instance_statuses is not None: for i, instance_status in enumerate(check.instance_statuses): agent_checks.append( ( check.name, check.source_type_name, instance_status.instance_id, instance_status.status, instance_status.error or instance_status.warnings or "", check.service_metadata[i] ) ) else: agent_checks.append( ( check.name, check.source_type_name, "initialization", check.status, repr(check.init_failed_error) ) ) payload['agent_checks'] = agent_checks payload['meta'] = self.hostname_metadata_cache
def _start(self, path_to_java, java_run_opts, jmx_checks, command, reporter, tools_jar_path, custom_jar_paths, redirect_std_streams):
    """
    Build the JMXFetch command line, launch it as a subprocess and wait for it to exit.

    path_to_java: path to the java binary, "java" is used when it is empty
    java_run_opts: whitespace-separated JVM options; default heap settings are appended when
    no -Xmx/-XX:MaxHeapSize (resp. -Xms/-XX:InitialHeapSize) option is present
    jmx_checks: check names passed to JMXFetch after `--check`
    command: JMXFetch command, always the last argument of the command line
    reporter: reporter string; when None, a "statsd:<host>:<port>" reporter is built from
    the `bind_host` and `dogstatsd_port` agent settings
    tools_jar_path: optional jar prepended to the classpath
    custom_jar_paths: optional list of jars prepended to the classpath
    redirect_std_streams: when True, the output of JMXFetch is captured in temporary files
    and written to sys.stdout and sys.stderr once the process has exited

    Returns the return code of the JMXFetch process.
    Raises OSError when the process can't be launched (a status entry is written for every
    check first); any other exception is logged and re-raised.
    """
    import os

    if reporter is None:
        statsd_host = self.agentConfig.get('bind_host', 'localhost')
        if statsd_host == "0.0.0.0":
            # If statsd is bound to all interfaces, just use localhost for clients
            statsd_host = "localhost"
        statsd_port = self.agentConfig.get('dogstatsd_port', "8125")
        reporter = "statsd:%s:%s" % (statsd_host, statsd_port)

    log.info("Starting jmxfetch:")
    try:
        path_to_java = path_to_java or "java"
        java_run_opts = java_run_opts or ""
        path_to_jmxfetch = self._get_path_to_jmxfetch()
        path_to_status_file = JMXFiles.get_status_file_path()

        # The classpath separator is platform specific (':' on Unix, ';' on Windows):
        # os.pathsep matches the separator expected by the JVM on the running platform
        classpath_entries = []
        if custom_jar_paths:
            classpath_entries.extend(custom_jar_paths)
        if tools_jar_path is not None:
            classpath_entries.append(tools_jar_path)
        classpath_entries.append(path_to_jmxfetch)
        classpath = os.pathsep.join(classpath_entries)

        subprocess_args = [
            path_to_java,  # Path to the java bin
            '-classpath',
            classpath,
            JMXFETCH_MAIN_CLASS,
            '--check_period', str(self.check_frequency * 1000),  # Period of the main loop of jmxfetch in ms
            '--conf_directory', r"%s" % self.confd_path,  # Path of the conf.d directory that will be read by jmxfetch,
            '--log_level', JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"), "INFO"),  # Log Level: Mapping from Python log level to log4j log levels
            '--log_location', r"%s" % self.logging_config.get('jmxfetch_log_file'),  # Path of the log file
            '--reporter', reporter,  # Reporter to use
            '--status_location', r"%s" % path_to_status_file,  # Path to the status file to write
            command,  # Name of the command
        ]

        if Platform.is_windows():
            # Signal handlers are not supported on Windows:
            # use a file to trigger JMXFetch exit instead
            path_to_exit_file = JMXFiles.get_python_exit_file_path()
            # Both values go right before `command`, which has to stay last
            subprocess_args.insert(len(subprocess_args) - 1, '--exit_file_location')
            subprocess_args.insert(len(subprocess_args) - 1, path_to_exit_file)

        # Index 4 is the first position after the main class
        if self.service_discovery:
            pipe_path = get_jmx_pipe_path()
            subprocess_args.insert(4, '--tmp_directory')
            subprocess_args.insert(5, pipe_path)
            subprocess_args.insert(4, '--sd_standby')

        if jmx_checks:
            subprocess_args.insert(4, '--check')
            for check in jmx_checks:
                subprocess_args.insert(5, check)

        # Specify a maximum memory allocation pool for the JVM
        if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_SD_MAX_MEMORY_ALLOCATION if self.service_discovery else _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
        # Specify the initial memory allocation pool for the JVM
        if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

        # Splice the JVM options right after the java binary, in the order they were given:
        # inserting them one at a time at index 1 would reverse them, and the order matters
        # for some options (e.g. an -XX:+Unlock...VMOptions flag has to come before the
        # options it unlocks)
        subprocess_args[1:1] = java_run_opts.split()

        log.info("Running %s" % " ".join(subprocess_args))

        # Launch JMXfetch subprocess manually, w/o get_subprocess_output(), since it's a special case
        with nested(tempfile.TemporaryFile(), tempfile.TemporaryFile()) as (stdout_f, stderr_f):
            jmx_process = subprocess.Popen(
                subprocess_args,
                close_fds=not redirect_std_streams,  # only set to True when the streams are not redirected, for WIN compatibility
                stdout=stdout_f if redirect_std_streams else None,
                stderr=stderr_f if redirect_std_streams else None
            )
            self.jmx_process = jmx_process

            # Register SIGINT and SIGTERM signal handlers
            self.register_signal_handlers()

            # Wait for JMXFetch to return
            jmx_process.wait()

            if redirect_std_streams:
                # Write out the stdout and stderr of JMXFetch to sys.stdout and sys.stderr
                stderr_f.seek(0)
                err = stderr_f.read()
                stdout_f.seek(0)
                out = stdout_f.read()
                sys.stdout.write(out)
                sys.stderr.write(err)

        return jmx_process.returncode

    except OSError:
        java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
        log.exception(java_path_msg)
        invalid_checks = {}
        # `jmx_checks` may be empty or None: don't let that hide the original OSError
        for check in jmx_checks or []:
            check_name = check.split('.')[0]
            check_name = check_name.encode('ascii', 'ignore')
            invalid_checks[check_name] = java_path_msg
        JMXFiles.write_status_file(invalid_checks)
        raise
    except Exception:
        log.exception("Couldn't launch JMXFetch")
        raise
def _populate_payload_metadata(self, payload, check_statuses, start_event=True):
    """
    Periodically populate the payload with metadata related to the system, host, and/or checks.

    `payload` is updated in place and nothing is returned. The 'host_metadata',
    'external_host_tags' and 'agent_checks' sections are each gated by
    `self._should_send_additional_data(<section>)`; the startup event is only added on the
    first run, when `start_event` is true.
    """
    now = time.time()
    first_postback = start_event and self._is_first_run()

    if first_postback:
        # System stats from the configuration go out with the first postback...
        payload['systemStats'] = self.agentConfig.get('system_stats', {})
        # ...along with an event for the newsfeed
        startup_event = {
            'api_key': self.agentConfig['api_key'],
            'host': payload['internalHostname'],
            'timestamp': now,
            'event_type': 'Agent Startup',
            'msg_text': 'Version %s' % get_version()
        }
        payload['events']['System'] = [startup_event]

    if self._should_send_additional_data('host_metadata'):
        # gohai metadata is only attached when the helper returned something
        gohai_metadata = self._run_gohai_metadata()
        if gohai_metadata:
            payload['gohai'] = gohai_metadata

        procfs_path = self.agentConfig.get('procfs_path', '/proc').rstrip('/')
        payload['systemStats'] = get_system_stats(proc_path=procfs_path)
        payload['meta'] = self._get_hostname_metadata()
        # Kept around to be re-sent with the agent checks metadata
        self.hostname_metadata_cache = payload['meta']

        # Static tags from the configuration file, then the EC2 tags if enabled
        system_tags = []
        configured_tags = self.agentConfig['tags']
        if configured_tags is not None:
            for raw_tag in configured_tags.split(","):
                system_tags.append(unicode(raw_tag.strip()))
        if self.agentConfig['collect_ec2_tags']:
            system_tags.extend(EC2.get_tags(self.agentConfig))
        if system_tags:
            payload['host-tags']['system'] = system_tags

        # If required by the user, create the DD_CHECK_TAG host tags
        if self.agentConfig['create_dd_check_tags']:
            dd_check_tags = []
            for initialized_check in self.initialized_checks_d:
                dd_check_tags.append(DD_CHECK_TAG.format(initialized_check.name))
            for jmx_app_name in JMXFiles.get_jmx_appnames():
                dd_check_tags.append(DD_CHECK_TAG.format(jmx_app_name))

            tags_by_source = payload['host-tags']
            if 'system' not in tags_by_source:
                tags_by_source['system'] = []
            tags_by_source['system'].extend(dd_check_tags)

        gce_tags = GCE.get_tags(self.agentConfig)
        if gce_tags is not None:
            payload['host-tags'][GCE.SOURCE_TYPE_NAME] = gce_tags

        # Log the metadata on the first run
        if self._is_first_run():
            log.info("Hostnames: %s, tags: %s" %
                     (repr(self.hostname_metadata_cache), payload['host-tags']))

    # Extra hosts metadata: tags of hosts other than the one the agent runs on,
    # only provided by the checks exposing `get_external_host_tags`
    if self._should_send_additional_data('external_host_tags'):
        collected_tags = []
        for initialized_check in self.initialized_checks_d:
            try:
                collected_tags.extend(initialized_check.get_external_host_tags())
            except AttributeError:
                continue
        if collected_tags:
            payload['external_host_tags'] = collected_tags

    # Agent checks statuses and error/warning messages
    if self._should_send_additional_data('agent_checks'):
        agent_checks = []
        for check_status in check_statuses:
            if check_status.instance_statuses is None:
                agent_checks.append((
                    check_status.name,
                    check_status.source_type_name,
                    "initialization",
                    check_status.status,
                    repr(check_status.init_failed_error)
                ))
                continue

            for position, instance in enumerate(check_status.instance_statuses):
                # The error message or the list of warning messages go in the same field,
                # it will be handled by the UI
                details = instance.error or instance.warnings or ""
                agent_checks.append((
                    check_status.name,
                    check_status.source_type_name,
                    instance.instance_id,
                    instance.status,
                    details,
                    check_status.service_metadata[position]
                ))

        payload['agent_checks'] = agent_checks
        payload['meta'] = self.hostname_metadata_cache  # add hostname metadata
def get_jmx_status():
    """
    Read the JMX status files and return the corresponding list of CheckStatus objects.

    Two YAML files are looked up, at the paths given by JMXFiles:
      - the status file (`JMXFiles.get_status_file_path()`): its 'checks' entry holds the
        'failed_checks' and 'initialized_checks' mappings (check name -> list of instance
        infos). Its 'timestamp' is divided by 1000 before being compared to `time.time()`;
        when the status is more than 60 seconds old, a single "jmx" error status is
        reported instead of the per-check statuses.
      - the Python status file (`JMXFiles.get_python_status_file_path()`): its
        'invalid_checks' entry maps a check name to the error reported as
        `init_failed_error`.

    Returns an empty list when none of the files exists, or when a file can't be loaded
    (the exception is logged, never raised).
    """
    check_statuses = []
    java_status_path = JMXFiles.get_status_file_path()
    python_status_path = JMXFiles.get_python_status_file_path()
    if not os.path.exists(java_status_path) and not os.path.exists(python_status_path):
        log.debug("There is no jmx_status file at: %s or at: %s" %
                  (java_status_path, python_status_path))
        return []

    # check name -> {'statuses': [...], 'metric_count': [...], 'service_check_count': [...]}
    check_data = defaultdict(lambda: defaultdict(list))
    try:
        if os.path.exists(java_status_path):
            # NOTE(review): yaml.load can build arbitrary Python objects; consider
            # yaml.safe_load if this file can't be fully trusted.
            # The context manager makes sure the file handle is closed once parsed.
            with open(java_status_path) as status_file:
                java_jmx_stats = yaml.load(status_file)

            status_age = time.time() - java_jmx_stats.get('timestamp') / 1000
            jmx_checks = java_jmx_stats.get('checks', {})

            if status_age > 60:
                check_statuses.append(
                    CheckStatus("jmx", [
                        InstanceStatus(
                            0,
                            STATUS_ERROR,
                            error="JMXfetch didn't return any metrics during the last minute"
                        )
                    ])
                )
            else:
                # Failed and initialized checks share the same layout and are merged
                # per check name
                for section in ('failed_checks', 'initialized_checks'):
                    for check_name, instances in jmx_checks.get(section, {}).iteritems():
                        for info in instances:
                            message = info.get('message', None)
                            metric_count = info.get('metric_count', 0)
                            service_check_count = info.get('service_check_count', 0)
                            status = info.get('status')
                            instance_name = info.get('instance_name', None)
                            check_data[check_name]['statuses'].append(
                                get_jmx_instance_status(instance_name, status, message, metric_count))
                            check_data[check_name]['metric_count'].append(metric_count)
                            check_data[check_name]['service_check_count'].append(service_check_count)

                for check_name, data in check_data.iteritems():
                    check_status = CheckStatus(
                        check_name, data['statuses'],
                        metric_count=sum(data['metric_count']),
                        service_check_count=sum(data['service_check_count']))
                    check_statuses.append(check_status)

        if os.path.exists(python_status_path):
            # NOTE(review): same remark as above regarding yaml.load
            with open(python_status_path) as status_file:
                python_jmx_stats = yaml.load(status_file)

            jmx_checks = python_jmx_stats.get('invalid_checks', {})
            for check_name, excep in jmx_checks.iteritems():
                check_statuses.append(
                    CheckStatus(check_name, [], init_failed_error=excep))

        return check_statuses

    except Exception:
        log.exception("Couldn't load latest jmx status")
        return []