def _add_jmxinfo_tar(self):
    """Add JMX diagnostics to the archive under the ``jmxinfo`` directory.

    Nothing is collected unless ``self._should_run_jmx`` (evaluated through
    ``self._capture_output``) reports that JMX should run.  When it does,
    three things are archived, in this order:

    1. the two JMX status files, when readable;
    2. the output of the ``list_matching_attributes`` and
       ``list_everything`` jmx commands;
    3. the output of ``<java> -version`` for the configured java binary
       (``java`` when the configuration does not name one).
    """
    _out, _err, jmx_wanted = self._capture_output(self._should_run_jmx)
    if not jmx_wanted:
        return

    # Status files go first: executing jmxfetch (below) overwrites them.
    status_files = (
        (JMXFiles._STATUS_FILE, JMXFiles.get_status_file_path()),
        (JMXFiles._PYTHON_STATUS_FILE, JMXFiles.get_python_status_file_path()),
    )
    for archive_name, source_path in status_files:
        if not self._can_read(source_path, warn=False):
            continue
        self._add_file_tar(source_path, os.path.join('jmxinfo', archive_name))

    # Bean listings, one log file per jmx command.
    for jmx_command in ('list_matching_attributes', 'list_everything'):
        log.info(" * datadog-agent jmx {0} output".format(jmx_command))
        listing_target = os.path.join('jmxinfo', '{0}.log'.format(jmx_command))
        self._add_command_output_tar(
            listing_target,
            partial(self._jmx_command_call, jmx_command)
        )

    # Version of the java binary JMXFetch is configured to use.
    log.info(" * java -version output")
    _out, _err, java_bin_path = self._capture_output(
        lambda: JMXFetch.get_configuration(get_confd_path())[2] or 'java')
    version_target = os.path.join('jmxinfo', 'java_version.log')
    version_desc = "{0} -version".format(java_bin_path)
    self._add_command_output_tar(
        version_target,
        lambda: self._java_version(java_bin_path),
        command_desc=version_desc
    )
def _add_jmxinfo_tar(self):
    """Collect JMX troubleshooting data into ``jmxinfo/`` inside the archive.

    The method is a no-op when ``self._should_run_jmx`` says JMX is not in
    use.  Otherwise it archives the readable JMX status files, the output
    of the two bean-listing jmx commands, and the output of
    ``<java> -version``.
    """
    _stdout, _stderr, jmx_enabled = self._capture_output(self._should_run_jmx)
    if not jmx_enabled:
        return

    def _in_jmxinfo(entry_name):
        # Every artifact of this method lives in the same archive folder.
        return os.path.join('jmxinfo', entry_name)

    # The status files must be captured before any jmx command is executed,
    # because executing jmxfetch overwrites them.
    java_status = (JMXFiles._STATUS_FILE, JMXFiles.get_status_file_path())
    python_status = (JMXFiles._PYTHON_STATUS_FILE,
                     JMXFiles.get_python_status_file_path())
    for status_name, status_path in (java_status, python_status):
        if self._can_read(status_path, warn=False):
            self._add_file_tar(status_path, _in_jmxinfo(status_name))

    # Bean lists.
    for list_command in ['list_matching_attributes', 'list_everything']:
        log.info(" * datadog-agent jmx {0} output".format(list_command))
        self._add_command_output_tar(
            _in_jmxinfo('{0}.log'.format(list_command)),
            partial(self._jmx_command_call, list_command))

    # Java version; the binary defaults to "java" when the JMXFetch
    # configuration does not provide one.
    log.info(" * java -version output")
    _stdout, _stderr, java_bin_path = self._capture_output(
        lambda: JMXFetch.get_configuration(get_confd_path())[2] or 'java')
    self._add_command_output_tar(
        _in_jmxinfo('java_version.log'),
        lambda: self._java_version(java_bin_path),
        command_desc="{0} -version".format(java_bin_path))
def get_jmx_status():
    """Read the two jmxfetch status files and turn them into check statuses.

    Both files are YAML files located in the temp directory:

    - One is generated by the Agent itself, for jmx checks that can't be
      initialized because something is missing. Its format is as follows:

      ###
      invalid_checks:
          jmx: !!python/object/apply:jmxfetch.InvalidJMXConfiguration [You need to have at least one instance defined in the YAML file for this check]
      timestamp: 1391040927.136523
      ###

    - One is generated by jmxfetch and reports on the collection of
      metrics. Its format is as follows:

      ###
      timestamp: 1391037347435
      checks:
          failed_checks:
              jmx:
              - {message: Unable to create instance. Please check your yaml file, status: ERROR}
          initialized_checks:
              tomcat:
              - {message: null, status: OK, metric_count: 7, instance_name: jmx-remihakim.fr-3000}
      ###

    Returns:
        A list of ``CheckStatus`` objects. The list is empty when neither
        file exists, and also when anything goes wrong while reading or
        interpreting the files (the error is logged and any partially
        built result is discarded).
    """
    check_statuses = []
    java_status_path = JMXFiles.get_status_file_path()
    python_status_path = JMXFiles.get_python_status_file_path()
    if not os.path.exists(java_status_path) and not os.path.exists(python_status_path):
        log.debug("There is no jmx_status file at: %s or at: %s" % (java_status_path, python_status_path))
        return []

    # check name -> {'statuses': [...], 'metric_count': [...], 'service_check_count': [...]}
    check_data = defaultdict(lambda: defaultdict(list))
    try:
        if os.path.exists(java_status_path):
            # Close the handle deterministically instead of leaking it.
            with open(java_status_path) as java_status_file:
                java_jmx_stats = yaml.load(java_status_file)

            status_age = time.time() - java_jmx_stats.get('timestamp') / 1000  # JMX timestamp is saved in milliseconds
            jmx_checks = java_jmx_stats.get('checks', {})

            if status_age > 60:
                # A stale file means jmxfetch stopped reporting: surface one
                # generic error instead of outdated per-check data.
                check_statuses.append(
                    CheckStatus("jmx", [
                        InstanceStatus(
                            0,
                            STATUS_ERROR,
                            error="JMXfetch didn't return any metrics during the last minute"
                        )
                    ])
                )
            else:
                # Failed and initialized checks share the same entry layout,
                # so both sections are folded into check_data the same way
                # (failed checks first, as before).
                for section in ('failed_checks', 'initialized_checks'):
                    for check_name, instances in jmx_checks.get(section, {}).iteritems():
                        for info in instances:
                            message = info.get('message', None)
                            metric_count = info.get('metric_count', 0)
                            service_check_count = info.get('service_check_count', 0)
                            status = info.get('status')
                            instance_name = info.get('instance_name', None)

                            collected = check_data[check_name]
                            collected['statuses'].append(
                                get_jmx_instance_status(instance_name, status, message, metric_count))
                            collected['metric_count'].append(metric_count)
                            collected['service_check_count'].append(service_check_count)

                for check_name, data in check_data.iteritems():
                    check_status = CheckStatus(
                        check_name, data['statuses'],
                        metric_count=sum(data['metric_count']),
                        service_check_count=sum(data['service_check_count']))
                    check_statuses.append(check_status)

        if os.path.exists(python_status_path):
            # NOTE(review): yaml.load can construct arbitrary Python objects.
            # The Agent-written file uses python-specific tags (see the
            # docstring sample), so yaml.safe_load is not a drop-in
            # replacement here; keep this file writable by the Agent only.
            with open(python_status_path) as python_status_file:
                python_jmx_stats = yaml.load(python_status_file)

            jmx_checks = python_jmx_stats.get('invalid_checks', {})
            for check_name, excep in jmx_checks.iteritems():
                check_statuses.append(CheckStatus(check_name, [], init_failed_error=excep))

        return check_statuses

    except Exception:
        # Status reporting is best effort: never let a malformed or
        # half-written status file break the caller.
        log.exception("Couldn't load latest jmx status")
        return []
def _start(self, path_to_java, java_run_opts, jmx_checks, command, reporter, tools_jar_path, custom_jar_paths, redirect_std_streams):
    """Build the JMXFetch command line, run it and block until it exits.

    Args:
        path_to_java: java executable to launch; ``"java"`` when falsy.
        java_run_opts: whitespace-separated JVM options (may be falsy).
            Default heap options are appended when no maximum / initial
            heap option is already present.
        jmx_checks: check names passed after ``--check`` (presumably config
            file names such as ``tomcat.yaml``); may be empty or None.
        command: JMXFetch command; always the last argument.
        reporter: JMXFetch reporter string. When None, a
            ``statsd:<host>:<port>`` reporter is derived from the agent
            configuration.
        tools_jar_path: optional jar prepended to the classpath.
        custom_jar_paths: optional list of jars prepended to the classpath.
        redirect_std_streams: when true, the child's stdout/stderr are
            captured in temporary files and replayed on ``sys.stdout`` /
            ``sys.stderr`` once the child has exited.

    Returns:
        The return code of the JMXFetch process.

    Raises:
        OSError: the process could not be launched; the error is recorded
            in the status file for every requested check, then re-raised.
        Exception: any other failure is logged and re-raised.
    """
    # Local import: only needed for the platform-specific classpath separator.
    import os

    if reporter is None:
        statsd_host = self.agentConfig.get('bind_host', 'localhost')
        if statsd_host == "0.0.0.0":
            # If statsd is bound to all interfaces, just use localhost for clients
            statsd_host = "localhost"
        statsd_port = self.agentConfig.get('dogstatsd_port', "8125")
        reporter = "statsd:%s:%s" % (statsd_host, statsd_port)

    log.info("Starting jmxfetch:")
    try:
        path_to_java = path_to_java or "java"
        java_run_opts = java_run_opts or ""
        path_to_jmxfetch = self._get_path_to_jmxfetch()
        path_to_status_file = JMXFiles.get_status_file_path()

        # Java expects ';' between classpath entries on Windows and ':'
        # elsewhere; os.pathsep is the right one for the running platform.
        classpath = path_to_jmxfetch
        if tools_jar_path is not None:
            classpath = r"%s%s%s" % (tools_jar_path, os.pathsep, classpath)
        if custom_jar_paths:
            classpath = r"%s%s%s" % (os.pathsep.join(custom_jar_paths), os.pathsep, classpath)

        subprocess_args = [
            path_to_java,  # Path to the java bin
            '-classpath',
            classpath,
            JMXFETCH_MAIN_CLASS,
            '--check_period', str(self.check_frequency * 1000),  # Period of the main loop of jmxfetch in ms
            '--conf_directory', r"%s" % self.confd_path,  # Path of the conf.d directory that will be read by jmxfetch
            '--log_level', JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"), "INFO"),  # Mapping from Python log level to log4j log levels
            '--log_location', r"%s" % self.logging_config.get('jmxfetch_log_file'),  # Path of the log file
            '--reporter', reporter,  # Reporter to use
            '--status_location', r"%s" % path_to_status_file,  # Path to the status file to write
            command,  # Name of the command
        ]

        if Platform.is_windows():
            # Signal handlers are not supported on Windows:
            # use a file to trigger JMXFetch exit instead.
            # Inserted just before the trailing command.
            path_to_exit_file = JMXFiles.get_python_exit_file_path()
            subprocess_args.insert(len(subprocess_args) - 1, '--exit_file_location')
            subprocess_args.insert(len(subprocess_args) - 1, path_to_exit_file)

        # Index 4 is the slot right after the main class: everything
        # inserted there becomes a leading JMXFetch option.
        if self.service_discovery:
            pipe_path = get_jmx_pipe_path()
            subprocess_args.insert(4, '--tmp_directory')
            subprocess_args.insert(5, pipe_path)
            subprocess_args.insert(4, '--sd_standby')

        if jmx_checks:
            subprocess_args.insert(4, '--check')
            # Repeated insertion at the same index puts the checks on the
            # command line in reverse order of jmx_checks.
            for check in jmx_checks:
                subprocess_args.insert(5, check)

        # Specify a maximum memory allocation pool for the JVM
        # NOTE(review): assumes the _JVM_DEFAULT_* constants carry their own
        # leading whitespace, since they are appended as-is — confirm.
        if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_SD_MAX_MEMORY_ALLOCATION if self.service_discovery else _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
        # Specify the initial memory allocation pool for the JVM
        if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

        # JVM options must precede '-classpath', hence index 1.
        for opt in java_run_opts.split():
            subprocess_args.insert(1, opt)

        log.info("Running %s" % " ".join(subprocess_args))

        # Launch JMXfetch subprocess manually, w/o get_subprocess_output(), since it's a special case
        with nested(tempfile.TemporaryFile(), tempfile.TemporaryFile()) as (stdout_f, stderr_f):
            jmx_process = subprocess.Popen(
                subprocess_args,
                close_fds=not redirect_std_streams,  # only set to True when the streams are not redirected, for WIN compatibility
                stdout=stdout_f if redirect_std_streams else None,
                stderr=stderr_f if redirect_std_streams else None
            )
            self.jmx_process = jmx_process

            # Register SIGINT and SIGTERM signal handlers
            self.register_signal_handlers()

            # Wait for JMXFetch to return
            jmx_process.wait()

            if redirect_std_streams:
                # Write out the stdout and stderr of JMXFetch to sys.stdout and sys.stderr
                stderr_f.seek(0)
                err = stderr_f.read()
                stdout_f.seek(0)
                out = stdout_f.read()
                sys.stdout.write(out)
                sys.stderr.write(err)

        return jmx_process.returncode

    except OSError:
        java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
        log.exception(java_path_msg)
        invalid_checks = {}
        # jmx_checks may be None/empty here; iterating None would raise a
        # TypeError that hides the OSError being reported.
        for check in jmx_checks or ():
            check_name = check.split('.')[0]
            check_name = check_name.encode('ascii', 'ignore')
            invalid_checks[check_name] = java_path_msg
        JMXFiles.write_status_file(invalid_checks)
        raise
    except Exception:
        log.exception("Couldn't launch JMXFetch")
        raise
def _start(self, path_to_java, java_run_opts, jmx_checks, command, reporter, tools_jar_path, custom_jar_paths, redirect_std_streams):
    """Build the JMXFetch command line, run it and block until it exits.

    Args:
        path_to_java: java executable to launch; ``"java"`` when falsy.
        java_run_opts: whitespace-separated JVM options (may be falsy).
            Default heap options are appended when no maximum / initial
            heap option is already present.
        jmx_checks: iterable of check names passed after ``--check``
            (presumably config file names such as ``tomcat.yaml``).
        command: JMXFetch command; always the last argument.
        reporter: JMXFetch reporter string. When None, ``statsd:<port>`` is
            built from the ``dogstatsd_port`` agent setting.
        tools_jar_path: optional jar prepended to the classpath.
        custom_jar_paths: optional list of jars prepended to the classpath.
        redirect_std_streams: when true, the child's stdout/stderr are
            captured in temporary files and replayed on ``sys.stdout`` /
            ``sys.stderr`` once the child has exited.

    Returns:
        The return code of the JMXFetch process.

    Raises:
        OSError: the process could not be launched; the error is recorded
            in the status file for every requested check, then re-raised.
        Exception: any other failure is logged and re-raised.
    """
    # Local import: only needed for the platform-specific classpath separator.
    import os

    statsd_port = self.agentConfig.get('dogstatsd_port', "8125")
    if reporter is None:
        reporter = "statsd:%s" % str(statsd_port)

    log.info("Starting jmxfetch:")
    try:
        path_to_java = path_to_java or "java"
        java_run_opts = java_run_opts or ""
        path_to_jmxfetch = self._get_path_to_jmxfetch()
        path_to_status_file = JMXFiles.get_status_file_path()

        # Java expects ';' between classpath entries on Windows and ':'
        # elsewhere; os.pathsep is the right one for the running platform.
        classpath = path_to_jmxfetch
        if tools_jar_path is not None:
            classpath = r"%s%s%s" % (tools_jar_path, os.pathsep, classpath)
        if custom_jar_paths:
            classpath = r"%s%s%s" % (os.pathsep.join(custom_jar_paths), os.pathsep, classpath)

        subprocess_args = [
            path_to_java,  # Path to the java bin
            '-classpath',
            classpath,
            JMXFETCH_MAIN_CLASS,
            '--check_period', str(self.check_frequency * 1000),  # Period of the main loop of jmxfetch in ms
            '--conf_directory', r"%s" % self.confd_path,  # Path of the conf.d directory that will be read by jmxfetch
            '--log_level', JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"), "INFO"),  # Mapping from Python log level to log4j log levels
            '--log_location', r"%s" % self.logging_config.get('jmxfetch_log_file'),  # Path of the log file
            '--reporter', reporter,  # Reporter to use
            '--status_location', r"%s" % path_to_status_file,  # Path to the status file to write
            command,  # Name of the command
        ]

        if Platform.is_windows():
            # Signal handlers are not supported on Windows:
            # use a file to trigger JMXFetch exit instead.
            # Inserted just before the trailing command.
            path_to_exit_file = JMXFiles.get_python_exit_file_path()
            subprocess_args.insert(len(subprocess_args) - 1, '--exit_file_location')
            subprocess_args.insert(len(subprocess_args) - 1, path_to_exit_file)

        # Index 4 is the slot right after the main class. Repeated insertion
        # at index 5 lists the checks in reverse order of jmx_checks.
        subprocess_args.insert(4, '--check')
        for check in jmx_checks:
            subprocess_args.insert(5, check)

        # Specify a maximum memory allocation pool for the JVM
        # NOTE(review): assumes the _JVM_DEFAULT_* constants carry their own
        # leading whitespace, since they are appended as-is — confirm.
        if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
        # Specify the initial memory allocation pool for the JVM
        if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

        # JVM options must precede '-classpath', hence index 1.
        for opt in java_run_opts.split():
            subprocess_args.insert(1, opt)

        log.info("Running %s" % " ".join(subprocess_args))

        # Launch JMXfetch subprocess manually, w/o get_subprocess_output(), since it's a special case.
        # The temporary files use the default read/write binary mode: 'rw'
        # is not a valid open mode.
        with nested(tempfile.TemporaryFile(), tempfile.TemporaryFile()) as (stdout_f, stderr_f):
            jmx_process = subprocess.Popen(
                subprocess_args,
                close_fds=not redirect_std_streams,  # only set to True when the streams are not redirected, for WIN compatibility
                stdout=stdout_f if redirect_std_streams else None,
                stderr=stderr_f if redirect_std_streams else None
            )
            self.jmx_process = jmx_process

            # Register SIGINT and SIGTERM signal handlers
            self.register_signal_handlers()

            # Wait for JMXFetch to return
            jmx_process.wait()

            if redirect_std_streams:
                # The streams were sent to files, not pipes, so
                # communicate() would return (None, None): read the
                # captured output back from the temporary files instead.
                stderr_f.seek(0)
                err = stderr_f.read()
                stdout_f.seek(0)
                out = stdout_f.read()
                sys.stdout.write(out)
                sys.stderr.write(err)

        return jmx_process.returncode

    except OSError:
        java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
        log.exception(java_path_msg)
        invalid_checks = {}
        # Guard against a None jmx_checks so a TypeError raised here cannot
        # hide the OSError being reported.
        for check in jmx_checks or ():
            check_name = check.split('.')[0]
            check_name = check_name.encode('ascii', 'ignore')
            invalid_checks[check_name] = java_path_msg
        JMXFiles.write_status_file(invalid_checks)
        raise
    except Exception:
        log.exception("Couldn't launch JMXFetch")
        raise
def get_jmx_status():
    """Read the two jmxfetch status files and turn them into check statuses.

    Both files are YAML files located in the temp directory:

    - One is generated by the Agent itself, for jmx checks that can't be
      initialized because something is missing. Its format is as follows:

      ###
      invalid_checks:
          jmx: !!python/object/apply:jmxfetch.InvalidJMXConfiguration [You need to have at least one instance defined in the YAML file for this check]
      timestamp: 1391040927.136523
      ###

    - One is generated by jmxfetch and reports on the collection of
      metrics. Its format is as follows:

      ###
      timestamp: 1391037347435
      checks:
          failed_checks:
              jmx:
              - {message: Unable to create instance. Please check your yaml file, status: ERROR}
          initialized_checks:
              tomcat:
              - {message: null, status: OK, metric_count: 7, instance_name: jmx-remihakim.fr-3000}
      ###

    Returns:
        A list of ``CheckStatus`` objects. The list is empty when neither
        file exists, and also when anything goes wrong while reading or
        interpreting the files (the error is logged and any partially
        built result is discarded).
    """
    check_statuses = []
    java_status_path = JMXFiles.get_status_file_path()
    python_status_path = JMXFiles.get_python_status_file_path()
    if not os.path.exists(java_status_path) and not os.path.exists(python_status_path):
        log.debug("There is no jmx_status file at: %s or at: %s" % (java_status_path, python_status_path))
        return []

    # check name -> {'statuses': [...], 'metric_count': [...], 'service_check_count': [...]}
    check_data = defaultdict(lambda: defaultdict(list))
    try:
        if os.path.exists(java_status_path):
            # Close the handle deterministically instead of leaking it.
            with open(java_status_path) as java_status_file:
                java_jmx_stats = yaml.load(java_status_file)

            status_age = time.time() - java_jmx_stats.get('timestamp') / 1000  # JMX timestamp is saved in milliseconds
            jmx_checks = java_jmx_stats.get('checks', {})

            if status_age > 60:
                # A stale file means jmxfetch stopped reporting: surface one
                # generic error instead of outdated per-check data.
                check_statuses.append(
                    CheckStatus("jmx", [
                        InstanceStatus(
                            0,
                            STATUS_ERROR,
                            error="JMXfetch didn't return any metrics during the last minute"
                        )
                    ])
                )
            else:
                # Failed and initialized checks share the same entry layout,
                # so both sections are folded into check_data the same way
                # (failed checks first, as before).
                for section in ('failed_checks', 'initialized_checks'):
                    for check_name, instances in jmx_checks.get(section, {}).iteritems():
                        for info in instances:
                            message = info.get('message', None)
                            metric_count = info.get('metric_count', 0)
                            service_check_count = info.get('service_check_count', 0)
                            status = info.get('status')
                            instance_name = info.get('instance_name', None)

                            collected = check_data[check_name]
                            collected['statuses'].append(
                                get_jmx_instance_status(instance_name, status, message, metric_count))
                            collected['metric_count'].append(metric_count)
                            collected['service_check_count'].append(service_check_count)

                for check_name, data in check_data.iteritems():
                    check_status = CheckStatus(
                        check_name, data['statuses'],
                        metric_count=sum(data['metric_count']),
                        service_check_count=sum(data['service_check_count']))
                    check_statuses.append(check_status)

        if os.path.exists(python_status_path):
            # NOTE(review): yaml.load can construct arbitrary Python objects.
            # The Agent-written file uses python-specific tags (see the
            # docstring sample), so yaml.safe_load is not a drop-in
            # replacement here; keep this file writable by the Agent only.
            with open(python_status_path) as python_status_file:
                python_jmx_stats = yaml.load(python_status_file)

            jmx_checks = python_jmx_stats.get('invalid_checks', {})
            for check_name, excep in jmx_checks.iteritems():
                check_statuses.append(CheckStatus(check_name, [], init_failed_error=excep))

        return check_statuses

    except Exception:
        # Status reporting is best effort: never let a malformed or
        # half-written status file break the caller.
        log.exception("Couldn't load latest jmx status")
        return []
def _start(self, path_to_java, java_run_opts, jmx_checks, command, reporter, tools_jar_path, redirect_std_streams):
    """Build the JMXFetch command line, run it and block until it exits.

    Args:
        path_to_java: java executable to launch; ``"java"`` when falsy.
        java_run_opts: whitespace-separated JVM options (may be falsy).
            Default heap options are appended when no maximum / initial
            heap option is already present.
        jmx_checks: iterable of check names passed after ``--check``
            (presumably config file names such as ``tomcat.yaml``).
        command: JMXFetch command; always the last argument.
        reporter: JMXFetch reporter string. When None, ``statsd:<port>`` is
            built from the ``dogstatsd_port`` agent setting.
        tools_jar_path: optional jar prepended to the classpath.
        redirect_std_streams: when true, the child's stdout/stderr are
            piped and replayed on ``sys.stdout`` / ``sys.stderr`` once the
            child has exited.

    Returns:
        The return code of the JMXFetch process.

    Raises:
        OSError: the process could not be launched; the error is recorded
            in the status file for every requested check, then re-raised.
        Exception: any other failure is logged and re-raised.
    """
    # Local import: only needed for the platform-specific classpath separator.
    import os

    statsd_port = self.agentConfig.get('dogstatsd_port', "8125")
    if reporter is None:
        reporter = "statsd:%s" % str(statsd_port)

    log.info("Starting jmxfetch:")
    try:
        path_to_java = path_to_java or "java"
        java_run_opts = java_run_opts or ""
        path_to_jmxfetch = self._get_path_to_jmxfetch()
        path_to_status_file = JMXFiles.get_status_file_path()

        if tools_jar_path is None:
            classpath = path_to_jmxfetch
        else:
            # Java expects ';' between classpath entries on Windows and ':'
            # elsewhere; os.pathsep is the right one for the running platform.
            classpath = r"%s%s%s" % (tools_jar_path, os.pathsep, path_to_jmxfetch)

        subprocess_args = [
            path_to_java,  # Path to the java bin
            '-classpath',
            classpath,
            JMXFETCH_MAIN_CLASS,
            '--check_period', str(self.check_frequency * 1000),  # Period of the main loop of jmxfetch in ms
            '--conf_directory', r"%s" % self.confd_path,  # Path of the conf.d directory that will be read by jmxfetch
            '--log_level', JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"), "INFO"),  # Mapping from Python log level to log4j log levels
            '--log_location', r"%s" % self.logging_config.get('jmxfetch_log_file'),  # Path of the log file
            '--reporter', reporter,  # Reporter to use
            '--status_location', r"%s" % path_to_status_file,  # Path to the status file to write
            command,  # Name of the command
        ]

        if Platform.is_windows():
            # Signal handlers are not supported on Windows:
            # use a file to trigger JMXFetch exit instead.
            # Inserted just before the trailing command.
            path_to_exit_file = JMXFiles.get_python_exit_file_path()
            subprocess_args.insert(len(subprocess_args) - 1, '--exit_file_location')
            subprocess_args.insert(len(subprocess_args) - 1, path_to_exit_file)

        # Index 4 is the slot right after the main class. Repeated insertion
        # at index 5 lists the checks in reverse order of jmx_checks.
        subprocess_args.insert(4, '--check')
        for check in jmx_checks:
            subprocess_args.insert(5, check)

        # Specify a maximum memory allocation pool for the JVM
        # NOTE(review): assumes the _JVM_DEFAULT_* constants carry their own
        # leading whitespace, since they are appended as-is — confirm.
        if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
        # Specify the initial memory allocation pool for the JVM
        if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

        # JVM options must precede '-classpath', hence index 1.
        for opt in java_run_opts.split():
            subprocess_args.insert(1, opt)

        log.info("Running %s" % " ".join(subprocess_args))

        # Launch JMXfetch subprocess
        jmx_process = subprocess.Popen(
            subprocess_args,
            # close_fds is True only when the streams are NOT redirected:
            # closing fds while redirecting the standard handles is not
            # supported on Windows.
            close_fds=not redirect_std_streams,
            stdout=subprocess.PIPE if redirect_std_streams else None,
            stderr=subprocess.PIPE if redirect_std_streams else None
        )
        self.jmx_process = jmx_process

        # Register SIGINT and SIGTERM signal handlers
        self.register_signal_handlers()

        if redirect_std_streams:
            # Wait for JMXFetch to return, and write out the stdout and stderr of JMXFetch to sys.stdout and sys.stderr
            out, err = jmx_process.communicate()
            sys.stdout.write(out)
            sys.stderr.write(err)
        else:
            # Wait for JMXFetch to return
            jmx_process.wait()

        return jmx_process.returncode

    except OSError:
        java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
        log.exception(java_path_msg)
        invalid_checks = {}
        # Guard against a None jmx_checks so a TypeError raised here cannot
        # hide the OSError being reported.
        for check in jmx_checks or ():
            check_name = check.split('.')[0]
            check_name = check_name.encode('ascii', 'ignore')
            invalid_checks[check_name] = java_path_msg
        JMXFiles.write_status_file(invalid_checks)
        raise
    except Exception:
        log.exception("Couldn't launch JMXFetch")
        raise
def _start(self, path_to_java, java_run_opts, jmx_checks, command, reporter, tools_jar_path, custom_jar_paths, redirect_std_streams):
    """Build the JMXFetch command line and hand it to ``self.execute``.

    Args:
        path_to_java: java executable to launch; ``"java"`` when falsy.
        java_run_opts: whitespace-separated JVM options (may be falsy).
            Default heap options are appended when no maximum / initial
            heap option is already present.
        jmx_checks: check names passed after ``--check`` (presumably config
            file names such as ``tomcat.yaml``); may be empty or None.
        command: JMXFetch command; always the last argument.
        reporter: JMXFetch reporter string. When None, a
            ``statsd:<host>:<port>`` reporter is derived from the agent
            configuration.
        tools_jar_path: optional jar prepended to the classpath.
        custom_jar_paths: optional list of jars prepended to the classpath.
        redirect_std_streams: forwarded unchanged to ``self.execute``.

    Returns:
        Whatever ``self.execute(subprocess_args, redirect_std_streams)``
        returns.

    Raises:
        OSError: the process could not be launched; the error is recorded
            in the status file for every requested check, then re-raised.
        Exception: any other failure is logged and re-raised.
    """
    # Local import: only needed for the platform-specific classpath separator.
    import os

    if reporter is None:
        statsd_host = self.agent_config.get('bind_host', 'localhost')
        if statsd_host == "0.0.0.0":
            # If statsd is bound to all interfaces, just use localhost for clients
            statsd_host = "localhost"
        statsd_port = self.agent_config.get('dogstatsd_port', "8125")
        reporter = "statsd:%s:%s" % (statsd_host, statsd_port)

    log.info("Starting jmxfetch:")
    try:
        path_to_java = path_to_java or "java"
        java_run_opts = java_run_opts or ""
        path_to_jmxfetch = self._get_path_to_jmxfetch()
        path_to_status_file = JMXFiles.get_status_file_path()

        # Java expects ';' between classpath entries on Windows and ':'
        # elsewhere; os.pathsep is the right one for the running platform.
        classpath = path_to_jmxfetch
        if tools_jar_path is not None:
            classpath = r"%s%s%s" % (tools_jar_path, os.pathsep, classpath)
        if custom_jar_paths:
            classpath = r"%s%s%s" % (os.pathsep.join(custom_jar_paths), os.pathsep, classpath)

        subprocess_args = [
            path_to_java,  # Path to the java bin
            '-classpath',
            classpath,
            JMXFETCH_MAIN_CLASS,
            '--check_period', str(self.check_frequency * 1000),  # Period of the main loop of jmxfetch in ms
            '--conf_directory', r"%s" % self.confd_path,  # Path of the conf.d directory that will be read by jmxfetch
            '--log_level', JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"), "INFO"),  # Mapping from Python log level to log4j log levels
            '--log_location', r"%s" % self.logging_config.get('jmxfetch_log_file'),  # Path of the log file
            '--reporter', reporter,  # Reporter to use
            '--status_location', r"%s" % path_to_status_file,  # Path to the status file to write
            command,  # Name of the command
        ]

        if Platform.is_windows():
            # Signal handlers are not supported on Windows:
            # use a file to trigger JMXFetch exit instead.
            # Inserted just before the trailing command.
            path_to_exit_file = JMXFiles.get_python_exit_file_path()
            subprocess_args.insert(len(subprocess_args) - 1, '--exit_file_location')
            subprocess_args.insert(len(subprocess_args) - 1, path_to_exit_file)

        # Index 4 is the slot right after the main class: everything
        # inserted there becomes a leading JMXFetch option. The resulting
        # order is: --sd_enabled --sd_pipe <name> --tmp_directory <path>.
        if self.service_discovery:
            pipe_path = get_jmx_pipe_path()
            subprocess_args.insert(4, '--tmp_directory')
            subprocess_args.insert(5, pipe_path)
            subprocess_args.insert(4, '--sd_pipe')
            subprocess_args.insert(5, SD_PIPE_NAME)
            subprocess_args.insert(4, '--sd_enabled')

        if jmx_checks:
            subprocess_args.insert(4, '--check')
            # Repeated insertion at the same index puts the checks on the
            # command line in reverse order of jmx_checks.
            for check in jmx_checks:
                subprocess_args.insert(5, check)

        # Specify a maximum memory allocation pool for the JVM
        # NOTE(review): assumes the _JVM_DEFAULT_* constants carry their own
        # leading whitespace, since they are appended as-is — confirm.
        if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_SD_MAX_MEMORY_ALLOCATION if self.service_discovery else _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
        # Specify the initial memory allocation pool for the JVM
        if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

        # JVM options must precede '-classpath', hence index 1.
        for opt in java_run_opts.split():
            subprocess_args.insert(1, opt)

        log.info("Running %s" % " ".join(subprocess_args))

        return self.execute(subprocess_args, redirect_std_streams)

    except OSError:
        java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
        log.exception(java_path_msg)
        invalid_checks = {}
        # jmx_checks may be None/empty here; iterating None would raise a
        # TypeError that hides the OSError being reported.
        for check in jmx_checks or ():
            check_name = check.split('.')[0]
            check_name = check_name.encode('ascii', 'ignore')
            invalid_checks[check_name] = java_path_msg
        JMXFiles.write_status_file(invalid_checks)
        raise
    except Exception:
        log.info("unable to launch JMXFetch")
        raise
def _start(self, path_to_java, java_run_opts, jmx_checks, command, reporter, tools_jar_path, custom_jar_paths, redirect_std_streams):
    """Build the JMXFetch command line and hand it to ``self.execute``.

    Args:
        path_to_java: java executable to launch; ``"java"`` when falsy.
        java_run_opts: whitespace-separated JVM options (may be falsy).
            Default heap options are appended when no maximum / initial
            heap option is already present.
        jmx_checks: check names passed after ``--check`` (presumably config
            file names such as ``tomcat.yaml``); may be empty or None.
        command: JMXFetch command; always the last argument.
        reporter: JMXFetch reporter string. When None, a
            ``statsd:<host>:<port>`` reporter is derived from the agent
            configuration.
        tools_jar_path: optional jar prepended to the classpath.
        custom_jar_paths: optional list of jars prepended to the classpath.
        redirect_std_streams: forwarded unchanged to ``self.execute``.

    The classpath is built front to back as: ``self.config_jar_path`` (when
    set), the custom jars, ``tools_jar_path``, then the JMXFetch jar.

    Returns:
        Whatever ``self.execute(subprocess_args, redirect_std_streams)``
        returns.

    Raises:
        OSError: the process could not be launched; the error is recorded
            in the status file for every requested check, then re-raised.
        Exception: any other failure is logged and re-raised.
    """
    # Local import: only needed for the platform-specific classpath separator.
    import os

    if reporter is None:
        statsd_host = self.agent_config.get('bind_host', 'localhost')
        if statsd_host == "0.0.0.0":
            # If statsd is bound to all interfaces, just use localhost for clients
            statsd_host = "localhost"
        statsd_port = self.agent_config.get('dogstatsd_port', "8125")
        reporter = "statsd:%s:%s" % (statsd_host, statsd_port)

    log.info("Starting jmxfetch:")
    try:
        path_to_java = path_to_java or "java"
        java_run_opts = java_run_opts or ""
        path_to_jmxfetch = self._get_path_to_jmxfetch()
        path_to_status_file = JMXFiles.get_status_file_path()

        # Java expects ';' between classpath entries on Windows and ':'
        # elsewhere; os.pathsep is the right one for the running platform.
        classpath = path_to_jmxfetch
        if tools_jar_path is not None:
            classpath = r"%s%s%s" % (tools_jar_path, os.pathsep, classpath)
        if custom_jar_paths:
            classpath = r"%s%s%s" % (os.pathsep.join(custom_jar_paths), os.pathsep, classpath)
        if self.config_jar_path:
            classpath = r"%s%s%s" % (self.config_jar_path, os.pathsep, classpath)

        subprocess_args = [
            path_to_java,  # Path to the java bin
            '-classpath',
            classpath,
            JMXFETCH_MAIN_CLASS,
            '--check_period', str(self.check_frequency * 1000),  # Period of the main loop of jmxfetch in ms
            '--conf_directory', r"%s" % self.confd_path,  # Path of the conf.d directory that will be read by jmxfetch
            '--log_level', JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"), "INFO"),  # Mapping from Python log level to log4j log levels
            '--log_location', r"%s" % self.logging_config.get('jmxfetch_log_file'),  # Path of the log file
            '--reporter', reporter,  # Reporter to use
            '--status_location', r"%s" % path_to_status_file,  # Path to the status file to write
            command,  # Name of the command
        ]

        if Platform.is_windows():
            # Signal handlers are not supported on Windows:
            # use a file to trigger JMXFetch exit instead.
            # Inserted just before the trailing command.
            path_to_exit_file = JMXFiles.get_python_exit_file_path()
            subprocess_args.insert(len(subprocess_args) - 1, '--exit_file_location')
            subprocess_args.insert(len(subprocess_args) - 1, path_to_exit_file)

        # Index 4 is the slot right after the main class: everything
        # inserted there becomes a leading JMXFetch option. The resulting
        # order is: --sd_enabled --sd_pipe <name> --tmp_directory <path>.
        if self.service_discovery:
            pipe_path = get_jmx_pipe_path()
            subprocess_args.insert(4, '--tmp_directory')
            subprocess_args.insert(5, pipe_path)
            subprocess_args.insert(4, '--sd_pipe')
            subprocess_args.insert(5, SD_PIPE_NAME)
            subprocess_args.insert(4, '--sd_enabled')

        if jmx_checks:
            subprocess_args.insert(4, '--check')
            # Repeated insertion at the same index puts the checks on the
            # command line in reverse order of jmx_checks.
            for check in jmx_checks:
                subprocess_args.insert(5, check)

        # Specify a maximum memory allocation pool for the JVM
        # NOTE(review): assumes the _JVM_DEFAULT_* constants carry their own
        # leading whitespace, since they are appended as-is — confirm.
        if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_SD_MAX_MEMORY_ALLOCATION if self.service_discovery else _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
        # Specify the initial memory allocation pool for the JVM
        if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

        # JVM options must precede '-classpath', hence index 1.
        for opt in java_run_opts.split():
            subprocess_args.insert(1, opt)

        log.info("Running %s" % " ".join(subprocess_args))

        return self.execute(subprocess_args, redirect_std_streams)

    except OSError:
        java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
        log.exception(java_path_msg)
        invalid_checks = {}
        # jmx_checks may be None/empty here; iterating None would raise a
        # TypeError that hides the OSError being reported.
        for check in jmx_checks or ():
            check_name = check.split('.')[0]
            check_name = check_name.encode('ascii', 'ignore')
            invalid_checks[check_name] = java_path_msg
        JMXFiles.write_status_file(invalid_checks)
        raise
    except Exception:
        log.info("unable to launch JMXFetch")
        raise
def _start(self, path_to_java, java_run_opts, jmx_checks, command, reporter, tools_jar_path, custom_jar_paths, redirect_std_streams):
    """Build the JMXFetch command line, run it and block until it exits.

    Args:
        path_to_java: java executable to launch; ``"java"`` when falsy.
        java_run_opts: whitespace-separated JVM options (may be falsy).
            Default heap options are appended when no maximum / initial
            heap option is already present.
        jmx_checks: iterable of check names passed after ``--check``
            (presumably config file names such as ``tomcat.yaml``).
        command: JMXFetch command; always the last argument.
        reporter: JMXFetch reporter string. When None, a
            ``statsd:<host>:<port>`` reporter is derived from the
            ``bind_host`` and ``monitorstatsd_port`` agent settings.
        tools_jar_path: optional jar prepended to the classpath.
        custom_jar_paths: optional list of jars prepended to the classpath.
        redirect_std_streams: when true, the child's stdout/stderr are
            captured in temporary files and replayed on ``sys.stdout`` /
            ``sys.stderr`` once the child has exited.

    Returns:
        The return code of the JMXFetch process.

    Raises:
        OSError: the process could not be launched; the error is recorded
            in the status file for every requested check, then re-raised.
        Exception: any other failure is logged and re-raised.
    """
    # Local import: only needed for the platform-specific classpath separator.
    import os

    if reporter is None:
        statsd_host = self.agentConfig.get('bind_host', 'localhost')
        if statsd_host == "0.0.0.0":
            # The server is bound to all interfaces; "0.0.0.0" is not a
            # reliable destination for a client, so target localhost.
            statsd_host = "localhost"
        statsd_port = self.agentConfig.get('monitorstatsd_port', "8125")
        reporter = "statsd:%s:%s" % (statsd_host, statsd_port)

    log.info("Starting jmxfetch:")
    try:
        path_to_java = path_to_java or "java"
        java_run_opts = java_run_opts or ""
        path_to_jmxfetch = self._get_path_to_jmxfetch()
        path_to_status_file = JMXFiles.get_status_file_path()

        # Java expects ';' between classpath entries on Windows and ':'
        # elsewhere; os.pathsep is the right one for the running platform.
        classpath = path_to_jmxfetch
        if tools_jar_path is not None:
            classpath = r"%s%s%s" % (tools_jar_path, os.pathsep, classpath)
        if custom_jar_paths:
            classpath = r"%s%s%s" % (os.pathsep.join(custom_jar_paths), os.pathsep, classpath)

        subprocess_args = [
            path_to_java,
            '-classpath',
            classpath,
            JMXFETCH_MAIN_CLASS,
            '--check_period', str(self.check_frequency * 1000),  # main loop period, in ms
            '--conf_directory', r"%s" % self.confd_path,
            '--log_level', JAVA_LOGGING_LEVEL.get(self.logging_config.get("log_level"), "INFO"),
            '--log_location', r"%s" % self.logging_config.get('jmxfetch_log_file'),
            '--reporter', reporter,
            '--status_location', r"%s" % path_to_status_file,
            command,  # must stay the last argument
        ]

        if Platform.is_windows():
            # The exit-file option is inserted just before the trailing command.
            path_to_exit_file = JMXFiles.get_python_exit_file_path()
            subprocess_args.insert(len(subprocess_args) - 1, '--exit_file_location')
            subprocess_args.insert(len(subprocess_args) - 1, path_to_exit_file)

        # Index 4 is the slot right after the main class. Repeated insertion
        # at index 5 lists the checks in reverse order of jmx_checks.
        subprocess_args.insert(4, '--check')
        for check in jmx_checks:
            subprocess_args.insert(5, check)

        # Append default heap settings unless the caller already chose some.
        # NOTE(review): assumes the _JVM_DEFAULT_* constants carry their own
        # leading whitespace, since they are appended as-is — confirm.
        if "Xmx" not in java_run_opts and "XX:MaxHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_MAX_MEMORY_ALLOCATION
        if "Xms" not in java_run_opts and "XX:InitialHeapSize" not in java_run_opts:
            java_run_opts += _JVM_DEFAULT_INITIAL_MEMORY_ALLOCATION

        # JVM options must precede '-classpath', hence index 1.
        for opt in java_run_opts.split():
            subprocess_args.insert(1, opt)

        log.info("Running %s" % " ".join(subprocess_args))

        with nested(tempfile.TemporaryFile(), tempfile.TemporaryFile()) as (stdout_f, stderr_f):
            jmx_process = subprocess.Popen(
                subprocess_args,
                # close_fds is True only when the streams are not redirected.
                close_fds=not redirect_std_streams,
                stdout=stdout_f if redirect_std_streams else None,
                stderr=stderr_f if redirect_std_streams else None)
            self.jmx_process = jmx_process

            self.register_signal_handlers()

            # Block until JMXFetch exits.
            jmx_process.wait()

            if redirect_std_streams:
                # Replay the captured output on this process's own streams.
                stderr_f.seek(0)
                err = stderr_f.read()
                stdout_f.seek(0)
                out = stdout_f.read()
                sys.stdout.write(out)
                sys.stderr.write(err)

        return jmx_process.returncode

    except OSError:
        java_path_msg = "Couldn't launch JMXTerm. Is Java in your PATH ?"
        log.exception(java_path_msg)
        invalid_checks = {}
        # Guard against a None jmx_checks so a TypeError raised here cannot
        # hide the OSError being reported.
        for check in jmx_checks or ():
            check_name = check.split('.')[0]
            check_name = check_name.encode('ascii', 'ignore')
            invalid_checks[check_name] = java_path_msg
        JMXFiles.write_status_file(invalid_checks)
        raise
    except Exception:
        log.exception("Couldn't launch JMXFetch")
        raise
def get_jmx_status():
    """Build the list of JMX check statuses from the two status files.

    Two YAML files are consulted, each only when it exists:

    - the file at ``JMXFiles.get_status_file_path()``, which carries a
      millisecond ``timestamp`` and a ``checks`` mapping with
      ``failed_checks`` and ``initialized_checks`` sections;
    - the file at ``JMXFiles.get_python_status_file_path()``, which carries
      an ``invalid_checks`` mapping of check name to initialization error.

    When the first file is more than 60 seconds old, a single generic
    ``jmx`` error status is reported instead of its per-check content.

    Returns:
        A list of ``CheckStatus`` objects. The list is empty when neither
        file exists, and also when anything goes wrong while reading or
        interpreting the files (the error is logged and any partially
        built result is discarded).
    """
    check_statuses = []
    java_status_path = JMXFiles.get_status_file_path()
    python_status_path = JMXFiles.get_python_status_file_path()
    if not os.path.exists(java_status_path) and not os.path.exists(python_status_path):
        log.debug("There is no jmx_status file at: %s or at: %s" %
                  (java_status_path, python_status_path))
        return []

    # check name -> {'statuses': [...], 'metric_count': [...], 'service_check_count': [...]}
    check_data = defaultdict(lambda: defaultdict(list))
    try:
        if os.path.exists(java_status_path):
            # Close the handle deterministically instead of leaking it.
            with open(java_status_path) as java_status_file:
                java_jmx_stats = yaml.load(java_status_file)

            # The timestamp is divided by 1000 to compare it with time.time().
            status_age = time.time() - java_jmx_stats.get('timestamp') / 1000
            jmx_checks = java_jmx_stats.get('checks', {})

            if status_age > 60:
                check_statuses.append(
                    CheckStatus("jmx", [
                        InstanceStatus(
                            0,
                            STATUS_ERROR,
                            error="JMXfetch didn't return any metrics during the last minute"
                        )
                    ]))
            else:
                # Both sections use the same entry layout and are folded
                # into check_data identically (failed checks first).
                for section in ('failed_checks', 'initialized_checks'):
                    for check_name, instances in jmx_checks.get(section, {}).iteritems():
                        for info in instances:
                            message = info.get('message', None)
                            metric_count = info.get('metric_count', 0)
                            service_check_count = info.get('service_check_count', 0)
                            status = info.get('status')
                            instance_name = info.get('instance_name', None)

                            collected = check_data[check_name]
                            collected['statuses'].append(
                                get_jmx_instance_status(instance_name, status,
                                                        message, metric_count))
                            collected['metric_count'].append(metric_count)
                            collected['service_check_count'].append(service_check_count)

                for check_name, data in check_data.iteritems():
                    check_status = CheckStatus(
                        check_name,
                        data['statuses'],
                        metric_count=sum(data['metric_count']),
                        service_check_count=sum(data['service_check_count']))
                    check_statuses.append(check_status)

        if os.path.exists(python_status_path):
            # NOTE(review): yaml.load can construct arbitrary Python objects;
            # presumably this file stores serialized exception objects, in
            # which case yaml.safe_load is not a drop-in replacement —
            # verify before changing the loader.
            with open(python_status_path) as python_status_file:
                python_jmx_stats = yaml.load(python_status_file)

            jmx_checks = python_jmx_stats.get('invalid_checks', {})
            for check_name, excep in jmx_checks.iteritems():
                check_statuses.append(
                    CheckStatus(check_name, [], init_failed_error=excep))

        return check_statuses

    except Exception:
        # Status reporting is best effort: never let a malformed or
        # half-written status file break the caller.
        log.exception("Couldn't load latest jmx status")
        return []