def is_agent_running(self, fail_if_running=False, fail_if_not_running=False):
    """Reports whether the agent service is running, based on what is read from the pidfile.

    The two flags let a caller turn either outcome into an exception whose message names the
    pidfile that was consulted.

    @param fail_if_running: If True, raise AgentAlreadyRunning when a pid is read from the pidfile.
    @param fail_if_not_running: If True, raise AgentNotRunning when no pid is read from the pidfile.

    @type fail_if_running: bool
    @type fail_if_not_running: bool

    @return: True if a pid was read from the pidfile, False otherwise.
    @rtype: bool

    @raise AgentAlreadyRunning
    @raise AgentNotRunning
    """
    self._log_init_debug('Checking if agent is running %s' % scalyr_util.get_pid_tid())

    running_pid = self.__read_pidfile()
    is_running = running_pid is not None

    if is_running:
        if fail_if_running:
            raise AgentAlreadyRunning('The pidfile %s exists and indicates it is running pid=%d' % (
                self.__pidfile, running_pid))
    elif fail_if_not_running:
        raise AgentNotRunning('The pidfile %s does not exist or listed process is not running.' %
                              self.__pidfile)

    return is_running
def __write_checkpoint_state(self):
    """Writes the current checkpoint state to disk.

    This must be done periodically to ensure that if the agent process stops and starts up again, we pick up
    from where we left off copying each file.

    The state is serialized to `checkpoints.json~` in the agent data path and then renamed to
    `checkpoints.json`.  IOError and OSError raised while writing or renaming are logged (with error code
    `failedCheckpointWrite`) and not propagated.
    """
    # Create the format that is expected.  An overall JsonObject with the time when the file was written,
    # and then an entry for each file path.
    checkpoints = {}
    state = {
        'time': time.time(),
        'checkpoints': checkpoints,
    }

    for processor in self.__log_processors:
        checkpoints[processor.log_path] = processor.get_checkpoint()

    # We write to a temporary file and then rename it to the real file name to make the write more atomic.
    # We have had problems in the past with corrupted checkpoint files due to failures during the write.
    file_path = os.path.join(self.__config.agent_data_path, 'checkpoints.json')
    tmp_path = os.path.join(self.__config.agent_data_path, 'checkpoints.json~')

    try:
        fp = open(tmp_path, 'w')
        # The inner try/finally guarantees the handle is closed exactly once, whatever is raised while
        # serializing or writing (not only IOError/OSError), so the descriptor can never leak.  A nested
        # try/finally is used instead of `with` to stay within constructs this code already relies on.
        try:
            fp.write(json_lib.serialize(state))
        finally:
            fp.close()

        # The existing file is removed first on win32 because the rename is not expected to replace it there.
        if sys.platform == 'win32' and os.path.isfile(file_path):
            os.unlink(file_path)
        os.rename(tmp_path, file_path)
    except (IOError, OSError):
        log.exception('Could not write checkpoint file due to error %s' % scalyr_util.get_pid_tid(),
                      error_code='failedCheckpointWrite')
def emit_init_log(self, logger, include_debug):
    """Replays the log lines collected during initialization of the agent_service to the given logger.

    Initialization happens before the agent log is set up to write to a file instead of standard out,
    so the lines are collected first and emitted here once the logger is ready.  When debug output is
    requested, the chain of parent process ids (at most 10 levels) is emitted as well.

    @param logger: The logger to use to write the information.
    @param include_debug: If True, include debug level logging as well.

    @type logger: Logger
    @type include_debug: bool
    """
    logger.info('Emitting log lines saved during initialization: %s' % scalyr_util.get_pid_tid())

    for entry in self.__init_log_lines:
        # entry[1] flags a debug-level line; those are skipped unless debug output was requested.
        if entry[1] and not include_debug:
            continue
        logger.info(' %s' % entry[0])

    if not include_debug:
        return

    logger.info('Parent pids:')
    child_pid = os.getpid()
    parent_pid = self.__get_ppid(child_pid)

    # Walk up the process tree, stopping after 10 ancestors or when no parent can be determined.
    for _ in range(10):
        if parent_pid is None:
            break
        if _can_read_command_line(parent_pid):
            logger.info(' ppid=%d cmd=%s parent_of=%d' % (parent_pid, _read_command_line(parent_pid),
                                                         child_pid))
        else:
            logger.info(' ppid=%d cmd=Unknown parent_of=%d' % (parent_pid, child_pid))
        child_pid = parent_pid
        parent_pid = self.__get_ppid(parent_pid)
def start_agent_service(self, agent_run_method, quiet, fork=True):
    """Starts the agent service, optionally daemonizing first, and runs it until it exits.

    In the process that goes on to run the agent this method does not return normally: it always
    finishes by calling `sys.exit` with the value returned by `agent_run_method`, or 99 if that
    value is None.  When forking, the original process simply returns.

    @param agent_run_method: Callable invoked with this object as its only argument to run the agent.
        Its return value is used as the process exit status.
    @param quiet: Not used by this method.
    @param fork: If True, daemonize by forking before running.  If False, run in the current process
        after writing the pidfile.

    @type fork: bool

    @raise AgentAlreadyRunning: If `fork` is False and the pidfile could not be written.
    """
    self._log_init_debug('Starting agent %s' % scalyr_util.get_pid_tid())

    # noinspection PyUnusedLocal
    def on_terminate(signal_num, frame):
        callback = self.__termination_handler
        if callback is not None:
            callback()

    # noinspection PyUnusedLocal
    def on_interrupt(signal_num, frame):
        callback = self.__status_handler
        if callback is not None:
            callback()

    # Daemonizing forks off a new process.  When it returns, we are either the original process
    # or the new forked one.  If we are the original process, there is nothing more to do.
    if fork and not self.__daemonize():
        return

    # Not forking: this process is the agent, so it records its own pid in the pidfile.
    if not fork and not self.__write_pidfile():
        raise AgentAlreadyRunning('The pidfile %s exists and indicates it is running pid=%s' % (
            self.__pidfile, str(self.__read_pidfile())))

    # Register for the TERM and INT signals.  A TERM terminates the process.  An INT makes us write
    # a status file; it is what gets sent when the command scalyr-agent-2 status -v is invoked.
    previous_term = signal.signal(signal.SIGTERM, on_terminate)
    previous_interrupt = signal.signal(signal.SIGINT, on_interrupt)

    try:
        self.__is_initializing = False
        exit_status = agent_run_method(self)
        if exit_status is None:
            exit_status = 99
        sys.exit(exit_status)
    finally:
        # Put back whatever handlers were installed before we started.
        signal.signal(signal.SIGTERM, previous_term)
        signal.signal(signal.SIGINT, previous_interrupt)
def start_agent_service(self, agent_run_method, quiet, fork=True):
    """Starts the agent service, optionally daemonizing first, and runs it until it exits.

    In the process that goes on to run the agent this method does not return normally: it always
    finishes by calling `sys.exit` with the value returned by `agent_run_method`, or 99 if that
    value is None.  When forking, the original process simply returns.

    @param agent_run_method: Callable invoked with this object as its only argument to run the agent.
        Its return value is used as the process exit status.
    @param quiet: Not used by this method.
    @param fork: If True, daemonize by forking before running.  If False, run in the current process
        after writing the pidfile.

    @type fork: bool

    @raise AgentAlreadyRunning: If `fork` is False and the pidfile could not be written.
    """
    self._log_init_debug('Starting agent %s' % scalyr_util.get_pid_tid())

    # noinspection PyUnusedLocal
    def on_terminate(signal_num, frame):
        callback = self.__termination_handler
        if callback is not None:
            callback()

    # noinspection PyUnusedLocal
    def on_interrupt(signal_num, frame):
        callback = self.__status_handler
        if callback is not None:
            callback()

    # Daemonizing forks off a new process.  When it returns, we are either the original process
    # or the new forked one.  If we are the original process, there is nothing more to do.
    if fork and not self.__daemonize():
        return

    # Not forking: this process is the agent, so it records its own pid in the pidfile.
    if not fork and not self.__write_pidfile():
        raise AgentAlreadyRunning('The pidfile %s exists and indicates it is running pid=%s' % (
            self.__pidfile, str(self.__read_pidfile())))

    # Register for the TERM and INT signals.  A TERM terminates the process.  An INT makes us write
    # a status file; it is what gets sent when the command scalyr-agent-2 status -v is invoked.
    previous_term = signal.signal(signal.SIGTERM, on_terminate)
    previous_interrupt = signal.signal(signal.SIGINT, on_interrupt)

    try:
        self.__is_initializing = False
        exit_status = agent_run_method(self)
        if exit_status is None:
            exit_status = 99
        sys.exit(exit_status)
    finally:
        # Put back whatever handlers were installed before we started.
        signal.signal(signal.SIGTERM, previous_term)
        signal.signal(signal.SIGINT, previous_interrupt)
def emit_init_log(self, logger, include_debug):
    """Replays the log lines collected during initialization of the agent_service to the given logger.

    Initialization happens before the agent log is set up to write to a file instead of standard out,
    so the lines are collected first and emitted here once the logger is ready.  When debug output is
    requested, the chain of parent process ids (at most 10 levels) is emitted as well.

    @param logger: The logger to use to write the information.
    @param include_debug: If True, include debug level logging as well.

    @type logger: Logger
    @type include_debug: bool
    """
    logger.info('Emitting log lines saved during initialization: %s' % scalyr_util.get_pid_tid())

    for entry in self.__init_log_lines:
        # entry[1] flags a debug-level line; those are skipped unless debug output was requested.
        if entry[1] and not include_debug:
            continue
        logger.info(' %s' % entry[0])

    if not include_debug:
        return

    logger.info('Parent pids:')
    child_pid = os.getpid()
    parent_pid = self.__get_ppid(child_pid)

    # Walk up the process tree, stopping after 10 ancestors or when no parent can be determined.
    for _ in range(10):
        if parent_pid is None:
            break
        if _can_read_command_line(parent_pid):
            logger.info(' ppid=%d cmd=%s parent_of=%d' % (parent_pid, _read_command_line(parent_pid),
                                                         child_pid))
        else:
            logger.info(' ppid=%d cmd=Unknown parent_of=%d' % (parent_pid, child_pid))
        child_pid = parent_pid
        parent_pid = self.__get_ppid(parent_pid)
def is_agent_running(self, fail_if_running=False, fail_if_not_running=False):
    """Reports whether the agent service is running, based on what is read from the pidfile.

    The two flags let a caller turn either outcome into an exception whose message names the
    pidfile that was consulted.

    @param fail_if_running: If True, raise AgentAlreadyRunning when a pid is read from the pidfile.
    @param fail_if_not_running: If True, raise AgentNotRunning when no pid is read from the pidfile.

    @type fail_if_running: bool
    @type fail_if_not_running: bool

    @return: True if a pid was read from the pidfile, False otherwise.
    @rtype: bool

    @raise AgentAlreadyRunning
    @raise AgentNotRunning
    """
    self._log_init_debug('Checking if agent is running %s' % scalyr_util.get_pid_tid())

    running_pid = self.__read_pidfile()
    is_running = running_pid is not None

    if is_running:
        if fail_if_running:
            raise AgentAlreadyRunning('The pidfile %s exists and indicates it is running pid=%d' % (
                self.__pidfile, running_pid))
    elif fail_if_not_running:
        raise AgentNotRunning('The pidfile %s does not exist or listed process is not running.' %
                              self.__pidfile)

    return is_running
def logger(message):
    """Passes `message` to the init log, suffixed with the value of scalyr_util.get_pid_tid()."""
    tagged_message = '%s %s' % (message, scalyr_util.get_pid_tid())
    self._log_init(tagged_message)
def logger(message):
    """Passes `message` to the init log, suffixed with `original_pid` and scalyr_util.get_pid_tid()."""
    tagged_message = '%s (orig_pid=%s) %s' % (message, original_pid, scalyr_util.get_pid_tid())
    self._log_init(tagged_message)