def run(self):
    """
    Main loop of the goal state agent.

    Starts the monitor and environment handlers, migrates the handler state
    and then, for as long as ``self.running`` is true, runs the extension
    handlers and sleeps GOAL_STATE_INTERVAL between passes. The loop ends
    early when ``self._is_orphaned`` is true or when
    ``self._upgrade_available()`` reports an update.

    On a normal loop exit ``self._shutdown()`` is called, followed by
    ``sys.exit(0)``. An exception raised inside the guarded section is
    logged together with its traceback and followed by ``sys.exit(1)``.
    """
    logger.info(u"Agent {0} is running as the goal state agent", CURRENT_AGENT)

    # Launch monitoring threads
    from azurelinuxagent.ga.monitor import get_monitor_handler
    get_monitor_handler().run()

    from azurelinuxagent.ga.env import get_env_handler
    get_env_handler().run()

    from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state
    exthandlers_handler = get_exthandlers_handler()
    migrate_handler_state()

    try:
        self._ensure_no_orphans()
        self._emit_restart_event()

        # TODO: Add means to stop running
        while self.running:
            if self._is_orphaned:
                logger.info("Goal state agent {0} was orphaned -- exiting", CURRENT_AGENT)
                break

            if self._upgrade_available():
                # The update is only named in the log when the agent list is
                # non-empty; the loop is left in either case.
                if self.agents:
                    logger.info(
                        u"Agent {0} discovered {1} as an update and will exit",
                        CURRENT_AGENT,
                        self.agents[0].name)
                break

            exthandlers_handler.run()

            time.sleep(GOAL_STATE_INTERVAL)

    except Exception as e:
        logger.warn(u"Agent {0} failed with exception: {1}", CURRENT_AGENT, ustr(e))
        logger.warn(traceback.format_exc())
        sys.exit(1)
        # Explicit return so that _shutdown() is skipped even if sys.exit()
        # does not terminate the process (e.g. when it is replaced in tests).
        return

    self._shutdown()
    sys.exit(0)
def run(self):
    """
    Drive the goal state agent.

    The monitor and environment handlers are started, the handler state is
    migrated, and goal states are then processed in a loop until
    ``self.running`` becomes false, the agent is orphaned, or an upgrade is
    available. A ProcessGoalState event is emitted whenever the extension
    handler's ``last_etag`` differs after a pass.

    A normal loop exit calls ``self._shutdown()`` and ``sys.exit(0)``; an
    exception in the guarded section is logged and followed by
    ``sys.exit(1)``.
    """
    logger.info(u"Agent {0} is running as the goal state agent", CURRENT_AGENT)

    # Bring up the monitoring handlers before anything else
    from azurelinuxagent.ga.monitor import get_monitor_handler
    monitor = get_monitor_handler()
    monitor.run()

    from azurelinuxagent.ga.env import get_env_handler
    environment = get_env_handler()
    environment.run()

    from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state
    ext_handler = get_exthandlers_handler()
    migrate_handler_state()

    try:
        self._ensure_no_orphans()
        self._emit_restart_event()
        self._ensure_partition_assigned()

        while self.running:
            if self._is_orphaned:
                logger.info("Agent {0} is an orphan -- exiting", CURRENT_AGENT)
                break

            if self._upgrade_available():
                candidate = self.get_latest_agent()
                if candidate is not None:
                    logger.info(
                        u"Agent {0} discovered update {1} -- exiting",
                        CURRENT_AGENT,
                        candidate.name)
                else:
                    logger.info(
                        "Agent {0} is reverting to the installed agent -- exiting",
                        CURRENT_AGENT)
                break

            started_at = datetime.utcnow()
            previous_etag = ext_handler.last_etag
            ext_handler.run()

            # Report only the passes in which the etag changed
            if previous_etag != ext_handler.last_etag:
                elapsed = elapsed_milliseconds(started_at)
                incarnation = "Incarnation {0}".format(ext_handler.last_etag)
                add_event(
                    AGENT_NAME,
                    version=CURRENT_VERSION,
                    op=WALAEventOperation.ProcessGoalState,
                    is_success=True,
                    duration=elapsed,
                    message=incarnation,
                    log_event=True)

            time.sleep(GOAL_STATE_INTERVAL)

    except Exception as error:
        logger.warn(u"Agent {0} failed with exception: {1}", CURRENT_AGENT, ustr(error))
        logger.warn(traceback.format_exc())
        sys.exit(1)
        # sys.exit is mocked in unit tests, hence the explicit return
        return

    self._shutdown()
    sys.exit(0)
def run(self):
    """
    Drive the goal state agent.

    Starts the monitor and environment handlers, obtains the extension and
    remote access handlers, performs the start-up checks and then loops
    while ``self.running`` is true. Each pass restarts a monitoring thread
    that is no longer alive, leaves the loop if the agent is orphaned or an
    upgrade is available, and otherwise runs the extension and remote
    access handlers before sleeping GOAL_STATE_INTERVAL.

    A normal loop exit calls ``self._shutdown()`` and ``sys.exit(0)``. Any
    exception is recorded through ``self._set_sentinel``, logged, and
    followed by ``sys.exit(1)``.
    """
    try:
        logger.info(u"Agent {0} is running as the goal state agent", CURRENT_AGENT)

        # Start the monitoring handlers
        from azurelinuxagent.ga.monitor import get_monitor_handler
        monitor = get_monitor_handler()
        monitor.run()

        from azurelinuxagent.ga.env import get_env_handler
        environment = get_env_handler()
        environment.run()

        from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state
        ext_handler = get_exthandlers_handler()
        migrate_handler_state()

        from azurelinuxagent.ga.remoteaccess import get_remote_access_handler
        remote_access = get_remote_access_handler()

        self._ensure_no_orphans()
        self._emit_restart_event()
        self._ensure_partition_assigned()
        self._ensure_readonly_files()

        # Checked in this order on every pass; each entry carries its own warning
        watched = (
            (monitor, u"Monitor thread died, restarting"),
            (environment, u"Environment thread died, restarting"),
        )

        while self.running:
            if self._is_orphaned:
                logger.info("Agent {0} is an orphan -- exiting", CURRENT_AGENT)
                break

            for handler, warning in watched:
                if not handler.is_alive():
                    logger.warn(warning)
                    handler.start()

            if self._upgrade_available():
                candidate = self.get_latest_agent()
                if candidate is not None:
                    logger.info(
                        u"Agent {0} discovered update {1} -- exiting",
                        CURRENT_AGENT,
                        candidate.name)
                else:
                    logger.info(
                        "Agent {0} is reverting to the installed agent -- exiting",
                        CURRENT_AGENT)
                break

            started_at = datetime.utcnow()
            previous_etag = ext_handler.last_etag
            ext_handler.run()
            remote_access.run()

            # Report only the passes in which the etag changed
            if previous_etag != ext_handler.last_etag:
                self._ensure_readonly_files()
                elapsed = elapsed_milliseconds(started_at)
                incarnation = "Incarnation {0}".format(ext_handler.last_etag)
                add_event(
                    AGENT_NAME,
                    version=CURRENT_VERSION,
                    op=WALAEventOperation.ProcessGoalState,
                    is_success=True,
                    duration=elapsed,
                    message=incarnation,
                    log_event=True)

            time.sleep(GOAL_STATE_INTERVAL)

    except Exception as error:
        failure = u"Agent {0} failed with exception: {1}".format(
            CURRENT_AGENT, ustr(error))
        self._set_sentinel(msg=failure)
        logger.warn(failure)
        logger.warn(traceback.format_exc())
        sys.exit(1)
        # sys.exit is mocked in unit tests, hence the explicit return
        return

    self._shutdown()
    sys.exit(0)
def run(self, debug=False):
    """
    Drive the goal state agent.

    :param debug: when true, the orphan check at the top of each pass is
        skipped.

    Starts the monitor and environment handlers, performs the start-up
    checks, reports the OS information as telemetry and then loops while
    ``self.running`` is true. The pause between passes is
    GOAL_STATE_INTERVAL when extensions are enabled and
    GOAL_STATE_INTERVAL_DISABLED otherwise.

    A normal loop exit calls ``self._shutdown()`` and ``sys.exit(0)``. Any
    exception is recorded through ``self._set_sentinel``, logged, and
    followed by ``sys.exit(1)``.
    """
    try:
        # NOTE: Telemetry events must not be added before 'monitor.run()' below has been called.
        # That call initializes the protocol, which is required to load the goal state and to
        # update the in-memory container id; events sent earlier would carry an uninitialized
        # container id.
        logger.info(u"Agent {0} is running as the goal state agent", CURRENT_AGENT)

        # OS details are only written to the local log at this point
        os_description = (
            u"Distro info: {0} {1}, osutil class being used: {2}, "
            u"agent service name: {3}"
        ).format(DISTRO_NAME, DISTRO_VERSION, type(self.osutil).__name__,
                 self.osutil.service_name)
        logger.info(os_description)

        # Start the monitoring handlers
        from azurelinuxagent.ga.monitor import get_monitor_handler
        monitor = get_monitor_handler()
        monitor.run()

        # NOTE: From here on telemetry events are populated with the container id.
        from azurelinuxagent.ga.env import get_env_handler
        environment = get_env_handler()
        environment.run()

        from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state
        ext_handler = get_exthandlers_handler()
        migrate_handler_state()

        from azurelinuxagent.ga.remoteaccess import get_remote_access_handler
        remote_access = get_remote_access_handler()

        self._ensure_no_orphans()
        self._emit_restart_event()
        self._ensure_partition_assigned()
        self._ensure_readonly_files()
        self._ensure_cgroups_initialized()

        # The monitoring handler is up, so the OS details can now be sent as telemetry
        add_event(AGENT_NAME, op=WALAEventOperation.OSInfo, message=os_description)

        if conf.get_extensions_enabled():
            pause = GOAL_STATE_INTERVAL
        else:
            pause = GOAL_STATE_INTERVAL_DISABLED

        # Checked in this order on every pass; each entry carries its own warning
        watched = (
            (monitor, u"Monitor thread died, restarting"),
            (environment, u"Environment thread died, restarting"),
        )

        while self.running:
            if not debug and self._is_orphaned:
                logger.info("Agent {0} is an orphan -- exiting", CURRENT_AGENT)
                break

            for handler, warning in watched:
                if not handler.is_alive():
                    logger.warn(warning)
                    handler.start()

            if self._upgrade_available():
                candidate = self.get_latest_agent()
                if candidate is not None:
                    logger.info(
                        u"Agent {0} discovered update {1} -- exiting",
                        CURRENT_AGENT,
                        candidate.name)
                else:
                    logger.info(
                        "Agent {0} is reverting to the installed agent -- exiting",
                        CURRENT_AGENT)
                break

            started_at = datetime.utcnow()
            previous_etag = ext_handler.last_etag
            ext_handler.run()
            remote_access.run()

            # Log and report only the passes in which the etag changed
            if previous_etag != ext_handler.last_etag:
                self._ensure_readonly_files()
                elapsed = elapsed_milliseconds(started_at)
                logger.info(
                    'ProcessGoalState completed [incarnation {0}; {1} ms]',
                    ext_handler.last_etag,
                    elapsed)
                add_event(
                    AGENT_NAME,
                    op=WALAEventOperation.ProcessGoalState,
                    duration=elapsed,
                    message="Incarnation {0}".format(ext_handler.last_etag))

            time.sleep(pause)

    except Exception as error:
        failure = u"Agent {0} failed with exception: {1}".format(
            CURRENT_AGENT, ustr(error))
        self._set_sentinel(msg=failure)
        logger.warn(failure)
        logger.warn(traceback.format_exc())
        sys.exit(1)
        # sys.exit is mocked in unit tests, hence the explicit return
        return

    self._shutdown()
    sys.exit(0)
def run(self):
    """
    Main loop of the goal state agent.

    Starts the monitor and environment handlers, migrates the handler state
    and then, for as long as ``self.running`` is true, runs the extension
    handlers and sleeps GOAL_STATE_INTERVAL between passes. The loop ends
    early when the agent is orphaned, when an upgrade is available, or when
    a test agent that is in the slice has been enabled.

    A ProcessGoalState event is added on every pass; its ``log_event`` flag
    is set when the etag changed during the pass or when the reporting
    deadline (REPORT_STATUS_INTERVAL minutes) has been reached.

    On a normal loop exit ``self._shutdown()`` is called, followed by
    ``sys.exit(0)``. An exception raised inside the guarded section is
    logged together with its traceback and followed by ``sys.exit(1)``.
    """
    logger.info(u"Agent {0} is running as the goal state agent", CURRENT_AGENT)

    # Launch monitoring threads
    from azurelinuxagent.ga.monitor import get_monitor_handler
    get_monitor_handler().run()

    from azurelinuxagent.ga.env import get_env_handler
    get_env_handler().run()

    from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state
    exthandlers_handler = get_exthandlers_handler()
    migrate_handler_state()

    try:
        # Deadline after which the next event is logged even without an etag change
        send_event_time = datetime.utcnow()

        self._ensure_no_orphans()
        self._emit_restart_event()

        while self.running:
            if self._is_orphaned:
                logger.info("Goal state agent {0} was orphaned -- exiting", CURRENT_AGENT)
                break

            if self._upgrade_available():
                # The update is only named in the log when the agent list is
                # non-empty; the loop is left in either case.
                if self.agents:
                    logger.info(
                        u"Agent {0} discovered {1} as an update and will exit",
                        CURRENT_AGENT,
                        self.agents[0].name)
                break

            utc_start = datetime.utcnow()

            last_etag = exthandlers_handler.last_etag
            exthandlers_handler.run()

            log_event = last_etag != exthandlers_handler.last_etag or \
                (datetime.utcnow() >= send_event_time)
            add_event(
                AGENT_NAME,
                version=CURRENT_VERSION,
                op=WALAEventOperation.ProcessGoalState,
                is_success=True,
                duration=elapsed_milliseconds(utc_start),
                log_event=log_event)
            if log_event:
                # Re-anchor the deadline on the current time. Adding the
                # interval to the previous deadline let etag-triggered events
                # push it further and further into the future, and left it in
                # the past after a long pass.
                send_event_time = datetime.utcnow() + timedelta(
                    minutes=REPORT_STATUS_INTERVAL)

            test_agent = self.get_test_agent()
            if test_agent is not None and test_agent.in_slice:
                test_agent.enable()
                logger.info(u"Enabled Agent {0} as test agent", test_agent.name)
                break

            time.sleep(GOAL_STATE_INTERVAL)

    except Exception as e:
        logger.warn(u"Agent {0} failed with exception: {1}", CURRENT_AGENT, ustr(e))
        logger.warn(traceback.format_exc())
        sys.exit(1)
        # Explicit return so that _shutdown() is skipped even if sys.exit()
        # does not terminate the process (e.g. when it is replaced in tests).
        return

    self._shutdown()
    sys.exit(0)
def run(self):
    """
    Drive the goal state agent.

    Starts the monitor and environment handlers, obtains the extension and
    remote access handlers, performs the start-up checks and then loops
    while ``self.running`` is true. Each pass restarts a monitoring thread
    that is no longer alive, leaves the loop if the agent is orphaned or an
    upgrade is available, and otherwise runs the extension and remote
    access handlers before sleeping GOAL_STATE_INTERVAL. A pass in which the
    etag changed is logged and reported as a ProcessGoalState event.

    A normal loop exit calls ``self._shutdown()`` and ``sys.exit(0)``. Any
    exception is recorded through ``self._set_sentinel``, logged, and
    followed by ``sys.exit(1)``.
    """
    try:
        logger.info(u"Agent {0} is running as the goal state agent", CURRENT_AGENT)

        # Start the monitoring handlers
        from azurelinuxagent.ga.monitor import get_monitor_handler
        monitor = get_monitor_handler()
        monitor.run()

        from azurelinuxagent.ga.env import get_env_handler
        environment = get_env_handler()
        environment.run()

        from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state
        ext_handler = get_exthandlers_handler()
        migrate_handler_state()

        from azurelinuxagent.ga.remoteaccess import get_remote_access_handler
        remote_access = get_remote_access_handler()

        self._ensure_no_orphans()
        self._emit_restart_event()
        self._ensure_partition_assigned()
        self._ensure_readonly_files()

        # Checked in this order on every pass; each entry carries its own warning
        watched = (
            (monitor, u"Monitor thread died, restarting"),
            (environment, u"Environment thread died, restarting"),
        )

        while self.running:
            if self._is_orphaned:
                logger.info("Agent {0} is an orphan -- exiting", CURRENT_AGENT)
                break

            for handler, warning in watched:
                if not handler.is_alive():
                    logger.warn(warning)
                    handler.start()

            if self._upgrade_available():
                candidate = self.get_latest_agent()
                if candidate is not None:
                    logger.info(
                        u"Agent {0} discovered update {1} -- exiting",
                        CURRENT_AGENT,
                        candidate.name)
                else:
                    logger.info(
                        "Agent {0} is reverting to the installed agent -- exiting",
                        CURRENT_AGENT)
                break

            started_at = datetime.utcnow()
            previous_etag = ext_handler.last_etag
            ext_handler.run()
            remote_access.run()

            # Log and report only the passes in which the etag changed
            if previous_etag != ext_handler.last_etag:
                self._ensure_readonly_files()
                elapsed = elapsed_milliseconds(started_at)
                logger.info(
                    'ProcessGoalState completed [incarnation {0}; {1} ms]',
                    ext_handler.last_etag,
                    elapsed)
                add_event(
                    AGENT_NAME,
                    op=WALAEventOperation.ProcessGoalState,
                    duration=elapsed,
                    message="Incarnation {0}".format(ext_handler.last_etag))

            time.sleep(GOAL_STATE_INTERVAL)

    except Exception as error:
        failure = u"Agent {0} failed with exception: {1}".format(
            CURRENT_AGENT, ustr(error))
        self._set_sentinel(msg=failure)
        logger.warn(failure)
        logger.warn(traceback.format_exc())
        sys.exit(1)
        # sys.exit is mocked in unit tests, hence the explicit return
        return

    self._shutdown()
    sys.exit(0)
def run(self, debug=False):
    """
    Drive the goal state agent.

    :param debug: when true, the orphan check at the top of each pass is
        skipped.

    Fetches the goal state once, initializes the telemetry common
    parameters, reports the OS information, starts the monitor and
    environment handlers, performs the start-up checks and then loops while
    ``self.running`` is true. Every pass tries to refresh the goal state; a
    failed refresh only increments
    ``self._heartbeat_update_goal_state_error_count``. Heartbeat telemetry
    is sent at the end of every pass.

    A normal loop exit calls ``self._shutdown()`` and ``sys.exit(0)``. Any
    exception is recorded through ``self._set_sentinel``, logged, and
    followed by ``sys.exit(1)``.
    """
    try:
        logger.info(u"Agent {0} is running as the goal state agent", CURRENT_AGENT)

        #
        # One initial goal state fetch: other components rely on data that comes with the
        # goal state (e.g. telemetry needs the container ID), and this call initializes it.
        #
        protocol = self.protocol_util.get_protocol()
        protocol.update_goal_state()

        initialize_event_logger_vminfo_common_parameters(protocol)

        # Record the OS details in the log and as telemetry
        os_description = u"Distro: {0}-{1}; OSUtil: {2}; AgentService: {3}; Python: {4}.{5}.{6}".format(
            DISTRO_NAME, DISTRO_VERSION, type(self.osutil).__name__, self.osutil.service_name,
            PY_VERSION_MAJOR, PY_VERSION_MINOR, PY_VERSION_MICRO)
        logger.info(os_description)
        add_event(AGENT_NAME, op=WALAEventOperation.OSInfo, message=os_description)

        # Start the monitoring handlers
        from azurelinuxagent.ga.monitor import get_monitor_handler
        monitor = get_monitor_handler()
        monitor.run()

        from azurelinuxagent.ga.env import get_env_handler
        environment = get_env_handler()
        environment.run()

        from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state
        ext_handler = get_exthandlers_handler(protocol)
        migrate_handler_state()

        from azurelinuxagent.ga.remoteaccess import get_remote_access_handler
        remote_access = get_remote_access_handler(protocol)

        self._ensure_no_orphans()
        self._emit_restart_event()
        self._emit_changes_in_default_configuration()
        self._ensure_partition_assigned()
        self._ensure_readonly_files()
        self._ensure_cgroups_initialized()

        if conf.get_extensions_enabled():
            pause = conf.get_goal_state_period()
        else:
            pause = GOAL_STATE_INTERVAL_DISABLED

        # Checked in this order on every pass; each entry carries its own warning
        watched = (
            (monitor, u"Monitor thread died, restarting"),
            (environment, u"Environment thread died, restarting"),
        )

        while self.running:
            #
            # The parent process (the agent's daemon) must still be running
            #
            if not debug and self._is_orphaned:
                logger.info("Agent {0} is an orphan -- exiting", CURRENT_AGENT)
                break

            #
            # All threads must still be running
            #
            for handler, warning in watched:
                if not handler.is_alive():
                    logger.warn(warning)
                    handler.start()

            #
            # Goal state processing
            #
            if protocol.try_update_goal_state():
                if self._upgrade_available(protocol):
                    candidate = self.get_latest_agent()
                    if candidate is not None:
                        logger.info(
                            u"Agent {0} discovered update {1} -- exiting",
                            CURRENT_AGENT,
                            candidate.name)
                    else:
                        logger.info(
                            "Agent {0} is reverting to the installed agent -- exiting",
                            CURRENT_AGENT)
                    break

                started_at = datetime.utcnow()
                previous_etag = ext_handler.last_etag
                ext_handler.run()
                remote_access.run()

                # Log and report only the passes in which the etag changed
                if previous_etag != ext_handler.last_etag:
                    self._ensure_readonly_files()
                    elapsed = elapsed_milliseconds(started_at)
                    logger.info(
                        'ProcessGoalState completed [incarnation {0}; {1} ms]',
                        ext_handler.last_etag,
                        elapsed)
                    add_event(
                        AGENT_NAME,
                        op=WALAEventOperation.ProcessGoalState,
                        duration=elapsed,
                        message="Incarnation {0}".format(ext_handler.last_etag))
            else:
                self._heartbeat_update_goal_state_error_count += 1

            self._send_heartbeat_telemetry(protocol)
            time.sleep(pause)

    except Exception as error:
        failure = u"Agent {0} failed with exception: {1}".format(
            CURRENT_AGENT, ustr(error))
        self._set_sentinel(msg=failure)
        logger.warn(failure)
        logger.warn(traceback.format_exc())
        sys.exit(1)
        # sys.exit is mocked in unit tests, hence the explicit return
        return

    self._shutdown()
    sys.exit(0)
def run(self, debug=False):
    """
    Drive the goal state agent.

    :param debug: when true, the orphan check at the top of each pass is
        skipped.

    Fetches the goal state once, initializes the telemetry common
    parameters, reports the OS information, performs the initialization
    tasks, starts all thread handlers and then loops while ``self.running``
    is true. Every pass restarts thread handlers that are no longer alive
    and tries to refresh the goal state; a failed refresh only increments
    ``self._heartbeat_update_goal_state_error_count``. Heartbeat telemetry
    is sent at the end of every pass.

    A normal loop exit calls ``self._shutdown()`` and ``sys.exit(0)``. Any
    exception is recorded through ``self._set_sentinel``, logged, and
    followed by ``sys.exit(1)``.
    """
    try:
        logger.info(u"Agent {0} is running as the goal state agent", CURRENT_AGENT)

        #
        # One initial goal state fetch: other components rely on data that comes with the
        # goal state (e.g. telemetry needs the container ID), and this call initializes it.
        #
        protocol = self.protocol_util.get_protocol()
        protocol.update_goal_state()

        # Set up the parameters shared by all telemetry events
        initialize_event_logger_vminfo_common_parameters(protocol)

        # Record the OS details in the log and as telemetry
        os_description = (
            u"Distro: {dist_name}-{dist_ver}; "
            u"OSUtil: {util_name}; AgentService: {service_name}; "
            u"Python: {py_major}.{py_minor}.{py_micro}; "
            u"systemd: {systemd}; "
            u"LISDrivers: {lis_ver}; "
            u"logrotate: {has_logrotate};"
        ).format(
            dist_name=DISTRO_NAME,
            dist_ver=DISTRO_VERSION,
            util_name=type(self.osutil).__name__,
            service_name=self.osutil.service_name,
            py_major=PY_VERSION_MAJOR,
            py_minor=PY_VERSION_MINOR,
            py_micro=PY_VERSION_MICRO,
            systemd=systemd.is_systemd(),
            lis_ver=get_lis_version(),
            has_logrotate=has_logrotate())
        logger.info(os_description)
        add_event(AGENT_NAME, op=WALAEventOperation.OSInfo, message=os_description)

        #
        # Initialization tasks
        #
        from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state
        ext_handler = get_exthandlers_handler(protocol)
        migrate_handler_state()

        from azurelinuxagent.ga.remoteaccess import get_remote_access_handler
        remote_access = get_remote_access_handler(protocol)

        self._ensure_no_orphans()
        self._emit_restart_event()
        self._emit_changes_in_default_configuration()
        self._ensure_partition_assigned()
        self._ensure_readonly_files()
        self._ensure_cgroups_initialized()
        self._ensure_extension_telemetry_state_configured_properly(protocol)
        self._ensure_firewall_rules_persisted(dst_ip=protocol.get_endpoint())

        # Collect every thread handler; log collection is only added when allowed
        send_telemetry = get_send_telemetry_events_handler(self.protocol_util)
        workers = [
            get_monitor_handler(),
            get_env_handler(),
            send_telemetry,
            get_collect_telemetry_events_handler(send_telemetry),
        ]
        if is_log_collection_allowed():
            workers.append(get_collect_logs_handler())

        # Start all of them
        for worker in workers:
            worker.run()

        if conf.get_extensions_enabled():
            pause = conf.get_goal_state_period()
        else:
            pause = GOAL_STATE_INTERVAL_DISABLED

        while self.running:
            #
            # The parent process (the agent's daemon) must still be running
            #
            if not debug and self._is_orphaned:
                logger.info("Agent {0} is an orphan -- exiting", CURRENT_AGENT)
                break

            #
            # All threads must still be running
            #
            for worker in workers:
                if worker.is_alive():
                    continue
                logger.warn("{0} thread died, restarting".format(worker.get_thread_name()))
                worker.start()

            #
            # Goal state processing
            #
            if protocol.try_update_goal_state():
                if self._upgrade_available(protocol):
                    candidate = self.get_latest_agent()
                    if candidate is not None:
                        logger.info(
                            u"Agent {0} discovered update {1} -- exiting",
                            CURRENT_AGENT,
                            candidate.name)
                    else:
                        logger.info(
                            "Agent {0} is reverting to the installed agent -- exiting",
                            CURRENT_AGENT)
                    break

                started_at = datetime.utcnow()
                previous_etag = ext_handler.last_etag
                ext_handler.run()
                remote_access.run()

                # Log and report only the passes in which the etag changed
                if previous_etag != ext_handler.last_etag:
                    self._ensure_readonly_files()
                    elapsed = elapsed_milliseconds(started_at)
                    activity_id, correlation_id, gs_creation_time = \
                        ext_handler.get_goal_state_debug_metadata()
                    summary = 'ProcessGoalState completed [Incarnation: {0}; {1} ms; Activity Id: {2}; Correlation Id: {3}; GS Creation Time: {4}]'.format(
                        ext_handler.last_etag,
                        elapsed,
                        activity_id,
                        correlation_id,
                        gs_creation_time)
                    logger.info(summary)
                    add_event(
                        AGENT_NAME,
                        op=WALAEventOperation.ProcessGoalState,
                        duration=elapsed,
                        message=summary)
            else:
                self._heartbeat_update_goal_state_error_count += 1

            self._send_heartbeat_telemetry(protocol)
            time.sleep(pause)

    except Exception as exc:
        failure = u"Agent {0} failed with exception: {1}".format(
            CURRENT_AGENT, ustr(exc))
        self._set_sentinel(msg=failure)
        logger.warn(failure)
        logger.warn(traceback.format_exc())
        sys.exit(1)
        # sys.exit is mocked in unit tests, hence the explicit return
        return

    self._shutdown()
    sys.exit(0)
def run(self):
    """
    Main loop of the goal state agent.

    Starts the monitor and environment handlers, migrates the handler state
    and then, for as long as ``self.running`` is true, runs the extension
    handlers and sleeps GOAL_STATE_INTERVAL between passes. The loop ends
    early when the agent is orphaned, when an upgrade is available, or when
    a test agent that is in the slice has been enabled.

    A ProcessGoalState event is added on every pass; its ``log_event`` flag
    is set when the etag changed during the pass or when the reporting
    deadline (REPORT_STATUS_INTERVAL minutes) has been reached.

    On a normal loop exit ``self._shutdown()`` is called, followed by
    ``sys.exit(0)``. An exception raised inside the guarded section is
    logged together with its traceback and followed by ``sys.exit(1)``.
    """
    logger.info(u"Agent {0} is running as the goal state agent", CURRENT_AGENT)

    # Launch monitoring threads
    from azurelinuxagent.ga.monitor import get_monitor_handler
    get_monitor_handler().run()

    from azurelinuxagent.ga.env import get_env_handler
    get_env_handler().run()

    from azurelinuxagent.ga.exthandlers import get_exthandlers_handler, migrate_handler_state
    exthandlers_handler = get_exthandlers_handler()
    migrate_handler_state()

    try:
        # Deadline after which the next event is logged even without an etag change
        send_event_time = datetime.utcnow()

        self._ensure_no_orphans()
        self._emit_restart_event()

        while self.running:
            if self._is_orphaned:
                logger.info("Goal state agent {0} was orphaned -- exiting", CURRENT_AGENT)
                break

            if self._upgrade_available():
                # The update is only named in the log when the agent list is
                # non-empty; the loop is left in either case.
                if self.agents:
                    logger.info(
                        u"Agent {0} discovered {1} as an update and will exit",
                        CURRENT_AGENT,
                        self.agents[0].name)
                break

            utc_start = datetime.utcnow()

            last_etag = exthandlers_handler.last_etag
            exthandlers_handler.run()

            log_event = last_etag != exthandlers_handler.last_etag or \
                (datetime.utcnow() >= send_event_time)
            add_event(
                AGENT_NAME,
                version=CURRENT_VERSION,
                op=WALAEventOperation.ProcessGoalState,
                is_success=True,
                duration=elapsed_milliseconds(utc_start),
                log_event=log_event)
            if log_event:
                # Re-anchor the deadline on the current time. Adding the
                # interval to the previous deadline let etag-triggered events
                # push it further and further into the future, and left it in
                # the past after a long pass.
                send_event_time = datetime.utcnow() + timedelta(
                    minutes=REPORT_STATUS_INTERVAL)

            test_agent = self.get_test_agent()
            if test_agent is not None and test_agent.in_slice:
                test_agent.enable()
                logger.info(u"Enabled Agent {0} as test agent", test_agent.name)
                break

            time.sleep(GOAL_STATE_INTERVAL)

    except Exception as e:
        logger.warn(u"Agent {0} failed with exception: {1}", CURRENT_AGENT, ustr(e))
        logger.warn(traceback.format_exc())
        sys.exit(1)
        # Explicit return so that _shutdown() is skipped even if sys.exit()
        # does not terminate the process (e.g. when it is replaced in tests).
        return

    self._shutdown()
    sys.exit(0)