def initialize(self):
    """
    Decide whether cgroup monitoring can be used and, if so, start tracking the agent's CPU cgroup.

    The checks run in order and the method returns at the first one that fails:
      1. CGroupsApi.cgroups_supported() must be true for the current distro.
      2. systemd.is_systemd() must be true.
      3. self.__check_no_legacy_cgroups() must be true.
      4. The "Slice" property of the agent's unit must be _AZURE_SLICE or "system.slice".

    After the checks, the azure slice is set up, the controllers and the agent's cgroup paths are
    looked up and, when a CPU cgroup path was found, self.enable() is called and the path is
    registered with CGroupsTelemetry.

    The method does nothing when self._initialized is already set. Any exception raised by the steps
    above is reported with _log_cgroup_warning and is not propagated. self._initialized is set to True
    on every exit path (see the finally clause), so the checks are attempted only once.
    """
    try:
        if self._initialized:
            return

        # check whether cgroup monitoring is supported on the current distro
        self._cgroups_supported = CGroupsApi.cgroups_supported()
        if not self._cgroups_supported:
            logger.info("Cgroup monitoring is not supported on {0}", get_distro())
            return

        # check that systemd is detected correctly
        # (the API object is assigned before the check, so it is set even when systemd is not detected)
        self._cgroups_api = SystemdCgroupsApi()
        if not systemd.is_systemd():
            _log_cgroup_warning("systemd was not detected on {0}", get_distro())
            return

        _log_cgroup_info("systemd version: {0}", systemd.get_version())

        # This is temporarily disabled while we analyze telemetry. Likely it will be removed.
        # self.__collect_azure_unit_telemetry()
        # self.__collect_agent_unit_files_telemetry()

        if not self.__check_no_legacy_cgroups():
            return

        # only the azure slice and system.slice are accepted as the agent's slice
        agent_unit_name = systemd.get_agent_unit_name()
        agent_slice = systemd.get_unit_property(agent_unit_name, "Slice")
        if agent_slice not in (_AZURE_SLICE, "system.slice"):
            _log_cgroup_warning("The agent is within an unexpected slice: {0}", agent_slice)
            return

        self.__setup_azure_slice()

        cpu_controller_root, memory_controller_root = self.__get_cgroup_controllers()
        self._agent_cpu_cgroup_path, self._agent_memory_cgroup_path = self.__get_agent_cgroups(
            agent_slice, cpu_controller_root, memory_controller_root)

        # monitoring is enabled only when a CPU cgroup was found; the memory path is stored but not tracked here
        if self._agent_cpu_cgroup_path is not None:
            _log_cgroup_info("Agent CPU cgroup: {0}", self._agent_cpu_cgroup_path)
            self.enable()
            CGroupsTelemetry.track_cgroup(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path))

        _log_cgroup_info('Cgroups enabled: {0}', self._cgroups_enabled)

    except Exception as exception:
        # best effort: a failure here is reported and leaves initialization marked as done
        _log_cgroup_warning("Error initializing cgroups: {0}", ustr(exception))
    finally:
        self._initialized = True
def test_cgroups_should_be_supported_only_on_ubuntu_16_and_later(self):
    """
    Check CGroupsApi.cgroups_supported() against a table of distros.

    Each table entry pairs the value that the patched get_distro() returns with the expected result:
    the Ubuntu entries (16.04 through 20.10) expect True, every other entry expects False.
    """
    test_cases = [
        (['ubuntu', '16.04', 'xenial'], True),
        (['ubuntu', '16.10', 'yakkety'], True),
        (['ubuntu', '18.04', 'bionic'], True),
        (['ubuntu', '18.10', 'cosmic'], True),
        (['ubuntu', '20.04', 'focal'], True),
        (['ubuntu', '20.10', 'groovy'], True),
        (['centos', '7.5', 'Source'], False),
        (['redhat', '7.7', 'Maipo'], False),
        (['redhat', '7.7.1908', 'Core'], False),
        (['bigip', '15.0.1', 'Final'], False),
        (['gaia', '273.562', 'R80.30'], False),
        (['debian', '9.1', ''], False),
    ]

    for distro, supported in test_cases:
        # get_distro is patched where cgroupapi looks it up, so each iteration sees the distro under test
        with patch("azurelinuxagent.common.cgroupapi.get_distro", return_value=distro):
            self.assertEqual(
                CGroupsApi.cgroups_supported(),
                supported,
                "cgroups_supported() failed on {0}".format(distro))
def initialize(self):
    """
    Decide whether cgroup monitoring can be used and, if so, enable it and start tracking the agent's cgroups.

    The method returns early, without enabling cgroups, when:
      * self._initialized is already set;
      * CGroupsApi.cgroups_supported() is false for the current distro;
      * CGroupsApi.create() does not return a SystemdCgroupsApi;
      * cleanup_legacy_cgroups() reports that a legacy cgroup was in use.

    Missing v1 controllers, a mounted cgroups v2 hierarchy, or the agent not being within a CPU/memory
    cgroup only produce log messages: self._cgroups_enabled is still set to True, and each of the CPU and
    memory cgroups is tracked only when both its controller mount point and the agent's relative path
    were found.

    Any exception is logged, reported as a failed CGroupsInitialize event, and not propagated.
    self._initialized is set to True on every exit path (see the finally clause).
    """
    def report_initialize_failure(message):
        # Warn in the log and report a failed CGroupsInitialize event
        logger.warn(message)
        add_event(op=WALAEventOperation.CGroupsInitialize, is_success=False, message=message, log_event=False)

    def log_cgroup_info(format_string, *args):
        # Log at info level and report a CGroupsInfo event
        message = format_string.format(*args)
        logger.info(message)
        add_event(op=WALAEventOperation.CGroupsInfo, message=message)

    def log_cgroup_warn(format_string, *args):
        # Log at warning level and report a failed CGroupsInfo event
        message = format_string.format(*args)
        logger.warn(message)
        add_event(op=WALAEventOperation.CGroupsInfo, message=message, is_success=False, log_event=False)

    try:
        if self._initialized:
            return

        #
        # check whether cgroup monitoring is supported on the current distro
        #
        self._cgroups_supported = CGroupsApi.cgroups_supported()
        if not self._cgroups_supported:
            logger.info("Cgroup monitoring is not supported on {0}", get_distro())
            return

        #
        # check systemd
        #
        self._cgroups_api = CGroupsApi.create()

        if not isinstance(self._cgroups_api, SystemdCgroupsApi):
            report_initialize_failure("systemd was not detected on {0}".format(get_distro()))
            return

        log_cgroup_info("systemd version: {0}", self._cgroups_api.get_systemd_version())

        #
        # Older versions of the daemon (2.2.31-2.2.40) wrote their PID to /sys/fs/cgroup/{cpu,memory}/WALinuxAgent/WALinuxAgent. When running
        # under systemd this could produce invalid resource usage data. Do not enable cgroups under this condition.
        #
        legacy_cgroups = self._cgroups_api.cleanup_legacy_cgroups()

        if legacy_cgroups > 0:
            log_cgroup_warn("The daemon's PID was added to a legacy cgroup; will not monitor resource usage.")
            return

        #
        # check v1 controllers
        #
        cpu_controller_root, memory_controller_root = self._cgroups_api.get_cgroup_mount_points()

        if cpu_controller_root is not None:
            logger.info("The CPU cgroup controller is mounted at {0}", cpu_controller_root)
        else:
            log_cgroup_warn("The CPU cgroup controller is not mounted")

        if memory_controller_root is not None:
            logger.info("The memory cgroup controller is mounted at {0}", memory_controller_root)
        else:
            log_cgroup_warn("The memory cgroup controller is not mounted")

        #
        # check v2 controllers
        #
        cgroup2_mountpoint, cgroup2_controllers = self._cgroups_api.get_cgroup2_controllers()
        if cgroup2_mountpoint is not None:
            log_cgroup_warn("cgroups v2 mounted at {0}.  Controllers: [{1}]", cgroup2_mountpoint, cgroup2_controllers)

        #
        # check the cgroups for the agent
        #
        agent_unit_name = self._cgroups_api.get_agent_unit_name()
        cpu_cgroup_relative_path, memory_cgroup_relative_path = self._cgroups_api.get_process_cgroup_relative_paths("self")

        if cpu_cgroup_relative_path is None:
            log_cgroup_warn("The agent's process is not within a CPU cgroup")
        else:
            cpu_accounting = self._cgroups_api.get_unit_property(agent_unit_name, "CPUAccounting")
            log_cgroup_info('CPUAccounting: {0}', cpu_accounting)

        if memory_cgroup_relative_path is None:
            log_cgroup_warn("The agent's process is not within a memory cgroup")
        else:
            memory_accounting = self._cgroups_api.get_unit_property(agent_unit_name, "MemoryAccounting")
            log_cgroup_info('MemoryAccounting: {0}', memory_accounting)

        #
        # All good, enable cgroups and start monitoring the agent
        #
        self._cgroups_enabled = True

        if cpu_controller_root is None or cpu_cgroup_relative_path is None:
            logger.info("Will not track CPU for the agent's cgroup")
        else:
            self._agent_cpu_cgroup_path = os.path.join(cpu_controller_root, cpu_cgroup_relative_path)
            CGroupsTelemetry.track_cgroup(CpuCgroup(agent_unit_name, self._agent_cpu_cgroup_path))

        if memory_controller_root is None or memory_cgroup_relative_path is None:
            logger.info("Will not track memory for the agent's cgroup")
        else:
            self._agent_memory_cgroup_path = os.path.join(memory_controller_root, memory_cgroup_relative_path)
            CGroupsTelemetry.track_cgroup(MemoryCgroup(agent_unit_name, self._agent_memory_cgroup_path))

        log_cgroup_info("Agent cgroups: CPU: {0} -- MEMORY: {1}", self._agent_cpu_cgroup_path, self._agent_memory_cgroup_path)

    except Exception as e:
        # best effort: a failure here is reported and leaves initialization marked as done
        report_initialize_failure("Error initializing cgroups: {0}".format(ustr(e)))
    finally:
        self._initialized = True