def __collect_azure_unit_telemetry():
    """
    Logs the systemd units whose name starts with "azure", along with the slice each of them belongs to.

    If no such unit is found, searches the output of systemd-cgls for an azure.slice cgroup and, when one
    is present, logs the command output and emits a CGroupsInfo event.

    Failures of the external commands (shellutil.CommandError) are logged as warnings and are not raised.
    """
    azure_units = []

    try:
        units = shellutil.run_command(['systemctl', 'list-units', 'azure*', '-all'])
        for line in units.split('\n'):
            # The unit name is the first token of the line, preceded by at most one whitespace character
            match = re.match(r'\s?(azure[^\s]*)\s?', line, re.IGNORECASE)
            if match is not None:
                # Keep the whole line, it is reported below as the description of the unit
                azure_units.append((match.group(1), line))
    except shellutil.CommandError as command_error:
        _log_cgroup_warning("Failed to list systemd units: {0}", ustr(command_error))

    for unit_name, unit_description in azure_units:
        # "Unknown" is reported when the Slice property cannot be retrieved
        unit_slice = "Unknown"
        try:
            unit_slice = systemd.get_unit_property(unit_name, "Slice")
        except Exception as exception:
            _log_cgroup_warning("Failed to query Slice for {0}: {1}", unit_name, ustr(exception))

        _log_cgroup_info("Found an Azure unit under slice {0}: {1}", unit_slice, unit_description)

    if not azure_units:
        try:
            cgroups = shellutil.run_command('systemd-cgls')
            for line in cgroups.split('\n'):
                # Matches lines starting with one or more characters outside of the \x00-\xff range followed by
                # "azure.slice" (presumably the tree-drawing characters printed by systemd-cgls -- TODO confirm)
                if re.match(r'[^\x00-\xff]+azure\.slice\s*', line, re.UNICODE):
                    logger.info(ustr("Found a cgroup for azure.slice\n{0}").format(cgroups))
                    # Don't add the output of systemd-cgls to the telemetry, since currently it does not support Unicode
                    add_event(op=WALAEventOperation.CGroupsInfo, message="Found a cgroup for azure.slice")
                    # The log entry and the event do not depend on the matching line; report them only once
                    break
        except shellutil.CommandError as command_error:
            _log_cgroup_warning("Failed to list cgroups using systemd-cgls: {0}", ustr(command_error))
def __get_agent_cgroups(self, agent_slice, cpu_controller_root, memory_controller_root):
    """
    Returns a tuple with the paths of the CPU and memory cgroups of the agent's process.

    The relative cgroup paths of the current process ("self") are compared against the expected location,
    <agent_slice>/<agent unit name>. A path in the returned tuple is None when:
        * the process is not within a cgroup for that controller, or
        * the process is within a cgroup other than the expected one (to prevent monitoring), or
        * the corresponding controller root (cpu_controller_root / memory_controller_root) is None.

    :param agent_slice: slice the agent's unit is expected to be in
    :param cpu_controller_root: root path of the CPU controller, or None
    :param memory_controller_root: root path of the memory controller, or None
    """
    agent_unit_name = systemd.get_agent_unit_name()

    expected_relative_path = os.path.join(agent_slice, agent_unit_name)
    cpu_cgroup_relative_path, memory_cgroup_relative_path = self._cgroups_api.get_process_cgroup_relative_paths("self")

    if cpu_cgroup_relative_path is None:
        _log_cgroup_warning("The agent's process is not within a CPU cgroup")
    elif cpu_cgroup_relative_path == expected_relative_path:
        _log_cgroup_info('CPUAccounting: {0}', systemd.get_unit_property(agent_unit_name, "CPUAccounting"))
        _log_cgroup_info('CPUQuota: {0}', systemd.get_unit_property(agent_unit_name, "CPUQuotaPerSecUSec"))
    else:
        # Report the actual cgroup *before* clearing the variable; otherwise the message always shows [None]
        _log_cgroup_warning(
            "The Agent is not in the expected CPU cgroup; will not enable monitoring. Cgroup:[{0}] Expected:[{1}]",
            cpu_cgroup_relative_path,
            expected_relative_path)
        cpu_cgroup_relative_path = None  # Set the path to None to prevent monitoring

    if memory_cgroup_relative_path is None:
        _log_cgroup_warning("The agent's process is not within a memory cgroup")
    elif memory_cgroup_relative_path == expected_relative_path:
        _log_cgroup_info('MemoryAccounting: {0}', systemd.get_unit_property(agent_unit_name, "MemoryAccounting"))
    else:
        # Report the actual cgroup *before* clearing the variable; otherwise the message always shows [None]
        _log_cgroup_info(
            "The Agent is not in the expected memory cgroup; will not enable monitoring. CGroup:[{0}] Expected:[{1}]",
            memory_cgroup_relative_path,
            expected_relative_path)
        memory_cgroup_relative_path = None  # Set the path to None to prevent monitoring

    # A full path can be built only when both the controller root and the relative path are available
    if cpu_controller_root is not None and cpu_cgroup_relative_path is not None:
        agent_cpu_cgroup_path = os.path.join(cpu_controller_root, cpu_cgroup_relative_path)
    else:
        agent_cpu_cgroup_path = None

    if memory_controller_root is not None and memory_cgroup_relative_path is not None:
        agent_memory_cgroup_path = os.path.join(memory_controller_root, memory_cgroup_relative_path)
    else:
        agent_memory_cgroup_path = None

    return agent_cpu_cgroup_path, agent_memory_cgroup_path
def test_get_unit_property_should_return_the_value_of_the_given_property(self):
    """
    Within the mock cgroup environment, systemd.get_unit_property must report "no" as the value of
    CPUAccounting for walinuxagent.service.
    """
    unit_name = "walinuxagent.service"
    property_name = "CPUAccounting"

    with mock_cgroup_environment(self.tmp_dir):
        actual_value = systemd.get_unit_property(unit_name, property_name)

        failure_message = "Property {0} of {1} is incorrect".format(property_name, unit_name)
        self.assertEqual(actual_value, "no", failure_message)
def __collect_agent_unit_files_telemetry():
    """
    Logs the contents of the custom unit files of the agent's service.

    A unit file is considered custom when it is either
        * the FragmentPath of the service, if different from /lib/systemd/system/<service name>.service, or
        * any of the paths listed in the DropInPaths property of the service.

    Errors querying the properties or reading the files are logged as warnings and are not raised.
    """
    service_name = get_osutil().get_service_name()
    custom_unit_files = []

    # The main unit file is reported only when it is not at the default location
    try:
        fragment_path = systemd.get_unit_property(service_name, "FragmentPath")
        default_fragment_path = "/lib/systemd/system/{0}.service".format(service_name)
        if fragment_path != default_fragment_path:
            custom_unit_files.append(fragment_path)
    except Exception as error:
        _log_cgroup_warning("Failed to query the agent's FragmentPath: {0}", ustr(error))

    # DropInPaths is a whitespace-separated list; every item in it is reported
    try:
        drop_in_paths = systemd.get_unit_property(service_name, "DropInPaths")
        custom_unit_files.extend(drop_in_paths.split())
    except Exception as error:
        _log_cgroup_warning("Failed to query the agent's DropInPaths: {0}", ustr(error))

    for unit_file_path in custom_unit_files:
        try:
            with open(unit_file_path, "r") as unit_file:
                _log_cgroup_info("Found a custom unit file for the agent: {0}\n{1}", unit_file_path, unit_file.read())
        except Exception as error:
            _log_cgroup_warning("Can't read {0}: {1}", unit_file_path, ustr(error))
def initialize(self):
    """
    One-time setup of cgroup monitoring for the agent.

    The method returns early (leaving monitoring disabled) when cgroups are not supported on the distro,
    systemd is not detected, the legacy cgroups check fails, or the agent is within an unexpected slice.
    Otherwise it sets up the azure slice, resolves the agent's cgroup paths and, if the CPU cgroup is
    available, enables cgroups and starts tracking the agent's CPU cgroup.

    Exceptions are logged as warnings and are not raised; self._initialized is set to True in all cases,
    so subsequent calls are no-ops.
    """
    try:
        if self._initialized:
            return

        # Is cgroup monitoring supported on this distro?
        supported = CGroupsApi.cgroups_supported()
        self._cgroups_supported = supported
        if not supported:
            logger.info("Cgroup monitoring is not supported on {0}", get_distro())
            return

        # Is systemd detected correctly?
        self._cgroups_api = SystemdCgroupsApi()
        if not systemd.is_systemd():
            _log_cgroup_warning("systemd was not detected on {0}", get_distro())
            return

        _log_cgroup_info("systemd version: {0}", systemd.get_version())

        # This is temporarily disabled while we analyze telemetry. Likely it will be removed.
        # self.__collect_azure_unit_telemetry()
        # self.__collect_agent_unit_files_telemetry()

        if not self.__check_no_legacy_cgroups():
            return

        # The agent must be running within one of the expected slices
        unit_name = systemd.get_agent_unit_name()
        agent_slice = systemd.get_unit_property(unit_name, "Slice")
        expected_slices = (_AZURE_SLICE, "system.slice")
        if agent_slice not in expected_slices:
            _log_cgroup_warning("The agent is within an unexpected slice: {0}", agent_slice)
            return

        self.__setup_azure_slice()

        cpu_controller_root, memory_controller_root = self.__get_cgroup_controllers()
        agent_cgroup_paths = self.__get_agent_cgroups(agent_slice, cpu_controller_root, memory_controller_root)
        self._agent_cpu_cgroup_path, self._agent_memory_cgroup_path = agent_cgroup_paths

        # Monitoring is enabled only when the agent's CPU cgroup was found
        if self._agent_cpu_cgroup_path is not None:
            _log_cgroup_info("Agent CPU cgroup: {0}", self._agent_cpu_cgroup_path)
            self.enable()
            agent_cpu_cgroup = CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path)
            CGroupsTelemetry.track_cgroup(agent_cpu_cgroup)

        _log_cgroup_info('Cgroups enabled: {0}', self._cgroups_enabled)

    except Exception as init_error:
        _log_cgroup_warning("Error initializing cgroups: {0}", ustr(init_error))
    finally:
        # Mark as initialized even on failure so that the setup is not attempted again
        self._initialized = True
def get_unit_cgroup_paths(self, unit_name):
    """
    Returns the tuple (cpu cgroup path, memory cgroup path) for the given unit. An item in the tuple is
    None when the mount point of the corresponding controller is None.

    Each path is the controller's mount point joined with the unit's ControlGroup property minus its
    first character, e.g.
        ControlGroup=/azure.slice/walinuxagent.service  ->  <mount point>/azure.slice/walinuxagent.service
    """
    control_group = systemd.get_unit_property(unit_name, "ControlGroup")
    cpu_root, memory_root = self.get_cgroup_mount_points()

    # control_group[1:] drops the first character (the leading "/" in the example above) so that
    # os.path.join() appends it to the mount point
    cpu_path = None
    if cpu_root is not None:
        cpu_path = os.path.join(cpu_root, control_group[1:])

    memory_path = None
    if memory_root is not None:
        memory_path = os.path.join(memory_root, control_group[1:])

    return cpu_path, memory_path