Пример #1
0
        def __collect_azure_unit_telemetry():
            """
            Logs information about the systemd units whose name starts with "azure".

            For each unit listed by 'systemctl list-units' the slice of the unit is queried and logged. If no
            such unit is found, the output of 'systemd-cgls' is searched for an azure.slice cgroup instead.

            A CommandError raised by either command is logged as a warning and not propagated; any error
            while querying the Slice of a unit is logged as a warning and the slice is reported as "Unknown".
            """
            azure_units = []

            try:
                # NOTE(review): '-all' is presumably parsed by systemctl as the short options -a -l -l;
                # confirm that '--all' was not the intent.
                units = shellutil.run_command(['systemctl', 'list-units', 'azure*', '-all'])
                for line in units.split('\n'):
                    match = re.match(r'\s?(azure[^\s]*)\s?', line, re.IGNORECASE)
                    if match is not None:
                        # Keep the whole line; it is used as the description of the unit in the log message
                        azure_units.append((match.group(1), line))
            except shellutil.CommandError as command_error:
                _log_cgroup_warning("Failed to list systemd units: {0}", ustr(command_error))

            for unit_name, unit_description in azure_units:
                unit_slice = "Unknown"
                try:
                    unit_slice = systemd.get_unit_property(unit_name, "Slice")
                except Exception as exception:
                    _log_cgroup_warning("Failed to query Slice for {0}: {1}", unit_name, ustr(exception))

                _log_cgroup_info("Found an Azure unit under slice {0}: {1}", unit_slice, unit_description)

            if not azure_units:
                try:
                    cgroups = shellutil.run_command('systemd-cgls')
                    for line in cgroups.split('\n'):
                        # The line must start with one or more characters outside the \x00-\xff range
                        # (the prefix that precedes azure.slice in the tree printed by systemd-cgls)
                        if re.match(r'[^\x00-\xff]+azure\.slice\s*', line, re.UNICODE):
                            logger.info(ustr("Found a cgroup for azure.slice\n{0}").format(cgroups))
                            # Don't add the output of systemd-cgls to the telemetry, since currently it does not support Unicode
                            add_event(op=WALAEventOperation.CGroupsInfo, message="Found a cgroup for azure.slice")
                            # The full output was already logged; report the finding only once
                            break
                except shellutil.CommandError as command_error:
                    # Name the command that actually failed (this is not the 'systemctl list-units' call above)
                    _log_cgroup_warning("Failed to run systemd-cgls: {0}", ustr(command_error))
Пример #2
0
        def __get_agent_cgroups(self, agent_slice, cpu_controller_root, memory_controller_root):
            """
            Returns a tuple (cpu, memory) with the paths of the agent's cgroups for the CPU and memory controllers.

            The expected relative path of both cgroups is <agent_slice>/<agent unit name>. An item of the tuple
            is None when the root of the corresponding controller is None, when the agent's process is not
            within a cgroup for that controller, or when that cgroup is not the expected one.

            :param agent_slice: slice the agent's unit belongs to
            :param cpu_controller_root: root path of the CPU controller, or None
            :param memory_controller_root: root path of the memory controller, or None
            """
            agent_unit_name = systemd.get_agent_unit_name()

            expected_relative_path = os.path.join(agent_slice, agent_unit_name)
            cpu_cgroup_relative_path, memory_cgroup_relative_path = self._cgroups_api.get_process_cgroup_relative_paths("self")

            if cpu_cgroup_relative_path is None:
                _log_cgroup_warning("The agent's process is not within a CPU cgroup")
            elif cpu_cgroup_relative_path == expected_relative_path:
                _log_cgroup_info('CPUAccounting: {0}', systemd.get_unit_property(agent_unit_name, "CPUAccounting"))
                _log_cgroup_info('CPUQuota: {0}', systemd.get_unit_property(agent_unit_name, "CPUQuotaPerSecUSec"))
            else:
                # Log the actual path *before* clearing it; otherwise the message always reports "Cgroup:[None]"
                _log_cgroup_warning(
                    "The Agent is not in the expected CPU cgroup; will not enable monitoring. Cgroup:[{0}] Expected:[{1}]",
                    cpu_cgroup_relative_path,
                    expected_relative_path)
                cpu_cgroup_relative_path = None  # Set the path to None to prevent monitoring

            if memory_cgroup_relative_path is None:
                _log_cgroup_warning("The agent's process is not within a memory cgroup")
            elif memory_cgroup_relative_path == expected_relative_path:
                memory_accounting = systemd.get_unit_property(agent_unit_name, "MemoryAccounting")
                _log_cgroup_info('MemoryAccounting: {0}', memory_accounting)
            else:
                # Same as for the CPU cgroup: log the actual path before clearing it
                _log_cgroup_info(
                    "The Agent is not in the expected memory cgroup; will not enable monitoring. CGroup:[{0}] Expected:[{1}]",
                    memory_cgroup_relative_path,
                    expected_relative_path)
                memory_cgroup_relative_path = None  # Set the path to None to prevent monitoring

            # An absolute path can be built only when both the controller root and the relative path are known
            if cpu_controller_root is not None and cpu_cgroup_relative_path is not None:
                agent_cpu_cgroup_path = os.path.join(cpu_controller_root, cpu_cgroup_relative_path)
            else:
                agent_cpu_cgroup_path = None

            if memory_controller_root is not None and memory_cgroup_relative_path is not None:
                agent_memory_cgroup_path = os.path.join(memory_controller_root, memory_cgroup_relative_path)
            else:
                agent_memory_cgroup_path = None

            return agent_cpu_cgroup_path, agent_memory_cgroup_path
Пример #3
0
    def test_get_unit_property_should_return_the_value_of_the_given_property(
            self):
        """
        Within the mock cgroup environment, get_unit_property should return "no" for the
        CPUAccounting property of walinuxagent.service.
        """
        unit_name = "walinuxagent.service"
        property_name = "CPUAccounting"

        with mock_cgroup_environment(self.tmp_dir):
            actual_value = systemd.get_unit_property(unit_name, property_name)

            failure_message = "Property {0} of {1} is incorrect".format(property_name, unit_name)
            self.assertEqual(actual_value, "no", failure_message)
Пример #4
0
        def __collect_agent_unit_files_telemetry():
            """
            Logs the path and contents of the custom unit files of the agent's service.

            The files considered are the unit's FragmentPath, when it differs from
            /lib/systemd/system/<service name>.service, and every path listed in its DropInPaths.
            Errors while querying the properties or reading the files are logged as warnings.
            """
            service_name = get_osutil().get_service_name()
            custom_unit_files = []

            # The main unit file is reported only when it is not at the default location
            try:
                fragment_path = systemd.get_unit_property(service_name, "FragmentPath")
                default_fragment_path = "/lib/systemd/system/{0}.service".format(service_name)
                if fragment_path != default_fragment_path:
                    custom_unit_files.append(fragment_path)
            except Exception as exception:
                _log_cgroup_warning("Failed to query the agent's FragmentPath: {0}", ustr(exception))

            # DropInPaths is a whitespace-separated list of paths; all of them are reported
            try:
                custom_unit_files.extend(systemd.get_unit_property(service_name, "DropInPaths").split())
            except Exception as exception:
                _log_cgroup_warning("Failed to query the agent's DropInPaths: {0}", ustr(exception))

            for path in custom_unit_files:
                try:
                    with open(path, "r") as unit_file:
                        contents = unit_file.read()
                        _log_cgroup_info("Found a custom unit file for the agent: {0}\n{1}", path, contents)
                except Exception as exception:
                    _log_cgroup_warning("Can't read {0}: {1}", path, ustr(exception))
Пример #5
0
        def initialize(self):
            """
            Sets up cgroup monitoring for the agent; does nothing if initialization was already attempted.

            The steps below run in order and the method returns at the first check that fails. Any exception
            is logged as a warning instead of being propagated. self._initialized is set to True on every
            exit path (including early returns and errors), so the initialization is attempted only once.
            """
            try:
                if self._initialized:
                    return

                # check whether cgroup monitoring is supported on the current distro
                self._cgroups_supported = CGroupsApi.cgroups_supported()
                if not self._cgroups_supported:
                    logger.info("Cgroup monitoring is not supported on {0}", get_distro())
                    return

                # check that systemd is detected correctly
                self._cgroups_api = SystemdCgroupsApi()
                if not systemd.is_systemd():
                    _log_cgroup_warning("systemd was not detected on {0}", get_distro())
                    return

                _log_cgroup_info("systemd version: {0}", systemd.get_version())

                # This is temporarily disabled while we analyze telemetry. Likely it will be removed.
                # self.__collect_azure_unit_telemetry()
                # self.__collect_agent_unit_files_telemetry()

                # stop here when the legacy cgroups check does not pass
                if not self.__check_no_legacy_cgroups():
                    return

                # the agent's unit must be within the Azure slice or system.slice
                agent_unit_name = systemd.get_agent_unit_name()
                agent_slice = systemd.get_unit_property(agent_unit_name, "Slice")
                if agent_slice not in (_AZURE_SLICE, "system.slice"):
                    _log_cgroup_warning("The agent is within an unexpected slice: {0}", agent_slice)
                    return

                self.__setup_azure_slice()

                # resolve the paths of the agent's CPU and memory cgroups; either one can be None
                cpu_controller_root, memory_controller_root = self.__get_cgroup_controllers()
                self._agent_cpu_cgroup_path, self._agent_memory_cgroup_path = self.__get_agent_cgroups(agent_slice, cpu_controller_root, memory_controller_root)

                # cgroups are enabled, and the agent's CPU cgroup tracked, only when the CPU cgroup path is known
                if self._agent_cpu_cgroup_path is not None:
                    _log_cgroup_info("Agent CPU cgroup: {0}", self._agent_cpu_cgroup_path)
                    self.enable()
                    CGroupsTelemetry.track_cgroup(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path))

                _log_cgroup_info('Cgroups enabled: {0}', self._cgroups_enabled)

            except Exception as exception:
                _log_cgroup_warning("Error initializing cgroups: {0}", ustr(exception))
            finally:
                # mark as initialized even on failure or early return so that the setup is not retried
                self._initialized = True
Пример #6
0
    def get_unit_cgroup_paths(self, unit_name):
        """
        Returns a tuple (cpu, memory) with the paths of the cgroups of the given unit.

        Each path is the mount point of the controller joined with the ControlGroup property of the
        unit without its first character; an item is None if the controller is not mounted.
        Ex: ControlGroup=/azure.slice/walinuxagent.service
        relative path = azure.slice/walinuxagent.service
        """
        control_group = systemd.get_unit_property(unit_name, "ControlGroup")
        cpu_root, memory_root = self.get_cgroup_mount_points()

        cpu_path = None
        if cpu_root is not None:
            # [1:] drops the first character of ControlGroup (the leading "/" in the example above)
            cpu_path = os.path.join(cpu_root, control_group[1:])

        memory_path = None
        if memory_root is not None:
            memory_path = os.path.join(memory_root, control_group[1:])

        return cpu_path, memory_path