Пример #1
0
    def test_start_extension_command_should_start_tracking_the_extension_cgroups(
            self):
        # CPU usage is initialized when we begin tracking a CPU cgroup; since this test does not retrieve the
        # CPU usage, there is no need for initialization
        with patch(
                "azurelinuxagent.common.cgroup.CpuCgroup.initialize_cpu_usage"
        ):
            CGroupConfigurator.get_instance().start_extension_command(
                extension_name="Microsoft.Compute.TestExtension-1.2.3",
                command="date",
                timeout=300,
                shell=False,
                cwd=self.tmp_dir,
                env={},
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE)

        self.assertTrue(
            CGroupsTelemetry.is_tracked(
                os.path.join(
                    self.cgroups_file_system_root, "cpu",
                    "walinuxagent.extensions/Microsoft.Compute.TestExtension_1.2.3"
                )))
        self.assertTrue(
            CGroupsTelemetry.is_tracked(
                os.path.join(
                    self.cgroups_file_system_root, "memory",
                    "walinuxagent.extensions/Microsoft.Compute.TestExtension_1.2.3"
                )))
Пример #2
0
    def test_cleanup_legacy_cgroups_should_disable_cgroups_when_the_daemon_was_added_to_the_legacy_cgroup_on_systemd(self, _):
        # Set up a mock /var/run/waagent.pid file
        daemon_pid = "42"
        daemon_pid_file = os.path.join(self.tmp_dir, "waagent.pid")
        fileutil.write_file(daemon_pid_file, daemon_pid + "\n")

        # Set up old controller cgroups and add the daemon PID to them
        CGroupsTools.create_legacy_agent_cgroup(self.cgroups_file_system_root, "cpu", daemon_pid)
        CGroupsTools.create_legacy_agent_cgroup(self.cgroups_file_system_root, "memory", daemon_pid)

        # Start tracking a couple of dummy cgroups
        CGroupsTelemetry.track_cgroup(CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service", "cpu"))
        CGroupsTelemetry.track_cgroup(CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service", "memory"))

        cgroup_configurator = CGroupConfigurator.get_instance()

        with patch("azurelinuxagent.common.cgroupconfigurator.add_event") as mock_add_event:
            with patch("azurelinuxagent.common.cgroupapi.get_agent_pid_file_path", return_value=daemon_pid_file):
                    cgroup_configurator.cleanup_legacy_cgroups()

        self.assertEquals(len(mock_add_event.call_args_list), 1)
        _, kwargs = mock_add_event.call_args_list[0]
        self.assertEquals(kwargs['op'], 'CGroupsCleanUp')
        self.assertFalse(kwargs['is_success'])
        self.assertEquals(
            kwargs['message'],
            "Failed to process legacy cgroups. Collection of resource usage data will be disabled. [CGroupsException] The daemon's PID ({0}) was already added to the legacy cgroup; this invalidates resource usage data.".format(daemon_pid))

        self.assertFalse(cgroup_configurator.enabled())
        self.assertEquals(len(CGroupsTelemetry._tracked), 0)
 def _assert_polled_metrics_equal(self,
                                  metrics,
                                  cpu_metric_value,
                                  memory_metric_value,
                                  max_memory_metric_value,
                                  proc_stat_memory_usage_value,
                                  pids=None):
     for metric in metrics:
         self.assertIn(metric.category, ["Process", "Memory"])
         if metric.category == "Process":
             self.assertEqual(metric.counter, "% Processor Time")
             self.assertEqual(metric.value, cpu_metric_value)
         if metric.category == "Memory":
             self.assertIn(metric.counter, [
                 "Total Memory Usage", "Max Memory Usage",
                 "Memory Used by Process"
             ])
             if metric.counter == "Total Memory Usage":
                 self.assertEqual(metric.value, memory_metric_value)
             elif metric.counter == "Max Memory Usage":
                 self.assertEqual(metric.value, max_memory_metric_value)
             elif metric.counter == "Memory Used by Process":
                 if pids:
                     processes_instances = [
                         CGroupsTelemetry.get_process_info_summary(pid)
                         for pid in pids
                     ]
                 else:
                     processes_instances = [
                         CGroupsTelemetry.get_process_info_summary(pid)
                         for pid in TestCGroupsTelemetry.TestProcessIds
                     ]
                 self.assertIn(metric.instance, processes_instances)
                 self.assertEqual(metric.value,
                                  proc_stat_memory_usage_value)
    def test_telemetry_polling_with_inactive_cgroups(self, *_):
        num_extensions = 5
        no_extensions_expected = 0

        self._track_new_extension_cgroups(num_extensions)
        self._assert_cgroups_are_tracked(num_extensions)

        metrics = CGroupsTelemetry.poll_all_tracked()

        for i in range(num_extensions):
            self.assertFalse(
                CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(i)))
            self.assertFalse(
                CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(i)))

        self.assertEqual(CGroupsTelemetry._cgroup_metrics.__len__(),
                         num_extensions)
        self._assert_calculated_resource_metrics_equal([], [], [], [],
                                                       proc_ids=None)
        self.assertEqual(len(metrics), 0)

        collected_metrics = CGroupsTelemetry.report_all_tracked()
        self._assert_extension_metrics_data(collected_metrics,
                                            num_extensions, [], [], [], [],
                                            is_cpu_present=False,
                                            is_memory_present=False)
        self.assertEqual(CGroupsTelemetry._cgroup_metrics.__len__(),
                         no_extensions_expected)
        self._assert_calculated_resource_metrics_equal([], [], [], [], [])
Пример #5
0
    def exercise_telemetry_instantiation(self, test_cgroup):
        test_extension_name = test_cgroup.name
        CGroupsTelemetry.track_cgroup(test_cgroup)
        self.assertIn('cpu', test_cgroup.cgroups)
        self.assertIn('memory', test_cgroup.cgroups)
        self.assertTrue(CGroupsTelemetry.is_tracked(test_extension_name))
        consume_cpu_time()
        time.sleep(1)
        metrics, limits = CGroupsTelemetry.report_all_tracked()
        my_metrics = metrics[test_extension_name]
        self.assertEqual(len(my_metrics), 2)
        for item in my_metrics:
            metric_family, metric_name, metric_value = item
            if metric_family == "Process":
                self.assertEqual(metric_name, "% Processor Time")
                self.assertGreater(metric_value, 0.0)
            elif metric_family == "Memory":
                self.assertEqual(metric_name, "Total Memory Usage")
                self.assertGreater(metric_value, 100000)
            else:
                self.fail("Unknown metric {0}/{1} value {2}".format(
                    metric_family, metric_name, metric_value))

        my_limits = limits[test_extension_name]
        self.assertIsInstance(my_limits,
                              CGroupsLimits,
                              msg="is not the correct instance")
        self.assertGreater(my_limits.cpu_limit, 0.0)
        self.assertGreater(my_limits.memory_limit, 0.0)
Пример #6
0
 def disable(self, reason):
     self._cgroups_enabled = False
     message = "[CGW] Disabling resource usage monitoring. Reason: {0}".format(reason)
     logger.info(message)  # log as INFO for now, in the future it should be logged as WARNING
     add_event(op=WALAEventOperation.CGroupsDisabled, message=message, is_success=False, log_event=False)
     self.__reset_cpu_quota()
     CGroupsTelemetry.reset()
Пример #7
0
 def track_cgroups(extension_cgroups):
     try:
         for cgroup in extension_cgroups:
             CGroupsTelemetry.track_cgroup(cgroup)
     except Exception as e:
         logger.warn("Cannot add cgroup '{0}' to tracking list; resource usage will not be tracked. "
                     "Error: {1}".format(cgroup.path, ustr(e)))
            def __impl():
                cgroups = self._cgroups_api.create_agent_cgroups()

                if track_cgroups:
                    for cgroup in cgroups:
                        CGroupsTelemetry.track_cgroup(cgroup)

                return cgroups
    def _track_new_extension_cgroups(num_extensions):
        for i in range(num_extensions):
            dummy_cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(i), "cpu", "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)

            dummy_memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(i), "memory",
                                                "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)
 def test_cgroup_is_tracked(self, *args):
     num_extensions = 5
     self._track_new_extension_cgroups(num_extensions)
     self._assert_cgroups_are_tracked(num_extensions)
     self.assertFalse(
         CGroupsTelemetry.is_tracked("not_present_cpu_dummy_path"))
     self.assertFalse(
         CGroupsTelemetry.is_tracked("not_present_memory_dummy_path"))
    def test_start_extension_command_should_disable_cgroups_and_invoke_the_command_directly_if_systemd_times_out(
            self, _):
        with self._get_cgroup_configurator() as configurator:
            # Systemd has its own internal timeout which is shorter than what we define for extension operation timeout.
            # When systemd times out, it will write a message to stderr and exit with exit code 1.
            # In that case, we will internally recognize the failure due to the non-zero exit code, not as a timeout.
            configurator.mocks.add_command(
                MockCommand(
                    "systemd-run",
                    return_value=1,
                    stdout='',
                    stderr=
                    'Failed to start transient scope unit: Connection timed out'
                ))

            with tempfile.TemporaryFile(dir=self.tmp_dir,
                                        mode="w+b") as stdout:
                with tempfile.TemporaryFile(dir=self.tmp_dir,
                                            mode="w+b") as stderr:
                    with patch("subprocess.Popen",
                               wraps=subprocess.Popen) as popen_patch:
                        CGroupsTelemetry.reset()

                        configurator.start_extension_command(
                            extension_name=
                            "Microsoft.Compute.TestExtension-1.2.3",
                            command="echo 'success'",
                            timeout=300,
                            shell=True,
                            cwd=self.tmp_dir,
                            env={},
                            stdout=stdout,
                            stderr=stderr)

                        self.assertFalse(configurator.enabled(),
                                         "Cgroups should have been disabled")

                        extension_calls = [
                            args[0] for (args, _) in popen_patch.call_args_list
                            if "echo 'success'" in args[0]
                        ]
                        self.assertEqual(
                            2, len(extension_calls),
                            "The extension should have been called twice. Got: {0}"
                            .format(extension_calls))
                        self.assertIn(
                            "systemd-run --unit=Microsoft.Compute.TestExtension_1.2.3",
                            extension_calls[0],
                            "The first call to the extension should have used systemd"
                        )
                        self.assertNotIn(
                            "systemd-run", extension_calls[1],
                            "The second call to the extension should not have used systemd"
                        )

                        self.assertEqual(
                            len(CGroupsTelemetry._tracked), 0,
                            "No cgroups should have been created")
Пример #12
0
 def setUp(self):
     AgentTestCase.setUp(self)
     event.init_event_logger(os.path.join(self.tmp_dir, EVENTS_DIRECTORY))
     CGroupsTelemetry.reset()
     clear_singleton_instances(ProtocolUtil)
     protocol = WireProtocol('endpoint')
     protocol.update_goal_state = MagicMock()
     self.get_protocol = patch('azurelinuxagent.common.protocol.util.ProtocolUtil.get_protocol', return_value=protocol)
     self.get_protocol.start()
 def disable_cgroups(exception):
     self.disable()
     CGroupsTelemetry.reset()
     add_event(AGENT_NAME,
               version=CURRENT_VERSION,
               op=WALAEventOperation.CGroupsCleanUp,
               is_success=False,
               log_event=False,
               message='{0} {1}'.format(message, ustr(exception)))
    def poll_telemetry_metrics(self):
        time_now = datetime.datetime.utcnow()
        if not self.last_cgroup_polling_telemetry:
            self.last_cgroup_polling_telemetry = time_now

        if time_now >= (self.last_cgroup_polling_telemetry +
                        MonitorHandler.CGROUP_TELEMETRY_POLLING_PERIOD):
            CGroupsTelemetry.poll_all_tracked()
            self.last_cgroup_polling_telemetry = time_now
Пример #15
0
    def test_start_extension_command_should_invoke_the_command_directly_if_systemd_times_out(
            self, _):
        # Systemd has its own internal timeout which is shorter than what we define for extension operation timeout.
        # When systemd times out, it will write a message to stderr and exit with exit code 1.
        # In that case, we will internally recognize the failure due to the non-zero exit code, not as a timeout.
        original_popen = subprocess.Popen
        systemd_timeout_command = "echo 'Failed to start transient scope unit: Connection timed out' >&2 && exit 1"

        def mock_popen(*args, **kwargs):
            # If trying to invoke systemd, mock what would happen if systemd timed out internally:
            # write failure to stderr and exit with exit code 1.
            new_args = args
            if "systemd-run" in args[0]:
                new_args = (systemd_timeout_command, )

            return original_popen(new_args, **kwargs)

        expected_output = "[stdout]\n{0}\n\n\n[stderr]\n"

        with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout:
            with tempfile.TemporaryFile(dir=self.tmp_dir,
                                        mode="w+b") as stderr:
                with patch("azurelinuxagent.common.cgroupapi.subprocess.Popen",
                           side_effect=mock_popen) as popen_patch:
                    CGroupsTelemetry.reset()

                    SystemdCgroupsApi().start_extension_command(
                        extension_name="Microsoft.Compute.TestExtension-1.2.3",
                        command="echo 'success'",
                        timeout=300,
                        shell=True,
                        cwd=self.tmp_dir,
                        env={},
                        stdout=stdout,
                        stderr=stderr)

                    extension_calls = [
                        args[0] for (args, _) in popen_patch.call_args_list
                        if "echo 'success'" in args[0]
                    ]

                    self.assertEquals(
                        2, len(extension_calls),
                        "The extension should have been invoked exactly twice")
                    self.assertIn(
                        "systemd-run --unit=Microsoft.Compute.TestExtension_1.2.3",
                        extension_calls[0],
                        "The first call to the extension should have used systemd"
                    )
                    self.assertEquals(
                        "echo 'success'", extension_calls[1],
                        "The second call to the extension should not have used systemd"
                    )

                    self.assertEquals(len(CGroupsTelemetry._tracked), 0,
                                      "No cgroups should have been created")
Пример #16
0
        def __reset_cpu_quota():
            """
            Removes any CPUQuota on the agent

            NOTE: This resets the quota on the agent's default dropin file; any local overrides on the VM will take precedence
            over this setting.
            """
            logger.info("Resetting agent's CPUQuota")
            if CGroupConfigurator._Impl.__try_set_cpu_quota(''):  # setting an empty value resets to the default (infinity)
                CGroupsTelemetry.set_track_throttled_time(False)
Пример #17
0
    def test_cleanup_legacy_cgroups_should_disable_cgroups_when_it_fails_to_process_legacy_cgroups(
            self):
        # Set up a mock /var/run/waagent.pid file
        daemon_pid = "42"
        daemon_pid_file = os.path.join(self.tmp_dir, "waagent.pid")
        fileutil.write_file(daemon_pid_file, daemon_pid + "\n")

        # Set up old controller cgroups and add the daemon PID to them
        CGroupsTools.create_legacy_agent_cgroup(self.cgroups_file_system_root,
                                                "cpu", daemon_pid)
        CGroupsTools.create_legacy_agent_cgroup(self.cgroups_file_system_root,
                                                "memory", daemon_pid)

        # Set up new controller cgroups and add extension handler's PID to them
        CGroupsTools.create_agent_cgroup(self.cgroups_file_system_root, "cpu",
                                         "999")
        CGroupsTools.create_agent_cgroup(self.cgroups_file_system_root,
                                         "memory", "999")

        def mock_append_file(filepath, contents, **kwargs):
            if re.match(r'/.*/cpu/.*/cgroup.procs', filepath):
                raise OSError(errno.ENOSPC, os.strerror(errno.ENOSPC))
            fileutil.append_file(filepath, contents, **kwargs)

        # Start tracking a couple of dummy cgroups
        CGroupsTelemetry.track_cgroup(
            CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service",
                   "cpu"))
        CGroupsTelemetry.track_cgroup(
            CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service",
                   "memory"))

        cgroup_configurator = CGroupConfigurator.get_instance()

        with patch("azurelinuxagent.common.cgroupconfigurator.add_event"
                   ) as mock_add_event:
            with patch(
                    "azurelinuxagent.common.cgroupapi.get_agent_pid_file_path",
                    return_value=daemon_pid_file):
                with patch(
                        "azurelinuxagent.common.cgroupapi.fileutil.append_file",
                        side_effect=mock_append_file):
                    cgroup_configurator.cleanup_legacy_cgroups()

        self.assertEquals(len(mock_add_event.call_args_list), 1)
        _, kwargs = mock_add_event.call_args_list[0]
        self.assertEquals(kwargs['op'], 'CGroupsCleanUp')
        self.assertFalse(kwargs['is_success'])
        self.assertEquals(
            kwargs['message'],
            'Failed to process legacy cgroups. Collection of resource usage data will be disabled. [Errno 28] No space left on device'
        )

        self.assertFalse(cgroup_configurator.enabled())
        self.assertEquals(len(CGroupsTelemetry._tracked), 0)
Пример #18
0
    def test_disable_should_reset_tracked_cgroups(self):
        configurator = CGroupConfigurator.get_instance()

        # Start tracking a couple of dummy cgroups
        CGroupsTelemetry.track_cgroup(CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service", "cpu"))
        CGroupsTelemetry.track_cgroup(CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service", "memory"))

        configurator.disable()

        self.assertFalse(configurator.enabled())
        self.assertEquals(len(CGroupsTelemetry._tracked), 0)
Пример #19
0
        def __set_cpu_quota(quota):
            """
            Sets the agent's CPU quota to the given percentage (100% == 1 CPU)

            NOTE: This is done using a dropin file in the default dropin directory; any local overrides on the VM will take precedence
            over this setting.
            """
            quota_percentage = "{0}%".format(quota)
            _log_cgroup_info("Ensuring the agent's CPUQuota is {0}", quota_percentage)
            if CGroupConfigurator._Impl.__try_set_cpu_quota(quota_percentage):
                CGroupsTelemetry.set_track_throttled_time(True)
    def test_telemetry_polling_to_generate_transient_logs_index_error(self):
        num_extensions = 1
        self._track_new_extension_cgroups(num_extensions)

        # Generating a different kind of error (non-IOError) to check the logging.
        # Trying to invoke IndexError during the getParameter call
        with patch("azurelinuxagent.common.utils.fileutil.read_file", return_value=''):
            with patch("azurelinuxagent.common.logger.periodic_warn") as patch_periodic_warn:
                expected_call_count = 2  # 1 periodic warning for the cpu cgroups, and 1 for memory
                for data_count in range(1, 10):
                    CGroupsTelemetry.poll_all_tracked()
                    self.assertEqual(expected_call_count, patch_periodic_warn.call_count)
Пример #21
0
        def stop_tracking_unit_cgroups(self, unit_name):
            """
            TODO: remove Memory cgroups from tracked list.
            """
            try:
                cpu_cgroup_path, _ = self._cgroups_api.get_unit_cgroup_paths(unit_name)

                if cpu_cgroup_path is not None:
                    CGroupsTelemetry.stop_tracking(CpuCgroup(unit_name, cpu_cgroup_path))

            except Exception as exception:
                logger.info("Failed to stop tracking resource usage for the extension service: {0}", ustr(exception))
    def test_process_cgroup_metric_with_no_cpu_cgroup_mounted(self, *args):
        num_extensions = 5

        self._track_new_extension_cgroups(num_extensions)

        with patch(
                "azurelinuxagent.common.cgroup.MemoryCgroup.get_max_memory_usage"
        ) as patch_get_memory_max_usage:
            with patch(
                    "azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage"
            ) as patch_get_memory_usage:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active"
                           ) as patch_is_active:
                    patch_is_active.return_value = True

                    current_memory = 209715200
                    current_max_memory = 471859200

                    patch_get_memory_usage.return_value = current_memory  # example 200 MB
                    patch_get_memory_max_usage.return_value = current_max_memory  # example 450 MB
                    num_polls = 10
                    for data_count in range(1, num_polls + 1):
                        metrics = CGroupsTelemetry.poll_all_tracked()
                        self.assertEqual(len(CGroupsTelemetry._cgroup_metrics),
                                         num_extensions)
                        self._assert_calculated_resource_metrics_equal(
                            cpu_usage=[],
                            memory_usage=[current_memory] * data_count,
                            max_memory_usage=[current_max_memory] * data_count,
                            memory_statm_memory_usage=[
                                TestCGroupsTelemetry.TestProcStatmMemoryUsed
                            ] * data_count,
                            proc_ids=TestCGroupsTelemetry.TestProcessIds)
                        # Memory is only populated, CPU is not. Thus 5 metrics per cgroup.
                        self.assertEqual(len(metrics), num_extensions * 5)
                        self._assert_polled_metrics_equal(
                            metrics, 0, current_memory, current_max_memory,
                            TestCGroupsTelemetry.TestProcStatmMemoryUsed)

                    collected_metrics = CGroupsTelemetry.report_all_tracked()
                    self._assert_extension_metrics_data(
                        collected_metrics,
                        num_extensions, [],
                        [TestCGroupsTelemetry.TestProcStatmMemoryUsed] *
                        num_polls, [current_memory] * num_polls,
                        [current_max_memory] * num_polls,
                        is_cpu_present=False)

                    self.assertEqual(len(CGroupsTelemetry._cgroup_metrics),
                                     num_extensions)
                    self._assert_calculated_resource_metrics_equal([], [], [],
                                                                   [], [])
    def test_disable_should_reset_tracked_cgroups(self):
        # Start tracking a couple of dummy cgroups
        CGroupsTelemetry.track_cgroup(
            CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service",
                   "cpu"))
        CGroupsTelemetry.track_cgroup(
            CGroup("dummy", "/sys/fs/cgroup/memory/system.slice/dummy.service",
                   "memory"))

        CGroupConfiguratorSystemdTestCase._get_new_cgroup_configurator_instance(
        ).disable()

        self.assertEquals(len(CGroupsTelemetry._tracked), 0)
    def test_cgroup_pruning(self, *args):
        num_extensions = 5
        num_controllers = 2
        self._track_new_extension_cgroups(num_extensions)
        self._assert_cgroups_are_tracked(num_extensions)
        self.assertEqual(num_extensions * num_controllers, len(CGroupsTelemetry._tracked))

        CGroupsTelemetry.prune_all_tracked()
        for i in range(num_extensions):
            self.assertFalse(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(i)))
            self.assertFalse(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(i)))

        self.assertEqual(0, len(CGroupsTelemetry._tracked))
Пример #25
0
        def start_tracking_unit_cgroups(self, unit_name):
            """
            TODO: Start tracking Memory Cgroups
            """
            try:
                cpu_cgroup_path, _ = self._cgroups_api.get_unit_cgroup_paths(unit_name)

                if cpu_cgroup_path is None:
                    logger.info("The CPU controller is not mounted; will not track resource usage")
                else:
                    CGroupsTelemetry.track_cgroup(CpuCgroup(unit_name, cpu_cgroup_path))

            except Exception as exception:
                logger.info("Failed to start tracking resource usage for the extension: {0}", ustr(exception))
    def test_telemetry_polling_to_not_generate_transient_logs_ioerror_file_not_found(self, patch_periodic_warn):
        num_extensions = 1
        self._track_new_extension_cgroups(num_extensions)
        self.assertEqual(0, patch_periodic_warn.call_count)

        # Not expecting logs present for io_error with errno=errno.ENOENT
        io_error_2 = IOError()
        io_error_2.errno = errno.ENOENT

        with patch("azurelinuxagent.common.utils.fileutil.read_file", side_effect=io_error_2):
            poll_count = 1
            for data_count in range(poll_count, 10):
                CGroupsTelemetry.poll_all_tracked()
                self.assertEqual(0, patch_periodic_warn.call_count)
Пример #27
0
    def test_process_cgroup_metric_with_no_memory_cgroup_mounted(self, *args):
        num_extensions = 5

        for i in range(num_extensions):
            dummy_cpu_cgroup = CGroup.create("dummy_cpu_path_{0}".format(i), "cpu", "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_cpu_cgroup)

            dummy_memory_cgroup = CGroup.create("dummy_memory_path_{0}".format(i), "memory",
                                                "dummy_extension_{0}".format(i))
            CGroupsTelemetry.track_cgroup(dummy_memory_cgroup)

        with patch("azurelinuxagent.common.cgroup.CpuCgroup._get_cpu_percent") as patch_get_cpu_percent:
            with patch("azurelinuxagent.common.cgroup.MemoryCgroup.get_memory_usage") as patch_get_memory_usage:
                with patch("azurelinuxagent.common.cgroup.CGroup.is_active") as patch_is_active:
                    patch_is_active.return_value = True
                    patch_get_memory_usage.side_effect = Exception("File not found")

                    current_cpu = 30
                    patch_get_cpu_percent.return_value = current_cpu

                    poll_count = 1

                    for data_count in range(poll_count, 10):
                        CGroupsTelemetry.poll_all_tracked()

                        self.assertEqual(CGroupsTelemetry._cgroup_metrics.__len__(), num_extensions)
                        self._assert_cgroup_metrics_equal(cpu_usage=[current_cpu] * data_count, memory_usage=[], max_memory_usage=[])

                    CGroupsTelemetry.report_all_tracked()

                    self.assertEqual(CGroupsTelemetry._cgroup_metrics.__len__(), num_extensions)
                    self._assert_cgroup_metrics_equal([], [], [])
    def test_telemetry_polling_with_inactive_cgroups(self, *_):
        num_extensions = 5
        no_extensions_expected = 0

        self._track_new_extension_cgroups(num_extensions)
        self._assert_cgroups_are_tracked(num_extensions)

        metrics = CGroupsTelemetry.poll_all_tracked()

        for i in range(num_extensions):
            self.assertFalse(CGroupsTelemetry.is_tracked("dummy_cpu_path_{0}".format(i)))
            self.assertFalse(CGroupsTelemetry.is_tracked("dummy_memory_path_{0}".format(i)))

        self.assertEqual(len(metrics), 0)
    def test_enable_should_not_track_throttled_time_when_setting_the_cpu_quota_fails(
            self):
        with self._get_cgroup_configurator(enable=False) as configurator:
            if CGroupsTelemetry.get_track_throttled_time():
                raise Exception(
                    "Test setup should not start tracking Throttle Time")

            configurator.mocks.add_file(UnitFilePaths.cpu_quota,
                                        Exception("A TEST EXCEPTION"))

            configurator.enable()

            self.assertFalse(CGroupsTelemetry.get_track_throttled_time(),
                             "Throttle time should not be tracked")
Пример #30
0
        def initialize(self):
            try:
                if self._initialized:
                    return

                # check whether cgroup monitoring is supported on the current distro
                self._cgroups_supported = CGroupsApi.cgroups_supported()
                if not self._cgroups_supported:
                    logger.info("Cgroup monitoring is not supported on {0}", get_distro())
                    return

                # check that systemd is detected correctly
                self._cgroups_api = SystemdCgroupsApi()
                if not systemd.is_systemd():
                    _log_cgroup_warning("systemd was not detected on {0}", get_distro())
                    return

                _log_cgroup_info("systemd version: {0}", systemd.get_version())

                # This is temporarily disabled while we analyze telemetry. Likely it will be removed.
                # self.__collect_azure_unit_telemetry()
                # self.__collect_agent_unit_files_telemetry()

                if not self.__check_no_legacy_cgroups():
                    return

                agent_unit_name = systemd.get_agent_unit_name()
                agent_slice = systemd.get_unit_property(agent_unit_name, "Slice")
                if agent_slice not in (_AZURE_SLICE, "system.slice"):
                    _log_cgroup_warning("The agent is within an unexpected slice: {0}", agent_slice)
                    return

                self.__setup_azure_slice()

                cpu_controller_root, memory_controller_root = self.__get_cgroup_controllers()
                self._agent_cpu_cgroup_path, self._agent_memory_cgroup_path = self.__get_agent_cgroups(agent_slice, cpu_controller_root, memory_controller_root)

                if self._agent_cpu_cgroup_path is not None:
                    _log_cgroup_info("Agent CPU cgroup: {0}", self._agent_cpu_cgroup_path)
                    self.enable()
                    CGroupsTelemetry.track_cgroup(CpuCgroup(AGENT_NAME_TELEMETRY, self._agent_cpu_cgroup_path))

                _log_cgroup_info('Cgroups enabled: {0}', self._cgroups_enabled)

            except Exception as exception:
                _log_cgroup_warning("Error initializing cgroups: {0}", ustr(exception))
            finally:
                self._initialized = True