def test_handle_process_completion_should_raise_on_timeout(self):
        command = "sleep 1m"
        timeout = 20
        with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout:
            with tempfile.TemporaryFile(dir=self.tmp_dir,
                                        mode="w+b") as stderr:
                with patch('time.sleep') as mock_sleep:
                    with self.assertRaises(ExtensionError) as context_manager:
                        process = subprocess.Popen(
                            command,  # pylint: disable=subprocess-popen-preexec-fn
                            shell=True,
                            cwd=self.tmp_dir,
                            env={},
                            stdout=stdout,
                            stderr=stderr,
                            preexec_fn=os.setsid)

                        handle_process_completion(process=process,
                                                  command=command,
                                                  timeout=timeout,
                                                  stdout=stdout,
                                                  stderr=stderr,
                                                  error_code=42)

                    # We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure
                    # we're "waiting" the correct amount of time before killing the process and raising an exception
                    # Due to an extra call to sleep at some point in the call stack which only happens sometimes,
                    # we are relaxing this assertion to allow +/- 2 sleep calls.
                    self.assertTrue(abs(mock_sleep.call_count - timeout) <= 2)

                    self.assertEqual(
                        context_manager.exception.code,
                        ExtensionErrorCodes.PluginHandlerScriptTimedout)
                    self.assertIn("Timeout({0})".format(timeout),
                                  ustr(context_manager.exception))
    def test_handle_process_completion_should_raise_on_nonzero_exit_code(self):
        command = "ls folder_does_not_exist"
        error_code = 42
        with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout:
            with tempfile.TemporaryFile(dir=self.tmp_dir,
                                        mode="w+b") as stderr:
                with self.assertRaises(ExtensionError) as context_manager:
                    process = subprocess.Popen(
                        command,  # pylint: disable=subprocess-popen-preexec-fn
                        shell=True,
                        cwd=self.tmp_dir,
                        env={},
                        stdout=stdout,
                        stderr=stderr,
                        preexec_fn=os.setsid)

                    handle_process_completion(process=process,
                                              command=command,
                                              timeout=4,
                                              stdout=stdout,
                                              stderr=stderr,
                                              error_code=error_code)

                self.assertEqual(context_manager.exception.code, error_code)
                self.assertIn("Non-zero exit code:",
                              ustr(context_manager.exception))
    def test_handle_process_completion_should_raise_on_timeout(self):
        command = "sleep 1m"
        timeout = 5
        with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout:
            with tempfile.TemporaryFile(dir=self.tmp_dir,
                                        mode="w+b") as stderr:
                with patch('time.sleep') as mock_sleep:
                    with self.assertRaises(ExtensionError) as context_manager:
                        process = subprocess.Popen(command,
                                                   shell=True,
                                                   cwd=self.tmp_dir,
                                                   env={},
                                                   stdout=stdout,
                                                   stderr=stderr,
                                                   preexec_fn=os.setsid)

                        handle_process_completion(process=process,
                                                  command=command,
                                                  timeout=timeout,
                                                  stdout=stdout,
                                                  stderr=stderr,
                                                  error_code=42)

                    # We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure
                    # we're "waiting" the correct amount of time before killing the process and raising an exception
                    self.assertEquals(mock_sleep.call_count, timeout)

                    self.assertEquals(
                        context_manager.exception.code,
                        ExtensionErrorCodes.PluginHandlerScriptTimedout)
                    self.assertIn("Timeout({0})".format(timeout),
                                  ustr(context_manager.exception))
Пример #4
0
        def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr, error_code=ExtensionErrorCodes.PluginUnknownFailure):
            """
            Starts a command (install/enable/etc) for an extension and adds the command's PID to the extension's cgroup
            :param extension_name: The extension executing the command
            :param command: The command to invoke
            :param cmd_name: The type of the command(enable, install, etc.)
            :param timeout: Number of seconds to wait for command completion
            :param cwd: The working directory for the command
            :param env:  The environment to pass to the command's process
            :param stdout: File object to redirect stdout to
            :param stderr: File object to redirect stderr to
            :param stderr: File object to redirect stderr to
            :param error_code: Extension error code to raise in case of error
            """
            if self.enabled():
                try:
                    return self._cgroups_api.start_extension_command(extension_name, command, cmd_name, timeout, shell=shell, cwd=cwd, env=env, stdout=stdout, stderr=stderr, error_code=error_code)
                except SystemdRunError as exception:
                    reason = 'Failed to start {0} using systemd-run, will try invoking the extension directly. Error: {1}'.format(extension_name, ustr(exception))
                    self.disable(reason, DisableCgroups.ALL)
                    # fall-through and re-invoke the extension

            # subprocess-popen-preexec-fn<W1509> Disabled: code is not multi-threaded
            process = subprocess.Popen(command, shell=shell, cwd=cwd, env=env, stdout=stdout, stderr=stderr, preexec_fn=os.setsid)  # pylint: disable=W1509
            return handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout, stderr=stderr, error_code=error_code)
Пример #5
0
    def start_extension_command(self, extension_name, command, timeout, shell, cwd, env, stdout, stderr,
                                error_code=ExtensionErrorCodes.PluginUnknownFailure):
        """
        Starts a command (install/enable/etc) for an extension and adds the command's PID to the extension's cgroup
        :param extension_name: The extension executing the command
        :param command: The command to invoke
        :param timeout: Number of seconds to wait for command completion
        :param cwd: The working directory for the command
        :param env: The environment to pass to the command's process
        :param stdout: File object to redirect stdout to
        :param stderr: File object to redirect stderr to
        :param error_code: Extension error code to raise in case of error
        """
        try:
            extension_cgroups = self.create_extension_cgroups(extension_name)
        except Exception as exception:
            extension_cgroups = []
            logger.warn("Failed to create cgroups for extension '{0}'; resource usage will not be tracked. "
                        "Error: {1}".format(extension_name, ustr(exception)))

        def pre_exec_function():
            os.setsid()

            try:
                pid = os.getpid()

                for cgroup in extension_cgroups:
                    try:
                        self._add_process_to_cgroup(pid, cgroup.path)
                    except Exception as exception:
                        logger.warn("Failed to add PID {0} to the cgroups for extension '{1}'. "
                                    "Resource usage will not be tracked. Error: {2}".format(pid,
                                                                                            extension_name,
                                                                                            ustr(exception)))
            except Exception as e:
                logger.warn("Failed to add extension {0} to its cgroup. Resource usage will not be tracked. "
                            "Error: {1}".format(extension_name, ustr(e)))

        process = subprocess.Popen(command,
                                   shell=shell,
                                   cwd=cwd,
                                   env=env,
                                   stdout=stdout,
                                   stderr=stderr,
                                   preexec_fn=pre_exec_function)

        self.track_cgroups(extension_cgroups)
        process_output = handle_process_completion(process=process,
                                                   command=command,
                                                   timeout=timeout,
                                                   stdout=stdout,
                                                   stderr=stderr,
                                                   error_code=error_code)

        return extension_cgroups, process_output
Пример #6
0
        def start_extension_command(
            self,
            extension_name,
            command,
            timeout,
            shell,
            cwd,
            env,
            stdout,
            stderr,
            error_code=ExtensionErrorCodes.PluginUnknownFailure):  # pylint: disable=R0913
            """
            Starts a command (install/enable/etc) for an extension and adds the command's PID to the extension's cgroup
            :param extension_name: The extension executing the command
            :param command: The command to invoke
            :param timeout: Number of seconds to wait for command completion
            :param cwd: The working directory for the command
            :param env:  The environment to pass to the command's process
            :param stdout: File object to redirect stdout to
            :param stderr: File object to redirect stderr to
            :param stderr: File object to redirect stderr to
            :param error_code: Extension error code to raise in case of error
            """
            if not self.enabled():
                # subprocess-popen-preexec-fn<W1509> Disabled: code is not multi-threaded
                process = subprocess.Popen(
                    command,  # pylint: disable=W1509
                    shell=shell,
                    cwd=cwd,
                    env=env,
                    stdout=stdout,
                    stderr=stderr,
                    preexec_fn=os.setsid)

                process_output = handle_process_completion(
                    process=process,
                    command=command,
                    timeout=timeout,
                    stdout=stdout,
                    stderr=stderr,
                    error_code=error_code)
            else:
                process_output = self._cgroups_api.start_extension_command(
                    extension_name,
                    command,
                    timeout,
                    shell=shell,
                    cwd=cwd,
                    env=env,
                    stdout=stdout,
                    stderr=stderr,
                    error_code=error_code)

            return process_output
    def test_handle_process_completion_should_return_process_output(self):
        command = "echo 'dummy stdout' && 1>&2 echo 'dummy stderr'"
        with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout:
            with tempfile.TemporaryFile(dir=self.tmp_dir,
                                        mode="w+b") as stderr:
                process = subprocess.Popen(command,
                                           shell=True,
                                           cwd=self.tmp_dir,
                                           env={},
                                           stdout=stdout,
                                           stderr=stderr,
                                           preexec_fn=os.setsid)

                process_output = handle_process_completion(process=process,
                                                           command=command,
                                                           timeout=5,
                                                           stdout=stdout,
                                                           stderr=stderr,
                                                           error_code=42)

        expected_output = "[stdout]\ndummy stdout\n\n\n[stderr]\ndummy stderr\n"
        self.assertEquals(process_output, expected_output)
Пример #8
0
    def start_extension_command(self, extension_name, command, timeout, shell, cwd, env, stdout, stderr,
                                error_code=ExtensionErrorCodes.PluginUnknownFailure):
        scope = "{0}_{1}".format(self._get_extension_cgroup_name(extension_name), uuid.uuid4())

        process = subprocess.Popen(
            "systemd-run --unit={0} --scope {1}".format(scope, command),
            shell=shell,
            cwd=cwd,
            stdout=stdout,
            stderr=stderr,
            env=env,
            preexec_fn=os.setsid)

        scope_name = scope + '.scope'

        logger.info("Started extension in unit '{0}'", scope_name)

        try:
            # systemd-run creates the scope under the system slice by default
            cgroup_relative_path = os.path.join('system.slice', scope_name)

            cpu_cgroup_mountpoint, memory_cgroup_mountpoint = self.get_cgroup_mount_points()

            if cpu_cgroup_mountpoint is None:
                logger.info("The CPU controller is not mounted; will not track resource usage")
            else:
                cpu_cgroup_path = os.path.join(cpu_cgroup_mountpoint, cgroup_relative_path)
                CGroupsTelemetry.track_cgroup(CpuCgroup(extension_name, cpu_cgroup_path))

            if memory_cgroup_mountpoint is None:
                logger.info("The memory controller is not mounted; will not track resource usage")
            else:
                memory_cgroup_path = os.path.join(memory_cgroup_mountpoint, cgroup_relative_path)
                CGroupsTelemetry.track_cgroup(MemoryCgroup(extension_name, memory_cgroup_path))

        except IOError as e:
            if e.errno == 2:  # 'No such file or directory'
                logger.info("The extension command already completed; will not track resource usage")
            logger.info("Failed to start tracking resource usage for the extension: {0}", ustr(e))
        except Exception as e:
            logger.info("Failed to start tracking resource usage for the extension: {0}", ustr(e))

        # Wait for process completion or timeout
        try:
            process_output = handle_process_completion(process=process,
                                                       command=command,
                                                       timeout=timeout,
                                                       stdout=stdout,
                                                       stderr=stderr,
                                                       error_code=error_code)
        except ExtensionError as e:
            # The extension didn't terminate successfully. Determine whether it was due to systemd errors or
            # extension errors.
            process_output = read_output(stdout, stderr)
            systemd_failure = self._is_systemd_failure(scope, process_output)

            if not systemd_failure:
                # There was an extension error; it either timed out or returned a non-zero exit code. Re-raise the error
                raise
            else:
                # There was an issue with systemd-run. We need to log it and retry the extension without systemd.
                err_msg = 'Systemd process exited with code %s and output %s' % (e.exit_code, process_output) \
                    if isinstance(e, ExtensionOperationError) else "Systemd timed-out, output: %s" % process_output
                event_msg = 'Failed to run systemd-run for unit {0}.scope. ' \
                            'Will retry invoking the extension without systemd. ' \
                            'Systemd-run error: {1}'.format(scope, err_msg)
                add_event(op=WALAEventOperation.InvokeCommandUsingSystemd, is_success=False, log_event=False, message=event_msg)
                logger.warn(event_msg)

                # Reset the stdout and stderr
                stdout.truncate(0)
                stderr.truncate(0)

                # Try invoking the process again, this time without systemd-run
                logger.info('Extension invocation using systemd failed, falling back to regular invocation '
                            'without cgroups tracking.')
                process = subprocess.Popen(command,
                                           shell=shell,
                                           cwd=cwd,
                                           env=env,
                                           stdout=stdout,
                                           stderr=stderr,
                                           preexec_fn=os.setsid)

                process_output = handle_process_completion(process=process,
                                                           command=command,
                                                           timeout=timeout,
                                                           stdout=stdout,
                                                           stderr=stderr,
                                                           error_code=error_code)

                return process_output

        # The process terminated in time and successfully
        return process_output
Пример #9
0
    def start_extension_command(
            self,
            extension_name,
            command,
            cmd_name,
            timeout,
            shell,
            cwd,
            env,
            stdout,
            stderr,
            error_code=ExtensionErrorCodes.PluginUnknownFailure):
        scope = "{0}_{1}".format(cmd_name, uuid.uuid4())
        extension_slice_name = self.get_extension_slice_name(extension_name)
        with self._systemd_run_commands_lock:
            process = subprocess.Popen(  # pylint: disable=W1509
                "systemd-run --unit={0} --scope --slice={1} {2}".format(
                    scope, extension_slice_name, command),
                shell=shell,
                cwd=cwd,
                stdout=stdout,
                stderr=stderr,
                env=env,
                preexec_fn=os.setsid)

            # We start systemd-run with shell == True so process.pid is the shell's pid, not the pid for systemd-run
            self._systemd_run_commands.append(process.pid)

        scope_name = scope + '.scope'

        logger.info("Started extension in unit '{0}'", scope_name)

        try:
            cgroup_relative_path = os.path.join(
                'azure.slice/azure-vmextensions.slice', extension_slice_name)

            cpu_cgroup_mountpoint, _ = self.get_cgroup_mount_points()

            if cpu_cgroup_mountpoint is None:
                logger.info(
                    "The CPU controller is not mounted; will not track resource usage"
                )
            else:
                cpu_cgroup_path = os.path.join(cpu_cgroup_mountpoint,
                                               cgroup_relative_path)
                CGroupsTelemetry.track_cgroup(
                    CpuCgroup(extension_name, cpu_cgroup_path))

        except IOError as e:
            if e.errno == 2:  # 'No such file or directory'
                logger.info(
                    "The extension command already completed; will not track resource usage"
                )
            logger.info(
                "Failed to start tracking resource usage for the extension: {0}",
                ustr(e))
        except Exception as e:
            logger.info(
                "Failed to start tracking resource usage for the extension: {0}",
                ustr(e))

        # Wait for process completion or timeout
        try:
            return handle_process_completion(process=process,
                                             command=command,
                                             timeout=timeout,
                                             stdout=stdout,
                                             stderr=stderr,
                                             error_code=error_code)
        except ExtensionError as e:
            # The extension didn't terminate successfully. Determine whether it was due to systemd errors or
            # extension errors.
            if not self._is_systemd_failure(scope, stderr):
                # There was an extension error; it either timed out or returned a non-zero exit code. Re-raise the error
                raise

            # There was an issue with systemd-run. We need to log it and retry the extension without systemd.
            process_output = read_output(stdout, stderr)
            # Reset the stdout and stderr
            stdout.truncate(0)
            stderr.truncate(0)

            if isinstance(e, ExtensionOperationError):
                # no-member: Instance of 'ExtensionError' has no 'exit_code' member (no-member) - Disabled: e is actually an ExtensionOperationError
                err_msg = 'Systemd process exited with code %s and output %s' % (
                    e.exit_code, process_output)  # pylint: disable=no-member
            else:
                err_msg = "Systemd timed-out, output: %s" % process_output
            raise SystemdRunError(err_msg)
        finally:
            with self._systemd_run_commands_lock:
                self._systemd_run_commands.remove(process.pid)
    def start_extension_command(
            self,
            extension_name,
            command,
            timeout,
            shell,
            cwd,
            env,
            stdout,
            stderr,
            error_code=ExtensionErrorCodes.PluginUnknownFailure):
        scope_name = "{0}_{1}".format(
            self._get_extension_cgroup_name(extension_name), uuid.uuid4())

        process = subprocess.Popen("systemd-run --unit={0} --scope {1}".format(
            scope_name, command),
                                   shell=shell,
                                   cwd=cwd,
                                   stdout=stdout,
                                   stderr=stderr,
                                   env=env,
                                   preexec_fn=os.setsid)

        logger.info("Started extension using scope '{0}'", scope_name)
        extension_cgroups = []

        def create_cgroup(controller):
            cgroup_path = os.path.join(CGROUPS_FILE_SYSTEM_ROOT, controller,
                                       'system.slice', scope_name + ".scope")
            extension_cgroups.append(
                CGroup.create(cgroup_path, controller, extension_name))

        self._foreach_controller(
            create_cgroup, 'Cannot create cgroup for extension {0}; '
            'resource usage will not be tracked.'.format(extension_name))
        self.track_cgroups(extension_cgroups)

        # Wait for process completion or timeout
        try:
            process_output = handle_process_completion(process=process,
                                                       command=command,
                                                       timeout=timeout,
                                                       stdout=stdout,
                                                       stderr=stderr,
                                                       error_code=error_code)
        except ExtensionError as e:
            # The extension didn't terminate successfully. Determine whether it was due to systemd errors or
            # extension errors.
            process_output = read_output(stdout, stderr)
            systemd_failure = self._is_systemd_failure(scope_name,
                                                       process_output)

            if not systemd_failure:
                # There was an extension error; it either timed out or returned a non-zero exit code. Re-raise the error
                raise
            else:
                # There was an issue with systemd-run. We need to log it and retry the extension without systemd.
                err_msg = 'Systemd process exited with code %s and output %s' % (e.exit_code, process_output) \
                    if isinstance(e, ExtensionOperationError) else "Systemd timed-out, output: %s" % process_output
                event_msg = 'Failed to run systemd-run for unit {0}.scope. ' \
                            'Will retry invoking the extension without systemd. ' \
                            'Systemd-run error: {1}'.format(scope_name, err_msg)
                add_event(AGENT_NAME,
                          version=CURRENT_VERSION,
                          op=WALAEventOperation.InvokeCommandUsingSystemd,
                          is_success=False,
                          log_event=False,
                          message=event_msg)
                logger.warn(event_msg)

                # Reset the stdout and stderr
                stdout.truncate(0)
                stderr.truncate(0)

                # Try invoking the process again, this time without systemd-run
                logger.info(
                    'Extension invocation using systemd failed, falling back to regular invocation '
                    'without cgroups tracking.')
                process = subprocess.Popen(command,
                                           shell=shell,
                                           cwd=cwd,
                                           env=env,
                                           stdout=stdout,
                                           stderr=stderr,
                                           preexec_fn=os.setsid)

                process_output = handle_process_completion(
                    process=process,
                    command=command,
                    timeout=timeout,
                    stdout=stdout,
                    stderr=stderr,
                    error_code=error_code)

                return [], process_output

        # The process terminated in time and successfully
        return extension_cgroups, process_output