def test_handle_process_completion_should_raise_on_timeout(self): command = "sleep 1m" timeout = 20 with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout: with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stderr: with patch('time.sleep') as mock_sleep: with self.assertRaises(ExtensionError) as context_manager: process = subprocess.Popen( command, # pylint: disable=subprocess-popen-preexec-fn shell=True, cwd=self.tmp_dir, env={}, stdout=stdout, stderr=stderr, preexec_fn=os.setsid) handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout, stderr=stderr, error_code=42) # We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure # we're "waiting" the correct amount of time before killing the process and raising an exception # Due to an extra call to sleep at some point in the call stack which only happens sometimes, # we are relaxing this assertion to allow +/- 2 sleep calls. self.assertTrue(abs(mock_sleep.call_count - timeout) <= 2) self.assertEqual( context_manager.exception.code, ExtensionErrorCodes.PluginHandlerScriptTimedout) self.assertIn("Timeout({0})".format(timeout), ustr(context_manager.exception))
def test_handle_process_completion_should_raise_on_nonzero_exit_code(self): command = "ls folder_does_not_exist" error_code = 42 with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout: with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stderr: with self.assertRaises(ExtensionError) as context_manager: process = subprocess.Popen( command, # pylint: disable=subprocess-popen-preexec-fn shell=True, cwd=self.tmp_dir, env={}, stdout=stdout, stderr=stderr, preexec_fn=os.setsid) handle_process_completion(process=process, command=command, timeout=4, stdout=stdout, stderr=stderr, error_code=error_code) self.assertEqual(context_manager.exception.code, error_code) self.assertIn("Non-zero exit code:", ustr(context_manager.exception))
def test_handle_process_completion_should_raise_on_timeout(self): command = "sleep 1m" timeout = 5 with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout: with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stderr: with patch('time.sleep') as mock_sleep: with self.assertRaises(ExtensionError) as context_manager: process = subprocess.Popen(command, shell=True, cwd=self.tmp_dir, env={}, stdout=stdout, stderr=stderr, preexec_fn=os.setsid) handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout, stderr=stderr, error_code=42) # We're mocking sleep to avoid prolonging the test execution time, but we still want to make sure # we're "waiting" the correct amount of time before killing the process and raising an exception self.assertEquals(mock_sleep.call_count, timeout) self.assertEquals( context_manager.exception.code, ExtensionErrorCodes.PluginHandlerScriptTimedout) self.assertIn("Timeout({0})".format(timeout), ustr(context_manager.exception))
def start_extension_command(self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr, error_code=ExtensionErrorCodes.PluginUnknownFailure): """ Starts a command (install/enable/etc) for an extension and adds the command's PID to the extension's cgroup :param extension_name: The extension executing the command :param command: The command to invoke :param cmd_name: The type of the command(enable, install, etc.) :param timeout: Number of seconds to wait for command completion :param cwd: The working directory for the command :param env: The environment to pass to the command's process :param stdout: File object to redirect stdout to :param stderr: File object to redirect stderr to :param stderr: File object to redirect stderr to :param error_code: Extension error code to raise in case of error """ if self.enabled(): try: return self._cgroups_api.start_extension_command(extension_name, command, cmd_name, timeout, shell=shell, cwd=cwd, env=env, stdout=stdout, stderr=stderr, error_code=error_code) except SystemdRunError as exception: reason = 'Failed to start {0} using systemd-run, will try invoking the extension directly. Error: {1}'.format(extension_name, ustr(exception)) self.disable(reason, DisableCgroups.ALL) # fall-through and re-invoke the extension # subprocess-popen-preexec-fn<W1509> Disabled: code is not multi-threaded process = subprocess.Popen(command, shell=shell, cwd=cwd, env=env, stdout=stdout, stderr=stderr, preexec_fn=os.setsid) # pylint: disable=W1509 return handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout, stderr=stderr, error_code=error_code)
def start_extension_command(self, extension_name, command, timeout, shell, cwd, env, stdout, stderr, error_code=ExtensionErrorCodes.PluginUnknownFailure): """ Starts a command (install/enable/etc) for an extension and adds the command's PID to the extension's cgroup :param extension_name: The extension executing the command :param command: The command to invoke :param timeout: Number of seconds to wait for command completion :param cwd: The working directory for the command :param env: The environment to pass to the command's process :param stdout: File object to redirect stdout to :param stderr: File object to redirect stderr to :param error_code: Extension error code to raise in case of error """ try: extension_cgroups = self.create_extension_cgroups(extension_name) except Exception as exception: extension_cgroups = [] logger.warn("Failed to create cgroups for extension '{0}'; resource usage will not be tracked. " "Error: {1}".format(extension_name, ustr(exception))) def pre_exec_function(): os.setsid() try: pid = os.getpid() for cgroup in extension_cgroups: try: self._add_process_to_cgroup(pid, cgroup.path) except Exception as exception: logger.warn("Failed to add PID {0} to the cgroups for extension '{1}'. " "Resource usage will not be tracked. Error: {2}".format(pid, extension_name, ustr(exception))) except Exception as e: logger.warn("Failed to add extension {0} to its cgroup. Resource usage will not be tracked. " "Error: {1}".format(extension_name, ustr(e))) process = subprocess.Popen(command, shell=shell, cwd=cwd, env=env, stdout=stdout, stderr=stderr, preexec_fn=pre_exec_function) self.track_cgroups(extension_cgroups) process_output = handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout, stderr=stderr, error_code=error_code) return extension_cgroups, process_output
def start_extension_command( self, extension_name, command, timeout, shell, cwd, env, stdout, stderr, error_code=ExtensionErrorCodes.PluginUnknownFailure): # pylint: disable=R0913 """ Starts a command (install/enable/etc) for an extension and adds the command's PID to the extension's cgroup :param extension_name: The extension executing the command :param command: The command to invoke :param timeout: Number of seconds to wait for command completion :param cwd: The working directory for the command :param env: The environment to pass to the command's process :param stdout: File object to redirect stdout to :param stderr: File object to redirect stderr to :param stderr: File object to redirect stderr to :param error_code: Extension error code to raise in case of error """ if not self.enabled(): # subprocess-popen-preexec-fn<W1509> Disabled: code is not multi-threaded process = subprocess.Popen( command, # pylint: disable=W1509 shell=shell, cwd=cwd, env=env, stdout=stdout, stderr=stderr, preexec_fn=os.setsid) process_output = handle_process_completion( process=process, command=command, timeout=timeout, stdout=stdout, stderr=stderr, error_code=error_code) else: process_output = self._cgroups_api.start_extension_command( extension_name, command, timeout, shell=shell, cwd=cwd, env=env, stdout=stdout, stderr=stderr, error_code=error_code) return process_output
def test_handle_process_completion_should_return_process_output(self): command = "echo 'dummy stdout' && 1>&2 echo 'dummy stderr'" with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stdout: with tempfile.TemporaryFile(dir=self.tmp_dir, mode="w+b") as stderr: process = subprocess.Popen(command, shell=True, cwd=self.tmp_dir, env={}, stdout=stdout, stderr=stderr, preexec_fn=os.setsid) process_output = handle_process_completion(process=process, command=command, timeout=5, stdout=stdout, stderr=stderr, error_code=42) expected_output = "[stdout]\ndummy stdout\n\n\n[stderr]\ndummy stderr\n" self.assertEquals(process_output, expected_output)
def start_extension_command(self, extension_name, command, timeout, shell, cwd, env, stdout, stderr, error_code=ExtensionErrorCodes.PluginUnknownFailure): scope = "{0}_{1}".format(self._get_extension_cgroup_name(extension_name), uuid.uuid4()) process = subprocess.Popen( "systemd-run --unit={0} --scope {1}".format(scope, command), shell=shell, cwd=cwd, stdout=stdout, stderr=stderr, env=env, preexec_fn=os.setsid) scope_name = scope + '.scope' logger.info("Started extension in unit '{0}'", scope_name) try: # systemd-run creates the scope under the system slice by default cgroup_relative_path = os.path.join('system.slice', scope_name) cpu_cgroup_mountpoint, memory_cgroup_mountpoint = self.get_cgroup_mount_points() if cpu_cgroup_mountpoint is None: logger.info("The CPU controller is not mounted; will not track resource usage") else: cpu_cgroup_path = os.path.join(cpu_cgroup_mountpoint, cgroup_relative_path) CGroupsTelemetry.track_cgroup(CpuCgroup(extension_name, cpu_cgroup_path)) if memory_cgroup_mountpoint is None: logger.info("The memory controller is not mounted; will not track resource usage") else: memory_cgroup_path = os.path.join(memory_cgroup_mountpoint, cgroup_relative_path) CGroupsTelemetry.track_cgroup(MemoryCgroup(extension_name, memory_cgroup_path)) except IOError as e: if e.errno == 2: # 'No such file or directory' logger.info("The extension command already completed; will not track resource usage") logger.info("Failed to start tracking resource usage for the extension: {0}", ustr(e)) except Exception as e: logger.info("Failed to start tracking resource usage for the extension: {0}", ustr(e)) # Wait for process completion or timeout try: process_output = handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout, stderr=stderr, error_code=error_code) except ExtensionError as e: # The extension didn't terminate successfully. Determine whether it was due to systemd errors or # extension errors. process_output = read_output(stdout, stderr) systemd_failure = self._is_systemd_failure(scope, process_output) if not systemd_failure: # There was an extension error; it either timed out or returned a non-zero exit code. Re-raise the error raise else: # There was an issue with systemd-run. We need to log it and retry the extension without systemd. err_msg = 'Systemd process exited with code %s and output %s' % (e.exit_code, process_output) \ if isinstance(e, ExtensionOperationError) else "Systemd timed-out, output: %s" % process_output event_msg = 'Failed to run systemd-run for unit {0}.scope. ' \ 'Will retry invoking the extension without systemd. ' \ 'Systemd-run error: {1}'.format(scope, err_msg) add_event(op=WALAEventOperation.InvokeCommandUsingSystemd, is_success=False, log_event=False, message=event_msg) logger.warn(event_msg) # Reset the stdout and stderr stdout.truncate(0) stderr.truncate(0) # Try invoking the process again, this time without systemd-run logger.info('Extension invocation using systemd failed, falling back to regular invocation ' 'without cgroups tracking.') process = subprocess.Popen(command, shell=shell, cwd=cwd, env=env, stdout=stdout, stderr=stderr, preexec_fn=os.setsid) process_output = handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout, stderr=stderr, error_code=error_code) return process_output # The process terminated in time and successfully return process_output
def start_extension_command( self, extension_name, command, cmd_name, timeout, shell, cwd, env, stdout, stderr, error_code=ExtensionErrorCodes.PluginUnknownFailure): scope = "{0}_{1}".format(cmd_name, uuid.uuid4()) extension_slice_name = self.get_extension_slice_name(extension_name) with self._systemd_run_commands_lock: process = subprocess.Popen( # pylint: disable=W1509 "systemd-run --unit={0} --scope --slice={1} {2}".format( scope, extension_slice_name, command), shell=shell, cwd=cwd, stdout=stdout, stderr=stderr, env=env, preexec_fn=os.setsid) # We start systemd-run with shell == True so process.pid is the shell's pid, not the pid for systemd-run self._systemd_run_commands.append(process.pid) scope_name = scope + '.scope' logger.info("Started extension in unit '{0}'", scope_name) try: cgroup_relative_path = os.path.join( 'azure.slice/azure-vmextensions.slice', extension_slice_name) cpu_cgroup_mountpoint, _ = self.get_cgroup_mount_points() if cpu_cgroup_mountpoint is None: logger.info( "The CPU controller is not mounted; will not track resource usage" ) else: cpu_cgroup_path = os.path.join(cpu_cgroup_mountpoint, cgroup_relative_path) CGroupsTelemetry.track_cgroup( CpuCgroup(extension_name, cpu_cgroup_path)) except IOError as e: if e.errno == 2: # 'No such file or directory' logger.info( "The extension command already completed; will not track resource usage" ) logger.info( "Failed to start tracking resource usage for the extension: {0}", ustr(e)) except Exception as e: logger.info( "Failed to start tracking resource usage for the extension: {0}", ustr(e)) # Wait for process completion or timeout try: return handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout, stderr=stderr, error_code=error_code) except ExtensionError as e: # The extension didn't terminate successfully. Determine whether it was due to systemd errors or # extension errors. if not self._is_systemd_failure(scope, stderr): # There was an extension error; it either timed out or returned a non-zero exit code. Re-raise the error raise # There was an issue with systemd-run. We need to log it and retry the extension without systemd. process_output = read_output(stdout, stderr) # Reset the stdout and stderr stdout.truncate(0) stderr.truncate(0) if isinstance(e, ExtensionOperationError): # no-member: Instance of 'ExtensionError' has no 'exit_code' member (no-member) - Disabled: e is actually an ExtensionOperationError err_msg = 'Systemd process exited with code %s and output %s' % ( e.exit_code, process_output) # pylint: disable=no-member else: err_msg = "Systemd timed-out, output: %s" % process_output raise SystemdRunError(err_msg) finally: with self._systemd_run_commands_lock: self._systemd_run_commands.remove(process.pid)
def start_extension_command( self, extension_name, command, timeout, shell, cwd, env, stdout, stderr, error_code=ExtensionErrorCodes.PluginUnknownFailure): scope_name = "{0}_{1}".format( self._get_extension_cgroup_name(extension_name), uuid.uuid4()) process = subprocess.Popen("systemd-run --unit={0} --scope {1}".format( scope_name, command), shell=shell, cwd=cwd, stdout=stdout, stderr=stderr, env=env, preexec_fn=os.setsid) logger.info("Started extension using scope '{0}'", scope_name) extension_cgroups = [] def create_cgroup(controller): cgroup_path = os.path.join(CGROUPS_FILE_SYSTEM_ROOT, controller, 'system.slice', scope_name + ".scope") extension_cgroups.append( CGroup.create(cgroup_path, controller, extension_name)) self._foreach_controller( create_cgroup, 'Cannot create cgroup for extension {0}; ' 'resource usage will not be tracked.'.format(extension_name)) self.track_cgroups(extension_cgroups) # Wait for process completion or timeout try: process_output = handle_process_completion(process=process, command=command, timeout=timeout, stdout=stdout, stderr=stderr, error_code=error_code) except ExtensionError as e: # The extension didn't terminate successfully. Determine whether it was due to systemd errors or # extension errors. process_output = read_output(stdout, stderr) systemd_failure = self._is_systemd_failure(scope_name, process_output) if not systemd_failure: # There was an extension error; it either timed out or returned a non-zero exit code. Re-raise the error raise else: # There was an issue with systemd-run. We need to log it and retry the extension without systemd. err_msg = 'Systemd process exited with code %s and output %s' % (e.exit_code, process_output) \ if isinstance(e, ExtensionOperationError) else "Systemd timed-out, output: %s" % process_output event_msg = 'Failed to run systemd-run for unit {0}.scope. ' \ 'Will retry invoking the extension without systemd. ' \ 'Systemd-run error: {1}'.format(scope_name, err_msg) add_event(AGENT_NAME, version=CURRENT_VERSION, op=WALAEventOperation.InvokeCommandUsingSystemd, is_success=False, log_event=False, message=event_msg) logger.warn(event_msg) # Reset the stdout and stderr stdout.truncate(0) stderr.truncate(0) # Try invoking the process again, this time without systemd-run logger.info( 'Extension invocation using systemd failed, falling back to regular invocation ' 'without cgroups tracking.') process = subprocess.Popen(command, shell=shell, cwd=cwd, env=env, stdout=stdout, stderr=stderr, preexec_fn=os.setsid) process_output = handle_process_completion( process=process, command=command, timeout=timeout, stdout=stdout, stderr=stderr, error_code=error_code) return [], process_output # The process terminated in time and successfully return extension_cgroups, process_output