示例#1
0
 def _connect_transport(self, pkey):
     for _ in xrange(self.CONNECT_TIMEOUT_RETRIES):
         transport = paramiko.Transport(self._connect_socket())
         completed = threading.Event()
         transport.start_client(completed)
         completed.wait(self.CONNECT_TIMEOUT_SECONDS)
         if completed.isSet():
             self._check_transport_error(transport)
             completed.clear()
             transport.auth_publickey(self.user, pkey, completed)
             completed.wait(self.CONNECT_TIMEOUT_SECONDS)
             if completed.isSet():
                 self._check_transport_error(transport)
                 if not transport.is_authenticated():
                     transport.close()
                     raise paramiko.AuthenticationException()
                 return transport
         logging.warn("SSH negotiation (%s:%d) timed out, retrying",
                      self.hostname, self.port)
         # HACK: we can't count on transport.join not hanging now, either
         transport.join = lambda: None
         transport.close()
     logging.error(
         "SSH negotation (%s:%d) has timed out %s times, "
         "giving up", self.hostname, self.port,
         self.CONNECT_TIMEOUT_RETRIES)
     raise error.AutoservSSHTimeout("SSH negotiation timed out")
    def ssh_ping(self, timeout=60, base_cmd='true'):
        """
        Pings remote host via ssh.

        @param timeout: Time in seconds before giving up.
                        Defaults to 60 seconds.
        @param base_cmd: The base command to run with the ssh ping.
                         Defaults to true.
        @raise AutoservSSHTimeout: If the ssh ping times out.
        @raise AutoservSshPermissionDeniedError: If ssh ping fails due to
                                                 permissions.
        @raise AutoservSshPingHostError: For other AutoservRunErrors.
        """
        try:
            self.run(base_cmd, timeout=timeout, connect_timeout=timeout)
        except error.AutoservSSHTimeout:
            msg = "Host (ssh) verify timed out (timeout = %d)" % timeout
            raise error.AutoservSSHTimeout(msg)
        except error.AutoservSshPermissionDeniedError:
            #let AutoservSshPermissionDeniedError be visible to the callers
            raise
        except error.AutoservRunError, e:
            # convert the generic AutoservRunError into something more
            # specific for this context
            raise error.AutoservSshPingHostError(e.description + '\n' +
                                                 repr(e.result_obj))
    def testTriggerUpdate(self):
        """Tests that we correctly handle updater errors."""
        update_url = 'http://server/test/url'
        host = self.mox.CreateMockAnything()
        self.mox.StubOutWithMock(host, 'run')
        host.hostname = 'test_host'

        expected_cmd = ('/usr/bin/update_engine_client --check_for_update '
                        '--omaha_url=http://server/test/url')

        updater = autoupdater.ChromiumOSUpdater(update_url, host=host)

        # Test with success.
        host.run(expected_cmd)

        # SSH Timeout
        host.run(expected_cmd).AndRaise(
            error.AutoservSSHTimeout("ssh timed out", 255))

        # SSH Permission Error
        host.run(expected_cmd).AndRaise(
            error.AutoservSshPermissionDeniedError("ssh timed out", 255))

        # Command Failed Error
        cmd_result_1 = self.mox.CreateMockAnything()
        cmd_result_1.exit_status = 1

        host.run(expected_cmd).AndRaise(
            error.AutoservRunError("ssh timed out", cmd_result_1))

        # Generic SSH Error (maybe)
        cmd_result_255 = self.mox.CreateMockAnything()
        cmd_result_255.exit_status = 255

        host.run(expected_cmd).AndRaise(
            error.AutoservRunError("Sometimes SSH specific result.",
                                   cmd_result_255))

        self.mox.ReplayAll()

        # Verify Success.
        updater.trigger_update()

        # Verify each type of error listed above.
        self.assertRaises(autoupdater.RootFSUpdateError,
                          updater.trigger_update)
        self.assertRaises(autoupdater.RootFSUpdateError,
                          updater.trigger_update)
        self.assertRaises(autoupdater.RootFSUpdateError,
                          updater.trigger_update)
        self.assertRaises(autoupdater.RootFSUpdateError,
                          updater.trigger_update)

        self.mox.VerifyAll()
示例#4
0
    def _execute_daemon(self, timeout, stderr_redirector,
                        client_disconnect_timeout):
        monitor_dir = self.host.get_tmp_dir()
        daemon_cmd = self.get_daemon_cmd(monitor_dir)

        # grab the location for the server-side client log file
        client_log_prefix = self.get_client_log()
        client_log_path = os.path.join(self.results_dir, 'debug',
                                       client_log_prefix + '.log')
        client_log = open(client_log_path, 'w', 0)
        self.copy_client_config_file(client_log_prefix)

        stdout_read = stderr_read = 0
        self.host.job.push_execution_context(self.results_dir)
        try:
            self.host.run(daemon_cmd, ignore_status=True, timeout=timeout)
            disconnect_warnings = []
            while True:
                monitor_cmd = self.get_monitor_cmd(monitor_dir, stdout_read,
                                                   stderr_read)
                try:
                    result = self.host.run(monitor_cmd,
                                           ignore_status=True,
                                           timeout=timeout,
                                           stdout_tee=client_log,
                                           stderr_tee=stderr_redirector)
                except error.AutoservRunError, e:
                    result = e.result_obj
                    result.exit_status = None
                    disconnect_warnings.append(e.description)

                    stderr_redirector.log_warning(
                        "Autotest client was disconnected: %s" % e.description,
                        "NETWORK")
                except error.AutoservSSHTimeout:
                    result = utils.CmdResult(monitor_cmd, "", "", None, 0)
                    stderr_redirector.log_warning(
                        "Attempt to connect to Autotest client timed out",
                        "NETWORK")

                stdout_read += len(result.stdout)
                stderr_read += len(self._strip_stderr_prologue(result.stderr))

                if result.exit_status is not None:
                    # TODO (crosbug.com/38224)- sbasi: Remove extra logging.
                    logging.debug('Result exit status is %d.',
                                  result.exit_status)
                    return result
                elif not self.host.wait_up(client_disconnect_timeout):
                    raise error.AutoservSSHTimeout(
                        "client was disconnected, reconnect timed out")
示例#5
0
 def ssh_ping(self, timeout=60):
     try:
         self.run("true", timeout=timeout, connect_timeout=timeout)
     except error.AutoservSSHTimeout:
         msg = "Host (ssh) verify timed out (timeout = %d)" % timeout
         raise error.AutoservSSHTimeout(msg)
     except error.AutoservSshPermissionDeniedError:
         #let AutoservSshPermissionDeniedError be visible to the callers
         raise
     except error.AutoservRunError, e:
         # convert the generic AutoservRunError into something more
         # specific for this context
         raise error.AutoservSshPingHostError(e.description + '\n' +
                                              repr(e.result_obj))
    def run(self,
            command,
            timeout=3600,
            ignore_status=False,
            stdout_tee=utils.TEE_TO_LOGS,
            stderr_tee=utils.TEE_TO_LOGS,
            connect_timeout=30,
            stdin=None,
            verbose=True,
            args=(),
            ignore_timeout=False):
        """
        Run a command on the remote host.
        @see common_lib.hosts.host.run()

        @param connect_timeout: connection timeout (in seconds)
        @param options: string with additional ssh command options
        @param verbose: log the commands
        @param ignore_timeout: bool True command timeouts should be
                               ignored.  Will return None on command timeout.

        @raises AutoservRunError: if the command failed
        @raises AutoservSSHTimeout: ssh connection has timed out
        """

        stdout = utils.get_stream_tee_file(stdout_tee,
                                           utils.DEFAULT_STDOUT_LEVEL,
                                           prefix=utils.STDOUT_PREFIX)
        stderr = utils.get_stream_tee_file(
            stderr_tee,
            utils.get_stderr_level(ignore_status),
            prefix=utils.STDERR_PREFIX)

        for arg in args:
            command += ' "%s"' % utils.sh_escape(arg)

        if verbose:
            logging.debug("Running (ssh-paramiko) '%s'", command)

        # start up the command
        start_time = time.time()
        try:
            channel = self._open_channel(timeout)
            channel.exec_command(command)
        except (socket.error, paramiko.SSHException, EOFError), e:
            # This has to match the string from paramiko *exactly*.
            if str(e) != 'Channel closed.':
                raise error.AutoservSSHTimeout("ssh failed: %s" % e)
示例#7
0
    def _open_channel(self, timeout):
        start_time = time.time()
        if os.getpid() != self.pid:
            if self.pid is not None:
                # HACK: paramiko tries to join() on its worker thread
                # and this just hangs on linux after a fork()
                self.transport.join = lambda: None
                self.transport.atfork()
                join_hook = lambda cmd: self._close_transport()
                subcommand.subcommand.register_join_hook(join_hook)
                logging.debug("Reopening SSH connection after a process fork")
            self._init_transport()

        channel = None
        try:
            channel = self.transport.open_session()
        except (socket.error, paramiko.SSHException, EOFError), e:
            logging.warn("Exception occured while opening session: %s", e)
            if time.time() - start_time >= timeout:
                raise error.AutoservSSHTimeout("ssh failed: %s" % e)
示例#8
0
    def _run(self, command, timeout, ignore_status, stdout, stderr,
             connect_timeout, env, options, stdin, args):
        """Helper function for run()."""
        ssh_cmd = self.ssh_command(connect_timeout, options)
        if not env.strip():
            env = ""
        else:
            env = "export %s;" % env
        for arg in args:
            command += ' "%s"' % utils.sh_escape(arg)
        full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command))
        result = utils.run(full_cmd,
                           timeout,
                           True,
                           stdout,
                           stderr,
                           verbose=False,
                           stdin=stdin,
                           stderr_is_expected=ignore_status)

        # The error messages will show up in band (indistinguishable
        # from stuff sent through the SSH connection), so we have the
        # remote computer echo the message "Connected." before running
        # any command.  Since the following 2 errors have to do with
        # connecting, it's safe to do these checks.
        if result.exit_status == 255:
            if re.search(
                    r'^ssh: connect to host .* port .*: '
                    r'Connection timed out\r$', result.stderr):
                raise error.AutoservSSHTimeout("ssh timed out", result)
            if "Permission denied." in result.stderr:
                msg = "ssh permission denied"
                raise error.AutoservSshPermissionDeniedError(msg, result)

        if not ignore_status and result.exit_status > 0:
            raise error.AutoservRunError("command execution error", result)

        return result
示例#9
0
    def _run(self, command, timeout, ignore_status, stdout, stderr,
             connect_timeout, env, options, stdin, args, ignore_timeout):
        """Helper function for run()."""
        ssh_cmd = self.ssh_command(connect_timeout, options)
        if not env.strip():
            env = ""
        else:
            env = "export %s;" % env
        for arg in args:
            command += ' "%s"' % utils.sh_escape(arg)
        full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command))

        # TODO(jrbarnette):  crbug.com/484726 - When we're in an SSP
        # container, sometimes shortly after reboot we will see DNS
        # resolution errors on ssh commands; the problem never
        # occurs more than once in a row.  This especially affects
        # the autoupdate_Rollback test, but other cases have been
        # affected, too.
        #
        # We work around it by detecting the first DNS resolution error
        # and retrying exactly one time.
        dns_retry_count = 2
        while True:
            result = utils.run(full_cmd,
                               timeout,
                               True,
                               stdout,
                               stderr,
                               verbose=False,
                               stdin=stdin,
                               stderr_is_expected=ignore_status,
                               ignore_timeout=ignore_timeout)
            dns_retry_count -= 1
            if (result and result.exit_status == 255 and re.search(
                    r'^ssh: .*: Name or service not known', result.stderr)):
                if dns_retry_count:
                    logging.debug('Retrying because of DNS failure')
                    continue
                logging.debug('Retry failed.')
                autotest_stats.Counter('dns_retry_hack.fail').increment()
            elif not dns_retry_count:
                logging.debug('Retry succeeded.')
                autotest_stats.Counter('dns_retry_hack.pass').increment()
            break

        if ignore_timeout and not result:
            return None

        # The error messages will show up in band (indistinguishable
        # from stuff sent through the SSH connection), so we have the
        # remote computer echo the message "Connected." before running
        # any command.  Since the following 2 errors have to do with
        # connecting, it's safe to do these checks.
        if result.exit_status == 255:
            if re.search(
                    r'^ssh: connect to host .* port .*: '
                    r'Connection timed out\r$', result.stderr):
                raise error.AutoservSSHTimeout("ssh timed out", result)
            if "Permission denied." in result.stderr:
                msg = "ssh permission denied"
                raise error.AutoservSshPermissionDeniedError(msg, result)

        if not ignore_status and result.exit_status > 0:
            raise error.AutoservRunError("command execution error", result)

        return result
示例#10
0
    def testTriggerUpdate(self):
        """Tests that we correctly handle updater errors."""
        update_url = 'http://server/test/url'
        self.host = self.mox.CreateMockAnything()
        self.mox.StubOutWithMock(self.host, 'run')
        self.mox.StubOutWithMock(autoupdater.ChromiumOSUpdater,
                                 'get_last_update_error')
        self.host.hostname = 'test_host'
        updater_control_bin = '/usr/bin/update_engine_client'
        test_url = 'http://server/test/url'
        expected_wait_cmd = ('%s -status | grep CURRENT_OP' %
                             updater_control_bin)
        expected_cmd = ('%s --check_for_update --omaha_url=%s' %
                        (updater_control_bin, test_url))
        self.mox.StubOutWithMock(time, "sleep")
        UPDATE_ENGINE_RETRY_WAIT_TIME = 5

        # Generic SSH Error.
        cmd_result_255 = self.mox.CreateMockAnything()
        cmd_result_255.exit_status = 255

        # Command Failed Error
        cmd_result_1 = self.mox.CreateMockAnything()
        cmd_result_1.exit_status = 1

        # Error 37
        cmd_result_37 = self.mox.CreateMockAnything()
        cmd_result_37.exit_status = 37

        updater = autoupdater.ChromiumOSUpdater(update_url, host=self.host)

        # (SUCCESS) Expect one wait command and one status command.
        self._host_run_for_update(expected_wait_cmd)
        self._host_run_for_update(expected_cmd)

        # (SUCCESS) Test with one retry to wait for update-engine.
        self._host_run_for_update(expected_wait_cmd,
                                  exception=error.AutoservRunError(
                                      'non-zero status', cmd_result_1))
        time.sleep(UPDATE_ENGINE_RETRY_WAIT_TIME)
        self._host_run_for_update(expected_wait_cmd)
        self._host_run_for_update(expected_cmd)

        # (SUCCESS) One-time SSH timeout, then success on retry.
        self._host_run_for_update(expected_wait_cmd)
        self._host_run_for_update(expected_cmd,
                                  exception=error.AutoservSSHTimeout(
                                      'ssh timed out', cmd_result_255))
        self._host_run_for_update(expected_cmd)

        # (SUCCESS) One-time ERROR 37, then success.
        self._host_run_for_update(expected_wait_cmd)
        self._host_run_for_update(expected_cmd,
                                  exception=error.AutoservRunError(
                                      'ERROR_CODE=37', cmd_result_37))
        self._host_run_for_update(expected_cmd)

        # (FAILURE) Bad status of update engine.
        self._host_run_for_update(expected_wait_cmd)
        self._host_run_for_update(
            expected_cmd,
            bad_update_status=True,
            exception=error.InstallError('host is not in installable state'))

        # (FAILURE) Two-time SSH timeout.
        self._host_run_for_update(expected_wait_cmd)
        self._host_run_for_update(expected_cmd,
                                  exception=error.AutoservSSHTimeout(
                                      'ssh timed out', cmd_result_255))
        self._host_run_for_update(expected_cmd,
                                  exception=error.AutoservSSHTimeout(
                                      'ssh timed out', cmd_result_255))

        # (FAILURE) SSH Permission Error
        self._host_run_for_update(expected_wait_cmd)
        self._host_run_for_update(
            expected_cmd,
            exception=error.AutoservSshPermissionDeniedError(
                'no permission', cmd_result_255))

        # (FAILURE) Other ssh failure
        self._host_run_for_update(expected_wait_cmd)
        self._host_run_for_update(
            expected_cmd,
            exception=error.AutoservSshPermissionDeniedError(
                'no permission', cmd_result_255))
        # (FAILURE) Other error
        self._host_run_for_update(expected_wait_cmd)
        self._host_run_for_update(expected_cmd,
                                  exception=error.AutoservRunError(
                                      "unknown error", cmd_result_1))

        self.mox.ReplayAll()

        # Expect success
        updater.trigger_update()
        updater.trigger_update()
        updater.trigger_update()
        updater.trigger_update()

        # Expect errors as listed above
        self.assertRaises(autoupdater.RootFSUpdateError,
                          updater.trigger_update)
        self.assertRaises(autoupdater.RootFSUpdateError,
                          updater.trigger_update)
        self.assertRaises(autoupdater.RootFSUpdateError,
                          updater.trigger_update)
        self.assertRaises(autoupdater.RootFSUpdateError,
                          updater.trigger_update)
        self.assertRaises(autoupdater.RootFSUpdateError,
                          updater.trigger_update)

        self.mox.VerifyAll()
示例#11
0
    def _run(self, command, timeout, ignore_status, stdout, stderr,
             connect_timeout, env, options, stdin, args, ignore_timeout,
             ssh_failure_retry_ok):
        """Helper function for run()."""
        ssh_cmd = self.ssh_command(connect_timeout, options)
        if not env.strip():
            env = ""
        else:
            env = "export %s;" % env
        for arg in args:
            command += ' "%s"' % utils.sh_escape(arg)
        full_cmd = '%s "%s %s"' % (ssh_cmd, env, utils.sh_escape(command))

        # TODO(jrbarnette):  crbug.com/484726 - When we're in an SSP
        # container, sometimes shortly after reboot we will see DNS
        # resolution errors on ssh commands; the problem never
        # occurs more than once in a row.  This especially affects
        # the autoupdate_Rollback test, but other cases have been
        # affected, too.
        #
        # We work around it by detecting the first DNS resolution error
        # and retrying exactly one time.
        dns_error_retry_count = 1

        def counters_inc(counter_name, failure_name):
            """Helper function to increment metrics counters.
            @param counter_name: string indicating which counter to use
            @param failure_name: string indentifying an error, or 'success'
            """
            if counter_name == 'call':
                # ssh_counter records the outcome of each ssh invocation
                # inside _run(), including exceptions.
                ssh_counter = metrics.Counter('chromeos/autotest/ssh/calls')
                fields = {
                    'error': failure_name or 'success',
                    'attempt': ssh_call_count
                }
                ssh_counter.increment(fields=fields)

            if counter_name == 'run':
                # run_counter records each call to _run() with its result
                # and how many tries were made.  Calls are recorded when
                # _run() exits (including exiting with an exception)
                run_counter = metrics.Counter('chromeos/autotest/ssh/runs')
                fields = {
                    'error': failure_name or 'success',
                    'attempt': ssh_call_count
                }
                run_counter.increment(fields=fields)

        # If ssh_failure_retry_ok is True, retry twice on timeouts and generic
        # error 255: if a simple retry doesn't work, kill the ssh master
        # connection and try again.  (Note that either error could come from
        # the command running in the DUT, in which case the retry may be
        # useless but, in theory, also harmless.)
        if ssh_failure_retry_ok:
            # Ignore ssh command timeout, even though it could be a timeout due
            # to the command executing in the remote host.  Note that passing
            # ignore_timeout = True makes utils.run() return None on timeouts
            # (and only on timeouts).
            original_ignore_timeout = ignore_timeout
            ignore_timeout = True
            ssh_failure_retry_count = 2
        else:
            ssh_failure_retry_count = 0

        ssh_call_count = 0

        while True:
            try:
                # Increment call count first, in case utils.run() throws an
                # exception.
                ssh_call_count += 1
                result = utils.run(full_cmd,
                                   timeout,
                                   True,
                                   stdout,
                                   stderr,
                                   verbose=False,
                                   stdin=stdin,
                                   stderr_is_expected=ignore_status,
                                   ignore_timeout=ignore_timeout)
            except Exception as e:
                # No retries on exception.
                counters_inc('call', 'exception')
                counters_inc('run', 'exception')
                raise e

            failure_name = None

            if result:
                if result.exit_status == 255:
                    if re.search(r'^ssh: .*: Name or service not known',
                                 result.stderr):
                        failure_name = 'dns_failure'
                    else:
                        failure_name = 'error_255'
                elif result.exit_status > 0:
                    failure_name = 'nonzero_status'
            else:
                # result == None
                failure_name = 'timeout'

            # Record the outcome of the ssh invocation.
            counters_inc('call', failure_name)

            if failure_name:
                # There was a failure: decide whether to retry.
                if failure_name == 'dns_failure':
                    if dns_error_retry_count > 0:
                        logging.debug('retrying ssh because of DNS failure')
                        dns_error_retry_count -= 1
                        continue
                else:
                    if ssh_failure_retry_count == 2:
                        logging.debug('retrying ssh command after %s',
                                      failure_name)
                        ssh_failure_retry_count -= 1
                        continue
                    elif ssh_failure_retry_count == 1:
                        # After two failures, restart the master connection
                        # before the final try.
                        logging.debug('retry 2: restarting master connection')
                        self.restart_master_ssh()
                        # Last retry: reinstate timeout behavior.
                        ignore_timeout = original_ignore_timeout
                        ssh_failure_retry_count -= 1
                        continue

            # No retry conditions occurred.  Exit the loop.
            break

        # The outcomes of ssh invocations have been recorded.  Now record
        # the outcome of this function.

        if ignore_timeout and not result:
            counters_inc('run', 'ignored_timeout')
            return None

        # The error messages will show up in band (indistinguishable
        # from stuff sent through the SSH connection), so we have the
        # remote computer echo the message "Connected." before running
        # any command.  Since the following 2 errors have to do with
        # connecting, it's safe to do these checks.
        if result.exit_status == 255:
            if re.search(
                    r'^ssh: connect to host .* port .*: '
                    r'Connection timed out\r$', result.stderr):
                counters_inc('run', 'final_timeout')
                raise error.AutoservSSHTimeout("ssh timed out", result)
            if "Permission denied." in result.stderr:
                msg = "ssh permission denied"
                counters_inc('run', 'final_eperm')
                raise error.AutoservSshPermissionDeniedError(msg, result)

        if not ignore_status and result.exit_status > 0:
            counters_inc('run', 'final_run_error')
            raise error.AutoservRunError("command execution error", result)

        counters_inc('run', failure_name)
        return result