Пример #1
0
 def _wait_for_reboot(self, old_boot_id):
     """Wait for the client to go down and come back up after a reboot.

     The host is first given WAIT_DOWN_REBOOT_TIMEOUT to go down (the
     pre-reboot boot id is handed to wait_down), then DEFAULT_REBOOT_TIMEOUT
     to come back up. If it does not come back, one hard reset is attempted
     on a best-effort basis and the method still fails.

     old_boot_id: boot id captured before the reboot; forwarded to
         self.host.wait_down().

     Raises error.AutotestRunError if the host does not go down, or does
     not come back up, within the respective timeout.
     """
     logging.info("Client is rebooting")
     logging.info("Waiting for client to halt")
     if not self.host.wait_down(self.host.WAIT_DOWN_REBOOT_TIMEOUT,
                                old_boot_id=old_boot_id):
         err = "%s failed to shutdown after %d"
         err %= (self.host.hostname, self.host.WAIT_DOWN_REBOOT_TIMEOUT)
         raise error.AutotestRunError(err)

     logging.info("Client down, waiting for restart")
     if self.host.wait_up(self.host.DEFAULT_REBOOT_TIMEOUT):
         return

     # The reboot failed: hard reset the machine once, if possible, before
     # failing this control file.
     warning = "%s did not come back up, hard resetting"
     warning %= self.host.hostname
     logging.warning(warning)
     try:
         self.host.hardreset(wait=False)
     except (AttributeError, error.AutoservUnsupportedError) as detail:
         # Presumably AttributeError covers host classes that have no
         # hardreset() at all -- confirm against the host implementations.
         # The hostname lives on the host object, not on self.
         warning = ("Hard reset unsupported on %s: %s" %
                    (self.host.hostname, detail))
         logging.warning(warning)

     # wait=False above: the reset is fire-and-forget, so the run is
     # reported as failed regardless of whether the reset was issued.
     raise error.AutotestRunError(
         "%s failed to boot after %ds" %
         (self.host.hostname, self.host.DEFAULT_REBOOT_TIMEOUT))
Пример #2
0
    def execute_section(self, section, timeout, stderr_redirector,
                        client_disconnect_timeout):
        """Run a single section (phase) of the client control file.

        The section is run through _execute_in_background() when
        self.background is set, otherwise through _execute_daemon().

        section: phase number, logged and forwarded to the executor.
        timeout: forwarded unchanged to the executor.
        stderr_redirector: object exposing last_line; forwarded to
            _execute_daemon() and to log_unexpected_abort() on failure.
        client_disconnect_timeout: forwarded to _execute_daemon().

        Returns stderr_redirector.last_line when the section did not fail.

        Raises error.AutotestRunError when the exit status is 1, or when a
        foreground run produced no stderr output at all.
        """
        logging.info("Executing %s/bin/autotest %s/control phase %d",
                     self.autodir, self.autodir, section)

        if not self.background:
            outcome = self._execute_daemon(section, timeout,
                                           stderr_redirector,
                                           client_disconnect_timeout)
        else:
            outcome = self._execute_in_background(section, timeout)

        final_line = stderr_redirector.last_line

        # Work out whether the run failed hard enough to warrant raising.
        failure = None
        if outcome.exit_status == 1:
            failure = error.AutotestRunError("client job was aborted")
        elif not (self.background or outcome.stderr):
            failure = error.AutotestRunError(
                "execute_section %s failed to return anything\n"
                "stdout:%s\n" % (section, outcome.stdout))

        if not failure:
            return stderr_redirector.last_line

        # The client failed; if it also never finished logging, record that
        # before propagating the error.
        if not self.is_client_job_finished(final_line):
            self.log_unexpected_abort(stderr_redirector)
        raise failure
Пример #3
0
    def execute_control(self, timeout=None, client_disconnect_timeout=None):
        """Run the client control file section by section until it is done.

        execute_section() is called in a loop with an increasing section
        number. After each section the last status line decides what
        happens next:
          * job finished  -> return;
          * job rebooting -> wait for the reboot, then run the next section;
          * anything else -> log the unexpected abort, give the host time to
            recover, and raise.
        In background mode the method returns right after the first section
        and no client-log collection is set up or torn down.

        timeout: overall limit in seconds for the whole loop, measured with
            time.time(); a falsy value means no limit. Each section gets the
            time that is still left.
        client_disconnect_timeout: forwarded unchanged to execute_section().

        Raises error.AutotestRunError when waiting for a reboot fails or the
        client ends with an unexpected status line, and
        error.AutotestTimeoutError when the loop ends because the timeout
        elapsed.
        """
        if not self.background:
            collector = log_collector(self.host, self.tag, self.results_dir)
            hostname = self.host.hostname
            remote_results = collector.client_results_dir
            local_results = collector.server_results_dir
            self.host.job.add_client_log(hostname, remote_results,
                                         local_results)
            job_record_context = self.host.job.get_record_context()

        section = 0
        start_time = time.time()

        logger = client_logger(self.host, self.tag, self.results_dir)
        try:
            while not timeout or time.time() < start_time + timeout:
                if timeout:
                    # Only hand the section whatever is left of the budget.
                    section_timeout = start_time + timeout - time.time()
                else:
                    section_timeout = None
                # Captured before the section runs so a later reboot can be
                # told apart from the current boot.
                boot_id = self.host.get_boot_id()
                last = self.execute_section(section, section_timeout, logger,
                                            client_disconnect_timeout)
                if self.background:
                    return
                section += 1
                if self.is_client_job_finished(last):
                    logging.info("Client complete")
                    return
                elif self.is_client_job_rebooting(last):
                    try:
                        self._wait_for_reboot(boot_id)
                    except error.AutotestRunError as e:
                        self.host.job.record("ABORT", None, "reboot", str(e))
                        self.host.job.record("END ABORT", None, None, str(e))
                        raise
                    continue

                # if we reach here, something unexpected happened
                self.log_unexpected_abort(logger)

                # give the client machine a chance to recover from a crash
                self.host.wait_up(self.host.HOURS_TO_WAIT_FOR_RECOVERY * 3600)
                msg = ("Aborting - unexpected final status message from "
                       "client on %s: %s\n") % (self.host.hostname, last)
                raise error.AutotestRunError(msg)
        finally:
            logger.close()
            if not self.background:
                # Mirrors the set-up at the top: pull results back, process
                # the client state file, unregister the client log and
                # restore the record context.
                collector.collect_client_job_results()
                collector.remove_redundant_client_logs()
                state_file = os.path.basename(self.remote_control_file +
                                              '.state')
                state_path = os.path.join(self.results_dir, state_file)
                self.host.job.postprocess_client_state(state_path)
                self.host.job.remove_client_log(hostname, remote_results,
                                                local_results)
                job_record_context.restore()

        # should only get here if we timed out
        assert timeout
        raise error.AutotestTimeoutError()
Пример #4
0
    def execute_control(self, timeout=None, client_disconnect_timeout=None):
        """Run the client control file once and post-process its results.

        The control file is run through _really_execute_control(); the last
        status line it returns decides the outcome:
          * job finished  -> return;
          * job rebooting -> count it, record an ABORT and return (client
            triggered reboots are unsupported);
          * anything else -> _diagnose_dut() decides between closing the
            status log as a device failure and aborting.
        Client results are collected and the record context restored in all
        cases.

        timeout: forwarded to _really_execute_control().
        client_disconnect_timeout: forwarded to _really_execute_control().

        Raises error.AutotestRunError when _diagnose_dut() signals
        AutotestAbort.
        """
        log_gatherer = log_collector(self.host, self.tag, self.results_dir)
        client_name = self.host.hostname
        client_dir = log_gatherer.client_results_dir
        server_dir = log_gatherer.server_results_dir
        self.host.job.add_client_log(client_name, client_dir, server_dir)
        record_ctx = self.host.job.get_record_context()
        status_logger = client_logger(self.host, self.tag, self.results_dir)

        try:
            boot_id = self.host.get_boot_id()
            final_line = self._really_execute_control(
                timeout, status_logger, client_disconnect_timeout)

            if self.is_client_job_finished(final_line):
                logging.info("Client complete")
                return

            if self.is_client_job_rebooting(final_line):
                # TODO(crbug.com/684311) This feature is never used. Validate
                # and drop this case.
                counter_name = (
                    'chromeos/autotest/errors/client_test_triggered_reboot')
                metrics.Counter(counter_name).increment()
                reboot_note = 'client triggered reboot is unsupported'
                self.host.job.record("ABORT", None, "reboot", reboot_note)
                self.host.job.record("END ABORT", None, None, reboot_note)
                return

            # A test that fails without probable cause is bucketed into one
            # of two categories. If the current state of the device can be
            # determined and it is suspicious, the status lines are closed
            # indicating a failure. If the state cannot be determined, or
            # the device appears totally healthy, we give up and abort.
            try:
                self._diagnose_dut(boot_id)
            except AutotestDeviceError as device_err:
                # The test's status lines are essentially tailed into our
                # log, with indentation, from the client job on the DUT. If
                # the DUT goes down unexpectedly the status log ends up
                # malformed unless the status stack is unwound by hand. A
                # wrapper like server_job's run_reboot/run_group would be
                # nicer, but those expect reboots and this path does not.
                self.host.job.record('FAIL', None, None, str(device_err))
                self.host.job.record('END FAIL', None, None)
                self.host.job.record('END GOOD', None, None)
                self.host.job.failed_with_device_error = True
                return
            except AutotestAbort as abort_err:
                self.host.job.record('ABORT', None, None, str(abort_err))
                self.host.job.record('END ABORT', None, None)

                # Let the client machine try to recover from a crash.
                recovery_wait = self.host.HOURS_TO_WAIT_FOR_RECOVERY * 3600
                self.host.wait_up(recovery_wait)
                logging.debug(
                    'Unexpected final status message from '
                    'client %s: %s', self.host.hostname, final_line)
                # The final line may contain sensitive phrases such as
                # 'END GOOD' that break the tko parser, so it is left out of
                # the error message; it is recorded to status.log anyway.
                abort_msg = ("Aborting - unexpected final status message from "
                             "client on %s\n") % self.host.hostname
                raise error.AutotestRunError(abort_msg)
        finally:
            logging.debug('Autotest job finishes running. Below is the '
                          'post-processing operations.')
            status_logger.close()
            log_gatherer.collect_client_job_results()
            log_gatherer.remove_redundant_client_logs()
            state_name = os.path.basename(self.remote_control_file + '.state')
            self.host.job.postprocess_client_state(
                os.path.join(self.results_dir, state_name))
            self.host.job.remove_client_log(client_name, client_dir,
                                            server_dir)
            record_ctx.restore()

            logging.debug('Autotest job finishes.')

        # should only get here if we timed out
        assert timeout
        raise error.AutotestTimeoutError()
Пример #5
0
    def execute_control(self, timeout=None, client_disconnect_timeout=None):
        """Run the client control file section by section until it is done.

        execute_section() is called in a loop with an increasing section
        number. After each section the last status line decides what
        happens next:
          * job finished  -> return;
          * job rebooting -> wait for the reboot, then run the next section;
          * anything else -> _diagnose_dut() decides between closing the
            status log as a device failure and aborting.
        In background mode the method returns right after the first section
        and no client-log collection is set up or torn down.

        timeout: overall limit in seconds for the whole loop, measured with
            time.time(); a falsy value means no limit. Each section gets the
            time that is still left.
        client_disconnect_timeout: forwarded unchanged to execute_section().

        Raises error.AutotestRunError when waiting for a reboot fails or
        _diagnose_dut() signals AutotestAbort, and
        error.AutotestTimeoutError when the loop ends because the timeout
        elapsed.
        """
        if not self.background:
            collector = log_collector(self.host, self.tag, self.results_dir)
            hostname = self.host.hostname
            remote_results = collector.client_results_dir
            local_results = collector.server_results_dir
            self.host.job.add_client_log(hostname, remote_results,
                                         local_results)
            job_record_context = self.host.job.get_record_context()

        section = 0
        start_time = time.time()

        logger = client_logger(self.host, self.tag, self.results_dir)
        try:
            while not timeout or time.time() < start_time + timeout:
                if timeout:
                    # Only hand the section whatever is left of the budget.
                    section_timeout = start_time + timeout - time.time()
                else:
                    section_timeout = None
                # Captured before the section runs; used both for reboot
                # detection and for diagnosing the device afterwards.
                boot_id = self.host.get_boot_id()
                last = self.execute_section(section, section_timeout, logger,
                                            client_disconnect_timeout)
                if self.background:
                    return
                section += 1
                if self.is_client_job_finished(last):
                    logging.info("Client complete")
                    return
                elif self.is_client_job_rebooting(last):
                    try:
                        self._wait_for_reboot(boot_id)
                    except error.AutotestRunError as e:
                        self.host.job.record("ABORT", None, "reboot", str(e))
                        self.host.job.record("END ABORT", None, None, str(e))
                        raise
                    continue

                # If a test fails without probable cause we try to bucket its
                # failure into one of 2 categories. If we can determine the
                # current state of the device and it is suspicious, we close the
                # status lines indicating a failure. If we either cannot
                # determine the state of the device, or it appears totally
                # healthy, we give up and abort.
                try:
                    self._diagnose_dut(boot_id)
                except AutotestDeviceError as e:
                    # The status lines of the test are pretty much tailed to
                    # our log, with indentation, from the client job on the DUT.
                    # So if the DUT goes down unexpectedly we'll end up with a
                    # malformed status log unless we manually unwind the status
                    # stack. Ideally we would want to write a nice wrapper like
                    # server_job methods run_reboot, run_group but they expect
                    # reboots and we don't.
                    self.host.job.record('FAIL', None, None, str(e))
                    self.host.job.record('END FAIL', None, None)
                    self.host.job.record('END GOOD', None, None)
                    self.host.job.failed_with_device_error = True
                    return
                except AutotestAbort as e:
                    self.host.job.record('ABORT', None, None, str(e))
                    self.host.job.record('END ABORT', None, None)

                    # give the client machine a chance to recover from a crash
                    self.host.wait_up(self.host.HOURS_TO_WAIT_FOR_RECOVERY *
                                      3600)
                    msg = ("Aborting - unexpected final status message from "
                           "client on %s: %s\n") % (self.host.hostname, last)
                    raise error.AutotestRunError(msg)
        finally:
            logger.close()
            if not self.background:
                # Mirrors the set-up at the top: pull results back, process
                # the client state file, unregister the client log and
                # restore the record context.
                collector.collect_client_job_results()
                collector.remove_redundant_client_logs()
                state_file = os.path.basename(self.remote_control_file +
                                              '.state')
                state_path = os.path.join(self.results_dir, state_file)
                self.host.job.postprocess_client_state(state_path)
                self.host.job.remove_client_log(hostname, remote_results,
                                                local_results)
                job_record_context.restore()

        # should only get here if we timed out
        assert timeout
        raise error.AutotestTimeoutError()