Exemplo n.º 1
0
 def run_container_start_checks(self, started_at, timeout_at):
     checks_start_time = time.time()
     while time.time() <= timeout_at:
         if not self.is_running():
             raise FactoryNotStarted("{} is no longer running".format(self))
         if self._container_start_checks():
             break
     else:
         log.error(
             "Failed to run container start checks after %1.2f seconds",
             time.time() - checks_start_time,
         )
         return False
     check_ports = set(self.get_check_ports())
     if not check_ports:
         return True
     while time.time() <= timeout_at:
         if not self.is_running():
             raise FactoryNotStarted("{} is no longer running".format(self))
         if not check_ports:
             break
         check_ports -= ports.get_connectable_ports(check_ports)
         if check_ports:
             time.sleep(0.5)
     else:
         log.error(
             "Failed to check ports after %1.2f seconds",
             time.time() - checks_start_time,
         )
         return False
     return True
Exemplo n.º 2
0
    def wait_for_events(self, patterns, timeout=30, after_time=None):
        start_time = time.time()
        patterns = set(patterns)
        log.info("Waiting at most %.2f seconds for event patterns: %s", timeout, patterns)
        if after_time is None:
            after_time = time.time()
        expire = start_time + timeout
        while time.time() <= expire:
            if not patterns:
                break

            for received, expire, master_id, tag, data in copy.copy(self.store):
                if received < after_time:
                    # Too old, carry on
                    continue
                for _master_id, _pattern in set(patterns):
                    if _master_id != master_id:
                        continue
                    if fnmatch.fnmatch(tag, _pattern):
                        patterns.remove((_master_id, _pattern))
            time.sleep(0.125)
        else:
            log.info(
                "Timmed out after %.2f seconds waiting for event patterns: %s",
                time.time() - start_time,
                patterns,
            )
            return False
        return True
Exemplo n.º 3
0
    def run_start_checks(self, started_at, timeout_at):
        if not super().run_start_checks(started_at, timeout_at):
            return False
        if not self.event_listener:
            log.debug(
                "The 'event_listener' attribute is not set. Not checking events..."
            )
            return True

        check_events = set(self.get_check_events())
        if not check_events:
            log.debug("No events to listen to for %s", self)
            return True
        checks_start_time = time.time()
        while time.time() <= timeout_at:
            if not self.is_running():
                raise FactoryNotStarted("{} is no longer running".format(self))
            if not check_events:
                break
            check_events -= self.event_listener.get_events(
                check_events, after_time=started_at)
            if check_events:
                time.sleep(0.5)
        else:
            log.error(
                "Failed to check events after %1.2f seconds for %s",
                time.time() - checks_start_time,
                self,
            )
            return False
        log.debug("Successfuly checked for all events(%s) for %s",
                  set(self.get_check_events()), self)
        return True
    def wait_for_events(self, patterns, timeout=30, after_time=None):
        """
        Wait for a set of patterns to match or until timeout is reached.

        :param ~collections.abc.Sequence pattern:
            An iterable of tuples in the form of ``("<daemon-id>", "<event-tag-pattern>")``, ie, which daemon ID
            we're targeting and the event tag pattern which will be passed to :py:func:`~fnmatch.fnmatch` to
            assert a match.
        :keyword int,float timeout:
            The amount of time to wait for the events, in seconds.
        :keyword ~datetime.datetime,float after_time:
            After which time to start matching events.

        :return:
            An instance of :py:class:`~saltfactories.plugins.event_listener.MatchedEvents`.
        :rtype ~saltfactories.plugins.event_listener.MatchedEvents:
        """
        if after_time is None:
            after_time = datetime.utcnow()
        elif isinstance(after_time, float):
            after_time = datetime.utcfromtimestamp(after_time)
        after_time_iso = after_time.isoformat()
        log.debug(
            "%s is waiting for event patterns happening after %s: %s",
            self,
            after_time_iso,
            set(patterns),
        )
        found_events = set()
        patterns = set(patterns)
        timeout_at = time.time() + timeout
        while True:
            if not patterns:
                return True
            for event in copy.copy(self.store):
                if event.expired:
                    # Too old, carry on
                    continue
                if event.stamp < after_time:
                    continue
                for pattern in set(patterns):
                    _daemon_id, _pattern = pattern
                    if event.daemon_id != _daemon_id:
                        continue
                    if fnmatch.fnmatch(event.tag, _pattern):
                        log.debug("%s Found matching pattern: %s", self,
                                  pattern)
                        found_events.add(event)
                        patterns.remove((event.daemon_id, _pattern))
            if not patterns:
                break
            if time.time() > timeout_at:
                break
            time.sleep(0.5)
        return MatchedEvents(matches=found_events, missed=patterns)
Exemplo n.º 5
0
    def run(self, *args, _timeout=None, **kwargs):
        """
        Run the given command synchronously
        """
        start_time = time.time()
        # Build the cmdline to pass to the terminal
        # We set the _terminal_timeout attribute while calling build_cmdline in case it needs
        # access to that information to build the command line
        self._terminal_timeout = _timeout or self.default_timeout
        self._terminal_timeout_set_explicitly = _timeout is not None
        timeout_expire = time.time() + self._terminal_timeout
        running = self._run(*args, **kwargs)

        timmed_out = False
        while True:
            if timeout_expire < time.time():
                timmed_out = True
                break
            if self._terminal.poll() is not None:
                break
            time.sleep(0.25)

        result = self.terminate()
        if timmed_out:
            raise FactoryTimeout(
                "{} Failed to run: {}; Error: Timed out after {:.2f} seconds!".
                format(self, result.cmdline,
                       time.time() - start_time),
                stdout=result.stdout,
                stderr=result.stderr,
                cmdline=result.cmdline,
                exitcode=result.exitcode,
            )

        cmdline = result.cmdline
        exitcode = result.exitcode
        stdout, stderr, json_out = self.process_output(result.stdout,
                                                       result.stderr,
                                                       cmdline=cmdline)
        log.info(
            "%s completed %r in CWD: %s after %.2f seconds",
            self,
            cmdline,
            self.cwd,
            time.time() - start_time,
        )
        return ShellResult(exitcode,
                           stdout,
                           stderr,
                           json=json_out,
                           cmdline=cmdline)
    def _cleanup(self):
        cleanup_at = time.time() + 30
        while self.running_event.is_set():
            if time.time() < cleanup_at:
                time.sleep(1)
                continue

            # Reset cleanup time
            cleanup_at = time.time() + 30

            # Cleanup expired events
            to_remove = []
            for event in self.store:
                if event.expired:
                    to_remove.append(event)

            for event in to_remove:
                log.debug("%s Removing from event store: %s", self, event)
                self.store.remove(event)
            log.debug("%s store size after cleanup: %s", self, len(self.store))
Exemplo n.º 7
0
 def wait_for_events(self, patterns, timeout=30, after_time=None):
     if after_time is None:
         after_time = datetime.utcnow()
     elif isinstance(after_time, float):
         after_time = datetime.utcfromtimestamp(after_time)
     after_time_iso = after_time.isoformat()
     log.debug(
         "%s is waiting for event patterns happening after %s: %s",
         self,
         after_time_iso,
         set(patterns),
     )
     found_events = set()
     patterns = set(patterns)
     timeout_at = time.time() + timeout
     while True:
         if not patterns:
             return True
         for event in copy.copy(self.store):
             if event.expired:
                 # Too old, carry on
                 continue
             if event.stamp < after_time:
                 continue
             for pattern in set(patterns):
                 _master_id, _pattern = pattern
                 if event.master_id != _master_id:
                     continue
                 if fnmatch.fnmatch(event.tag, _pattern):
                     log.debug("%s Found matching pattern: %s", self,
                               pattern)
                     patterns.remove((event.master_id, _pattern))
         if not patterns:
             break
         if time.time() > timeout_at:
             break
         time.sleep(0.5)
     return not patterns
Exemplo n.º 8
0
    def init_terminal(self, cmdline, **kwargs):
        """
        Instantiate a terminal with the passed cmdline and kwargs and return it.

        Additionally, it sets a reference to it in self._terminal and also collects
        an initial listing of child processes which will be used when terminating the
        terminal
        """
        self._terminal = Popen(cmdline, **kwargs)
        # Reset the previous _terminal_result if set
        self._terminal_result = None
        # A little sleep to allow the subprocess to start
        time.sleep(0.125)
        try:
            for child in psutil.Process(
                    self._terminal.pid).children(recursive=True):
                if child not in self._children:
                    self._children.append(child)
        except psutil.NoSuchProcess:
            # The terminal process is gone
            pass
        atexit.register(self.terminate)
        return self._terminal
Exemplo n.º 9
0
 def run_start_checks(self, started_at, timeout_at):
     check_ports = set(self.get_check_ports())
     if not check_ports:
         log.debug("No ports to check connection to for %s", self)
         return True
     checks_start_time = time.time()
     while time.time() <= timeout_at:
         if not self.is_running():
             raise FactoryNotStarted("{} is no longer running".format(self))
         if not check_ports:
             break
         check_ports -= ports.get_connectable_ports(check_ports)
         if check_ports:
             time.sleep(0.5)
     else:
         log.error(
             "Failed to check ports after %1.2f seconds for %s",
             time.time() - checks_start_time,
             self,
         )
         return False
     log.debug("Successfuly connected to all ports(%s) for %s",
               set(self.get_check_ports()), self)
     return True
Exemplo n.º 10
0
    def start(self, *command, max_start_attempts=None, start_timeout=None):
        if self.is_running():
            log.warning("%s is already running.", self)
            return True
        connectable = ContainerFactory.client_connectable(self.docker_client)
        if connectable is not True:
            self.terminate()
            raise RuntimeError(connectable)
        self._terminate_result = None
        atexit.register(self.terminate)
        factory_started = False
        for callback, args, kwargs in self.before_start_callbacks:
            try:
                callback(*args, **kwargs)
            except Exception as exc:  # pylint: disable=broad-except
                log.info(
                    "Exception raised when running %s: %s",
                    self._format_callback(callback, args, kwargs),
                    exc,
                    exc_info=True,
                )

        start_time = time.time()
        start_attempts = max_start_attempts or self.max_start_attempts
        current_attempt = 0
        while current_attempt <= start_attempts:
            current_attempt += 1
            if factory_started:
                break
            log.info("Starting %s. Attempt: %d of %d", self, current_attempt,
                     start_attempts)
            current_start_time = time.time()
            start_running_timeout = current_start_time + (start_timeout or
                                                          self.start_timeout)

            # Start the container
            self.container = self.docker_client.containers.run(
                self.image,
                name=self.name,
                detach=True,
                stdin_open=True,
                command=list(command) or None,
                **self.container_run_kwargs)
            while time.time() <= start_running_timeout:
                # Don't know why, but if self.container wasn't previously in a running
                # state, and now it is, we have to re-set the self.container attribute
                # so that it gives valid status information
                self.container = self.docker_client.containers.get(self.name)
                if self.container.status != "running":
                    time.sleep(0.25)
                    continue

                self.container = self.docker_client.containers.get(self.name)
                logs = self.container.logs(stdout=True,
                                           stderr=True,
                                           stream=False)
                if isinstance(logs, bytes):
                    stdout = logs.decode()
                    stderr = None
                else:
                    stdout = logs[0].decode()
                    stderr = logs[1].decode()
                if stdout and stderr:
                    log.info("Running Container Logs:\n%s\n%s", stdout, stderr)
                elif stdout:
                    log.info("Running Container Logs:\n%s", stdout)

                # If we reached this far it means that we got the running status above, and
                # now that the container has started, run start checks
                try:
                    if (self.run_container_start_checks(
                            current_start_time, start_running_timeout) is
                            False):
                        time.sleep(0.5)
                        continue
                except FactoryNotStarted:
                    self.terminate()
                    break
                log.info(
                    "The %s factory is running after %d attempts. Took %1.2f seconds",
                    self,
                    current_attempt,
                    time.time() - start_time,
                )
                factory_started = True
                break
            else:
                # We reached start_running_timeout, re-try
                try:
                    self.container.remove(force=True)
                    self.container.wait()
                except docker.errors.NotFound:
                    pass
                self.container = None
        else:
            # The factory failed to confirm it's running status
            self.terminate()
        if factory_started:
            for callback, args, kwargs in self.after_start_callbacks:
                try:
                    callback(*args, **kwargs)
                except Exception as exc:  # pylint: disable=broad-except
                    log.info(
                        "Exception raised when running %s: %s",
                        self._format_callback(callback, args, kwargs),
                        exc,
                        exc_info=True,
                    )
            # TODO: Add containers to the processes stats?!
            # if self.factories_manager and self.factories_manager.stats_processes is not None:
            #    self.factories_manager.stats_processes[self.get_display_name()] = psutil.Process(
            #        self.pid
            #    )
            return factory_started
        result = self.terminate()
        raise FactoryNotStarted(
            "The {} factory has failed to confirm running status after {} attempts, which "
            "took {:.2f} seconds({:.2f} seconds each)".format(
                self,
                current_attempt - 1,
                time.time() - start_time,
                start_timeout or self.start_timeout,
            ),
            stdout=result.stdout,
            stderr=result.stderr,
            exitcode=result.exitcode,
        )
Exemplo n.º 11
0
    def process_logs(self):
        address = "tcp://{}:{}".format(self.log_host, self.log_port)
        context = zmq.Context()
        puller = context.socket(zmq.PULL)
        exit_timeout_seconds = 5
        exit_timeout = None
        try:
            puller.bind(address)
        except zmq.ZMQError as exc:
            log.exception("Unable to bind to puller at %s", address)
            return
        try:
            self.running_event.set()
            while True:
                if not self.running_event.is_set():
                    if exit_timeout is None:
                        log.debug(
                            "Waiting %d seconds to process any remaning log messages "
                            "before exiting...",
                            exit_timeout_seconds,
                        )
                        exit_timeout = time.time() + exit_timeout_seconds

                    if time.time() >= exit_timeout:
                        log.debug(
                            "Unable to process remaining log messages in time. "
                            "Exiting anyway.")
                        break
                try:
                    try:
                        msg = puller.recv(flags=zmq.NOBLOCK)
                    except zmq.ZMQError as exc:
                        if exc.errno != zmq.EAGAIN:
                            raise
                        time.sleep(0.25)
                        continue
                    if msgpack.version >= (0, 5, 2):
                        record_dict = msgpack.loads(msg, raw=False)
                    else:
                        record_dict = msgpack.loads(msg, encoding="utf-8")
                    if record_dict is None:
                        # A sentinel to stop processing the queue
                        log.info(
                            "Received the sentinel to shutdown the log server")
                        break
                    try:
                        record_dict["message"]
                    except KeyError:
                        # This log record was msgpack dumped from Py2
                        for key, value in record_dict.copy().items():
                            skip_update = True
                            if isinstance(value, bytes):
                                value = value.decode("utf-8")
                                skip_update = False
                            if isinstance(key, bytes):
                                key = key.decode("utf-8")
                                skip_update = False
                            if skip_update is False:
                                record_dict[key] = value
                    # Just log everything, filtering will happen on the main process
                    # logging handlers
                    record = logging.makeLogRecord(record_dict)
                    logger = logging.getLogger(record.name)
                    logger.handle(record)
                except (EOFError, KeyboardInterrupt, SystemExit) as exc:
                    break
                except Exception as exc:  # pylint: disable=broad-except
                    log.warning(
                        "An exception occurred in the log server processing queue thread: %s",
                        exc,
                        exc_info=True,
                    )
        finally:
            puller.close(1)
            context.term()
Exemplo n.º 12
0
 def start(self,
           *extra_cli_arguments,
           max_start_attempts=None,
           start_timeout=None):
     """
     Start the daemon
     """
     if self.is_running():
         log.warning("%s is already running.", self)
         return True
     process_running = False
     start_time = time.time()
     start_attempts = max_start_attempts or self.max_start_attempts
     current_attempt = 0
     run_arguments = list(extra_cli_arguments)
     while True:
         if process_running:
             break
         current_attempt += 1
         if current_attempt > start_attempts:
             break
         log.info("Starting %s. Attempt: %d of %d", self, current_attempt,
                  start_attempts)
         for callback, args, kwargs in self.before_start_callbacks:
             try:
                 callback(*args, **kwargs)
             except Exception as exc:  # pylint: disable=broad-except
                 log.info(
                     "Exception raised when running %s: %s",
                     self._format_callback(callback, args, kwargs),
                     exc,
                     exc_info=True,
                 )
         current_start_time = time.time()
         start_running_timeout = current_start_time + (start_timeout or
                                                       self.start_timeout)
         if current_attempt > 1 and self.extra_cli_arguments_after_first_start_failure:
             run_arguments = list(extra_cli_arguments) + list(
                 self.extra_cli_arguments_after_first_start_failure)
         self._run(*run_arguments)
         if not self.is_running():
             # A little breathe time to allow the process to start if not started already
             time.sleep(0.5)
         while time.time() <= start_running_timeout:
             if not self.is_running():
                 log.warning("%s is no longer running", self)
                 self.terminate()
                 break
             try:
                 if self.run_start_checks(current_start_time,
                                          start_running_timeout) is False:
                     time.sleep(1)
                     continue
             except FactoryNotStarted:
                 self.terminate()
                 break
             log.info(
                 "The %s factory is running after %d attempts. Took %1.2f seconds",
                 self,
                 current_attempt,
                 time.time() - start_time,
             )
             process_running = True
             break
         else:
             # The factory failed to confirm it's running status
             self.terminate()
     if process_running:
         for callback, args, kwargs in self.after_start_callbacks:
             try:
                 callback(*args, **kwargs)
             except Exception as exc:  # pylint: disable=broad-except
                 log.info(
                     "Exception raised when running %s: %s",
                     self._format_callback(callback, args, kwargs),
                     exc,
                     exc_info=True,
                 )
         return process_running
     result = self.terminate()
     raise FactoryNotStarted(
         "The {} factory has failed to confirm running status after {} attempts, which "
         "took {:.2f} seconds".format(
             self,
             current_attempt - 1,
             time.time() - start_time,
         ),
         stdout=result.stdout,
         stderr=result.stderr,
         exitcode=result.exitcode,
     )