Exemplo n.º 1
0
 def run_container_start_checks(self, started_at, timeout_at):
     """
     Wait until the container passes its start checks and its ports are connectable.

     Two phases run one after the other, both bounded by ``timeout_at``:

     1. Poll ``self._container_start_checks()`` until it returns a truthy value.
     2. Poll the ports returned by ``self.get_check_ports()`` until every one of
        them is reported connectable by ``ports.get_connectable_ports()``.

     Args:
         started_at(float):
             Timestamp of when the start attempt began. Not used by this
             implementation; accepted so the signature matches the caller.
         timeout_at(float):
             ``time.time()`` based deadline after which the checks are abandoned.

     Returns:
         bool: ``True`` when all checks passed, ``False`` when ``timeout_at`` was
         reached first.

     Raises:
         FactoryNotStarted: If ``self.is_running()`` turns false while waiting.
     """
     checks_start_time = time.time()
     while time.time() <= timeout_at:
         if not self.is_running():
             raise FactoryNotStarted("{} is no longer running".format(self))
         if self._container_start_checks():
             break
         # Pause between polls, like the port loop below, instead of spinning at
         # full speed until the deadline.
         time.sleep(0.5)
     else:
         # The loop condition went false without a ``break``: deadline reached
         log.error(
             "Failed to run container start checks after %1.2f seconds",
             time.time() - checks_start_time,
         )
         return False
     check_ports = set(self.get_check_ports())
     if not check_ports:
         return True
     while time.time() <= timeout_at:
         if not self.is_running():
             raise FactoryNotStarted("{} is no longer running".format(self))
         if not check_ports:
             break
         # Drop the ports which are already accepting connections
         check_ports -= ports.get_connectable_ports(check_ports)
         if check_ports:
             time.sleep(0.5)
     else:
         log.error(
             "Failed to check ports after %1.2f seconds",
             time.time() - checks_start_time,
         )
         return False
     return True
Exemplo n.º 2
0
    def run_start_checks(self, started_at, timeout_at):
        """
        Run the parent start checks, then wait for the expected events.

        Args:
            started_at(float):
                Timestamp of the start attempt; only events after it are considered.
            timeout_at(float):
                ``time.time()`` based deadline for the event checks.

        Returns:
            bool: ``False`` when the parent checks fail or the deadline is reached
            with events still missing, ``True`` otherwise (including when there is
            no event listener or nothing to wait for).

        Raises:
            FactoryNotStarted: If ``self.is_running()`` turns false while waiting.
        """
        if not super().run_start_checks(started_at, timeout_at):
            return False
        if not self.event_listener:
            log.debug(
                "The 'event_listener' attribute is not set. Not checking events..."
            )
            return True

        pending_events = set(self.get_check_events())
        if not pending_events:
            log.debug("No events to listen to for %s", self)
            return True

        wait_began = time.time()
        while True:
            if time.time() > timeout_at:
                # Deadline reached with events still outstanding
                log.error(
                    "Failed to check events after %1.2f seconds for %s",
                    time.time() - wait_began,
                    self,
                )
                return False
            if not self.is_running():
                raise FactoryNotStarted("{} is no longer running".format(self))
            if not pending_events:
                break
            seen_events = self.event_listener.get_events(
                pending_events, after_time=started_at)
            pending_events -= seen_events
            if pending_events:
                # Still waiting on some events, give them time to show up
                time.sleep(0.5)

        log.debug("Successfuly checked for all events(%s) for %s",
                  set(self.get_check_events()), self)
        return True
Exemplo n.º 3
0
 def start(self):
     """
     Run the container and wait up to 30 seconds for it to report ``running``.

     ``self.terminate`` is registered with ``atexit`` before anything else happens.
     When the docker client is not connectable, ``pytest.fail`` is called with the
     value returned by ``ContainerFactory.client_connectable``.

     Returns:
         bool: ``True`` once the container status is ``running``.

     Raises:
         FactoryNotStarted: If the container is not running when the deadline is
             reached. The container is terminated first and the termination
             result is attached to the exception.
     """
     atexit.register(self.terminate)
     client_status = ContainerFactory.client_connectable(self.docker_client)
     if client_status is not True:
         pytest.fail(client_status)
     deadline = time.time() + 30
     self.container = self.docker_client.containers.run(
         self.image,
         name=self.name,
         detach=True,
         stdin_open=True,
         **self.container_run_kwargs
     )
     while deadline > time.time():
         # Fetch the container again on every poll to read its current status
         refreshed = self.docker_client.containers.get(self.container.id)
         if refreshed.status == "running":
             self.container = refreshed
             return True
         time.sleep(1)
     # Deadline reached without ever seeing the ``running`` status
     result = self.terminate()
     raise FactoryNotStarted(
         "Container failed to start",
         stdout=result.stdout,
         stderr=result.stderr,
         exitcode=result.exitcode,
     )
Exemplo n.º 4
0
 def run_start_checks(self, started_at, timeout_at):
     """
     Wait until every port from ``self.get_check_ports()`` is connectable.

     Args:
         started_at(float):
             Timestamp of the start attempt. Not used by this implementation.
         timeout_at(float):
             ``time.time()`` based deadline for the port checks.

     Returns:
         bool: ``True`` when there are no ports to check or all of them became
         connectable, ``False`` when the deadline was reached first.

     Raises:
         FactoryNotStarted: If ``self.is_running()`` turns false while waiting.
     """
     pending_ports = set(self.get_check_ports())
     if not pending_ports:
         log.debug("No ports to check connection to for %s", self)
         return True

     wait_began = time.time()
     while True:
         if time.time() > timeout_at:
             # Deadline reached with ports still refusing connections
             log.error(
                 "Failed to check ports after %1.2f seconds for %s",
                 time.time() - wait_began,
                 self,
             )
             return False
         if not self.is_running():
             raise FactoryNotStarted("{} is no longer running".format(self))
         if not pending_ports:
             break
         reachable = ports.get_connectable_ports(pending_ports)
         pending_ports -= reachable
         if pending_ports:
             # Some ports are not accepting connections yet, retry shortly
             time.sleep(0.5)

     log.debug("Successfuly connected to all ports(%s) for %s",
               set(self.get_check_ports()), self)
     return True
Exemplo n.º 5
0
    def start(self, *command, max_start_attempts=None, start_timeout=None):
        """
        Start the container, retrying until it is confirmed to be running.

        Args:
            *command:
                Passed as the ``command`` of the container. When empty, ``None`` is
                passed instead.
            max_start_attempts(int):
                How many start attempts to make. Falls back to
                ``self.max_start_attempts`` when not set.
            start_timeout(int):
                Seconds each attempt is allowed to take. Falls back to
                ``self.start_timeout`` when not set.

        Returns:
            bool: ``True`` when the factory was already running or once an attempt
            passed ``run_container_start_checks``.

        Raises:
            RuntimeError:
                When ``ContainerFactory.client_connectable`` does not return
                ``True``; the value it returned is used as the error message.
            FactoryNotStarted:
                When none of the attempts confirmed the running status.
        """
        if self.is_running():
            log.warning("%s is already running.", self)
            return True
        connectable = ContainerFactory.client_connectable(self.docker_client)
        if connectable is not True:
            self.terminate()
            raise RuntimeError(connectable)
        self._terminate_result = None
        atexit.register(self.terminate)
        factory_started = False
        for callback, args, kwargs in self.before_start_callbacks:
            try:
                callback(*args, **kwargs)
            except Exception as exc:  # pylint: disable=broad-except
                # A failing callback is logged but does not prevent the start
                log.info(
                    "Exception raised when running %s: %s",
                    self._format_callback(callback, args, kwargs),
                    exc,
                    exc_info=True,
                )

        start_time = time.time()
        start_attempts = max_start_attempts or self.max_start_attempts
        # Number of attempts made so far. Checking the limit before incrementing
        # guarantees at most ``start_attempts`` containers are started and keeps
        # the "Attempt: X of Y" log line within bounds.
        current_attempt = 0
        while not factory_started and current_attempt < start_attempts:
            current_attempt += 1
            log.info("Starting %s. Attempt: %d of %d", self, current_attempt,
                     start_attempts)
            current_start_time = time.time()
            start_running_timeout = current_start_time + (start_timeout or
                                                          self.start_timeout)

            # Start the container
            self.container = self.docker_client.containers.run(
                self.image,
                name=self.name,
                detach=True,
                stdin_open=True,
                command=list(command) or None,
                **self.container_run_kwargs)
            while time.time() <= start_running_timeout:
                # Don't know why, but if self.container wasn't previously in a running
                # state, and now it is, we have to re-set the self.container attribute
                # so that it gives valid status information
                self.container = self.docker_client.containers.get(self.name)
                if self.container.status != "running":
                    time.sleep(0.25)
                    continue

                logs = self.container.logs(stdout=True,
                                           stderr=True,
                                           stream=False)
                if isinstance(logs, bytes):
                    stdout = logs.decode()
                    stderr = None
                else:
                    stdout = logs[0].decode()
                    stderr = logs[1].decode()
                if stdout and stderr:
                    log.info("Running Container Logs:\n%s\n%s", stdout, stderr)
                elif stdout:
                    log.info("Running Container Logs:\n%s", stdout)

                # If we reached this far it means that we got the running status above, and
                # now that the container has started, run start checks
                try:
                    if (self.run_container_start_checks(
                            current_start_time, start_running_timeout) is
                            False):
                        time.sleep(0.5)
                        continue
                except FactoryNotStarted:
                    # The container died during the checks, move on to the next attempt
                    self.terminate()
                    break
                log.info(
                    "The %s factory is running after %d attempts. Took %1.2f seconds",
                    self,
                    current_attempt,
                    time.time() - start_time,
                )
                factory_started = True
                break
            else:
                # We reached start_running_timeout, re-try
                try:
                    self.container.remove(force=True)
                    self.container.wait()
                except docker.errors.NotFound:
                    pass
                self.container = None

        if not factory_started:
            # The factory failed to confirm it's running status
            result = self.terminate()
            raise FactoryNotStarted(
                "The {} factory has failed to confirm running status after {} attempts, which "
                "took {:.2f} seconds({:.2f} seconds each)".format(
                    self,
                    current_attempt,
                    time.time() - start_time,
                    start_timeout or self.start_timeout,
                ),
                stdout=result.stdout,
                stderr=result.stderr,
                exitcode=result.exitcode,
            )

        for callback, args, kwargs in self.after_start_callbacks:
            try:
                callback(*args, **kwargs)
            except Exception as exc:  # pylint: disable=broad-except
                log.info(
                    "Exception raised when running %s: %s",
                    self._format_callback(callback, args, kwargs),
                    exc,
                    exc_info=True,
                )
        # TODO: Add containers to the processes stats?!
        # if self.factories_manager and self.factories_manager.stats_processes is not None:
        #    self.factories_manager.stats_processes[self.get_display_name()] = psutil.Process(
        #        self.pid
        #    )
        return factory_started
Exemplo n.º 6
0
 def start(self,
           *extra_cli_arguments,
           max_start_attempts=None,
           start_timeout=None):
     """
     Start the daemon, retrying until it is confirmed to be running.

     Args:
         *extra_cli_arguments:
             Extra arguments handed to ``self._run``. From the second attempt on,
             ``self.extra_cli_arguments_after_first_start_failure`` is appended
             when it is set.
         max_start_attempts(int):
             How many start attempts to make. Falls back to
             ``self.max_start_attempts`` when not set.
         start_timeout(int):
             Seconds each attempt is allowed to take. Falls back to
             ``self.start_timeout`` when not set.

     Returns:
         bool: ``True`` when the daemon was already running or once an attempt
         passed ``run_start_checks``.

     Raises:
         FactoryNotStarted: When none of the attempts confirmed the running status.
     """

     def _run_callbacks(callbacks):
         # Callbacks are best-effort: a failing one is logged and the rest still run
         for callback, args, kwargs in callbacks:
             try:
                 callback(*args, **kwargs)
             except Exception as exc:  # pylint: disable=broad-except
                 log.info(
                     "Exception raised when running %s: %s",
                     self._format_callback(callback, args, kwargs),
                     exc,
                     exc_info=True,
                 )

     if self.is_running():
         log.warning("%s is already running.", self)
         return True

     started = False
     began_at = time.time()
     allowed_attempts = max_start_attempts or self.max_start_attempts
     current_attempt = 0
     cli_args = list(extra_cli_arguments)
     while not started:
         current_attempt += 1
         if current_attempt > allowed_attempts:
             break
         log.info("Starting %s. Attempt: %d of %d", self, current_attempt,
                  allowed_attempts)
         _run_callbacks(self.before_start_callbacks)
         attempt_began = time.time()
         attempt_deadline = attempt_began + (start_timeout or
                                             self.start_timeout)
         if current_attempt > 1 and self.extra_cli_arguments_after_first_start_failure:
             cli_args = list(extra_cli_arguments) + list(
                 self.extra_cli_arguments_after_first_start_failure)
         self._run(*cli_args)
         if not self.is_running():
             # A little breathe time to allow the process to start if not started already
             time.sleep(0.5)
         while time.time() <= attempt_deadline:
             if not self.is_running():
                 log.warning("%s is no longer running", self)
                 self.terminate()
                 break
             try:
                 checks_outcome = self.run_start_checks(attempt_began,
                                                        attempt_deadline)
             except FactoryNotStarted:
                 self.terminate()
                 break
             if checks_outcome is False:
                 # Checks did not pass yet; retry while the deadline allows it
                 time.sleep(1)
                 continue
             log.info(
                 "The %s factory is running after %d attempts. Took %1.2f seconds",
                 self,
                 current_attempt,
                 time.time() - began_at,
             )
             started = True
             break
         else:
             # The factory failed to confirm it's running status
             self.terminate()

     if not started:
         result = self.terminate()
         raise FactoryNotStarted(
             "The {} factory has failed to confirm running status after {} attempts, which "
             "took {:.2f} seconds".format(
                 self,
                 current_attempt - 1,
                 time.time() - began_at,
             ),
             stdout=result.stdout,
             stderr=result.stderr,
             exitcode=result.exitcode,
         )

     _run_callbacks(self.after_start_callbacks)
     return started
Exemplo n.º 7
0
def start_factory(factory_class,
                  start_timeout=10,
                  max_attempts=3,
                  event_listener=None,
                  salt_factories=None,
                  **factory_class_kwargs):
    """
    Returns a running factory

    A new ``factory_class`` instance is created and started for every attempt. Once
    it reports running, its optional checks (connectable ports, expected events
    and ``run_extra_checks``) must all pass within ``start_timeout`` seconds.

    Args:
        factory_class:
            The class to use to instantiate the factory instance.
        start_timeout(int):
            The amount of time, in seconds, to wait, until a factory is considered as not started.
        max_attempts(int):
            How many times to attempt to start the factory in case of failure
        event_listener(:py:class:`~saltfactories.utils.event_listener.EventListener`):
            Used to look for the events returned by the factory's ``get_check_events()``,
            when the factory defines that method.
        salt_factories:
            Passed to the factory's ``run_extra_checks()``, when the factory defines
            that method.
        **factory_class_kwargs(dict):
            Keyword arguments to pass to the ``factory_class`` when instantiating it.

    Raises:
        FactoryNotStarted:
            Raised when a factory fails to start or when the code used to confirm that the factory
            is up also fails.

            This includes the misconfiguration cases where the factory defines
            ``get_check_events()`` but no ``event_listener`` was passed, or defines
            ``run_extra_checks()`` but no ``salt_factories`` was passed. These are detected
            with a ``RuntimeError``, which is handled like any other exception raised while
            checking: the attempt is retried and the error is finally reported through
            ``FactoryNotStarted``.

    Returns:
        The running instance of ``factory_class``.
    """
    # Bound up-front so the exhausted-attempts branch at the bottom can reference it
    # even when ``max_attempts`` is lower than 1 and the loop body never runs.
    factory = None
    # Number of attempts made so far. It is incremented as each attempt begins, so
    # that, inside the loop, it is always the 1-based number of the current attempt.
    attempts = 0

    checks_start_time = time.time()
    while attempts < max_attempts:  # pylint: disable=too-many-nested-blocks
        attempts += 1
        factory = factory_class(**factory_class_kwargs)
        log.info("Starting %s. Attempt: %s", factory, attempts)
        start_time = time.time()
        checks_expire_time = start_time + start_timeout
        factory.start()
        if factory.is_running():
            try:
                # Each of the three checks is optional: a factory which does not define
                # the corresponding method raises AttributeError and the check is skipped.
                try:
                    check_ports = set(factory.get_check_ports())
                    if check_ports:
                        log.debug("Checking %s for connectable ports: %s",
                                  factory, check_ports)
                except AttributeError:
                    check_ports = False

                try:
                    check_events = set(factory.get_check_events())
                    if not event_listener:
                        factory.terminate()
                        raise RuntimeError(
                            "Factory {} want's to have events checked but no 'event_listener' was "
                            "passed to start_daemon()".format(factory))
                    log.debug("Checking %s for event patterns: %s", factory,
                              check_events)
                except AttributeError:
                    check_events = False

                try:
                    extra_checks_method = factory.run_extra_checks
                    extra_checks_passed = False
                    if not salt_factories:
                        raise RuntimeError(
                            "Factory {} defines the run_extra_checks method but no 'salt_factories' was "
                            "passed to start_daemon()".format(factory))
                except AttributeError:
                    extra_checks_method = False
                    extra_checks_passed = True

                all_checks_passed = False
                while time.time() <= checks_expire_time:
                    if not factory.is_running():
                        # If meanwhile the factory dies, break the loop
                        break

                    if not check_ports and not check_events and extra_checks_passed:
                        # If either there are no ports and no events to check, or,
                        # they've all been checked, break the loop
                        all_checks_passed = True
                        break

                    if check_ports:
                        check_ports -= ports.get_connectable_ports(check_ports)

                    if check_events:
                        check_events -= event_listener.get_events(
                            check_events, after_time=start_time)

                    if extra_checks_method and not check_ports and not check_events:
                        # Only run the extra checks after check_events and check_ports are
                        # checked since those likely involve shelling out
                        extra_checks_passed = extra_checks_method(
                            salt_factories)

                    # Let's not peg the CPU
                    time.sleep(0.5)

                if all_checks_passed is False:
                    result = factory.terminate()
                    if attempts >= max_attempts:
                        raise FactoryNotStarted(
                            "The {} factory has failed to confirm running status after {} attempts, which "
                            "took {:.2f} seconds({:.2f} seconds each)".format(
                                factory,
                                attempts,
                                time.time() - checks_start_time,
                                start_timeout,
                            ),
                            stdout=result.stdout,
                            stderr=result.stderr,
                            exitcode=result.exitcode,
                        )
                    continue
            except FactoryNotStarted:
                raise
            except Exception as exc:  # pylint: disable=broad-except
                log.exception("Exception caught on %s: %s",
                              factory,
                              exc,
                              exc_info=True)
                result = factory.terminate()
                if attempts >= max_attempts:
                    raise FactoryNotStarted(
                        "The {} factory has failed to confirm running status after {} attempts and raised an "
                        "exception: {}. Took {:.2f} seconds({:.2f} seconds each attempt)."
                        .format(
                            factory,
                            attempts,
                            str(exc),
                            time.time() - start_time,
                            start_timeout,
                        ),
                        stdout=result.stdout,
                        stderr=result.stderr,
                        exitcode=result.exitcode,
                        exc=sys.exc_info(),
                    )

                # A little pause before retrying
                time.sleep(1)
                continue

            # A little breathing before returning the factory
            time.sleep(0.25)
            log.info(
                "The %s factory is running after %d attempts. Took %1.2f seconds",
                factory,
                attempts,
                time.time() - checks_start_time,
            )
            break
        else:
            factory.terminate()
            # A little pause before retrying
            time.sleep(1)
            continue
    else:
        # Every attempt was used up without the loop being left through ``break``
        stderr = stdout = exitcode = None
        if factory is not None:
            result = factory.terminate()
            stderr = result.stderr
            stdout = result.stdout
            exitcode = result.exitcode
        raise FactoryNotStarted(
            "The {} factory has failed to confirm running status after {} attempts, which "
            "took {:.2f} seconds.".format(factory, attempts,
                                          time.time() - checks_start_time),
            stdout=stdout,
            stderr=stderr,
            exitcode=exitcode,
        )
    return factory