Exemplo n.º 1
0
    def connect(self, raise_on_any_error=False):
        """
        Spawn a connect attempt for every host in the hosts list, wait for all of them to
        finish and return the collected per-host status.

        :param raise_on_any_error: Optional flag which is forwarded to every ``_connect``
                                   call. Raise an exception even if connecting to one of
                                   the hosts fails.
        :type raise_on_any_error: ``boolean``

        :raises NoHostsConnectedToException: If not a single connect was successful.

        :rtype: ``dict`` of ``str`` to ``dict``
        """
        connect_results = {}

        for hostname in self._hosts:
            # Don't schedule more work until the green pool has a free slot
            while not concurrency_lib.is_green_pool_free(self._pool):
                concurrency_lib.sleep(self._scan_interval)

            self._pool.spawn(
                self._connect,
                host=hostname,
                results=connect_results,
                raise_on_any_error=raise_on_any_error,
            )

        # Block until every spawned connect attempt has completed
        concurrency_lib.green_pool_wait_all(self._pool)

        if self._successful_connects >= 1:
            return connect_results

        # Nothing connected - there is no way to continue so an exception is mandatory here.
        LOG.error('Unable to connect to any of the hosts.',
                  extra={'connect_results': connect_results})
        msg_template = 'Unable to connect to any one of the hosts: %s.\n\n connect_errors=%s'
        msg = msg_template % (self._hosts, json.dumps(connect_results, indent=2))
        raise NoHostsConnectedToException(msg)
Exemplo n.º 2
0
    def process_task(self, body, message):
        """
        Dispatch a consumed message to the handler registered for its routing key.

        The message is acknowledged in every case - when it is skipped (no handler or
        trigger type filtered out) as well as when the handler raises. Handler errors are
        logged and swallowed. The trailing sleep only happens when the message was not
        skipped.

        :param body: Message payload. Its optional ``type`` attribute is checked against
                     ``self._trigger_types``.
        :param message: Message object exposing ``properties``, ``delivery_info`` and
                        ``ack()``.
        """
        LOG.debug("process_task")
        LOG.debug("     body: %s", body)
        LOG.debug("     message.properties: %s", message.properties)
        LOG.debug("     message.delivery_info: %s", message.delivery_info)

        handler = self._handlers.get(
            message.delivery_info.get("routing_key", ""), None
        )

        try:
            # Guard 1: nothing registered for this routing key
            if not handler:
                LOG.debug("Skipping message %s as no handler was found.", message)
                return

            # Guard 2: trigger type filtering (only active when a filter is configured)
            body_trigger_type = getattr(body, "type", None)
            if self._trigger_types and body_trigger_type not in self._trigger_types:
                LOG.debug(
                    "Skipping message %s since trigger_type doesn't match (type=%s)",
                    message,
                    body_trigger_type,
                )
                return

            try:
                handler(body)
            except Exception as handler_error:
                # A failing handler must not prevent the message from being acked
                LOG.exception(
                    "Handling failed. Message body: %s. Exception: %s",
                    body,
                    six.text_type(handler_error),
                )
        finally:
            # Runs for the early returns above as well
            message.ack()

        concurrency.sleep(self.sleep_interval)
Exemplo n.º 3
0
    def test_sensor_watch_queue_gets_deleted_on_stop(self):
        """
        Verify that the AMQP queue used by ``SensorWatcher`` is gone once the watcher has
        been stopped.
        """
        def create_handler(sensor_db):
            pass

        def update_handler(sensor_db):
            pass

        def delete_handler(sensor_db):
            pass

        sensor_watcher = SensorWatcher(create_handler,
                                       update_handler,
                                       delete_handler,
                                       queue_suffix='covfefe')
        sensor_watcher.start()

        # Poll until the watcher queue shows up, but give up after 5 seconds so a queue
        # which never appears can't hang the test.
        # NOTE: The timeout check previously used "< 5" which is true right away, so the
        # loop always exited after a single iteration without actually waiting.
        start = monotonic()
        done = False
        while not done:
            concurrency.sleep(0.01)
            sw_queues = self._get_sensor_watcher_amqp_queues(
                queue_name='st2.sensor.watch.covfefe')
            done = len(sw_queues) > 0 or ((monotonic() - start) >= 5)

        sensor_watcher.stop()

        # After stop no queue with that name should be left
        sw_queues = self._get_sensor_watcher_amqp_queues(
            queue_name='st2.sensor.watch.covfefe')
        self.assertEqual(len(sw_queues), 0)
Exemplo n.º 4
0
    def process_task(self, body, message):
        """
        Route a consumed message to the handler which matches its routing key.

        Messages without a handler, or with a trigger type which is not part of
        ``self._trigger_types`` (when that filter is set), are skipped. The message is
        always acked, even if the handler raises - handler errors are only logged. The
        final sleep is skipped together with the message.

        :param body: Message payload (its ``type`` attribute, if any, is used for
                     trigger type filtering).
        :param message: Message object providing ``properties``, ``delivery_info`` and
                        ``ack()``.
        """
        LOG.debug('process_task')
        LOG.debug('     body: %s', body)
        LOG.debug('     message.properties: %s', message.properties)
        LOG.debug('     message.delivery_info: %s', message.delivery_info)

        key = message.delivery_info.get('routing_key', '')
        handler = self._handlers.get(key, None)

        try:
            if handler:
                msg_trigger_type = getattr(body, 'type', None)

                if self._trigger_types and msg_trigger_type not in self._trigger_types:
                    LOG.debug(
                        "Skipping message %s since trigger_type doesn't match (type=%s)",
                        message, msg_trigger_type)
                    return

                try:
                    handler(body)
                except Exception as exc:
                    # Log and carry on so the message still gets acked below
                    LOG.exception(
                        'Handling failed. Message body: %s. Exception: %s',
                        body, six.text_type(exc))
            else:
                LOG.debug('Skipping message %s as no handler was found.',
                          message)
                return
        finally:
            # Ack regardless of the outcome (including the early returns)
            message.ack()

        concurrency.sleep(self.sleep_interval)
Exemplo n.º 5
0
    def test_child_processes_are_killed_on_sigint(self):
        """
        Verify that sending SIGINT to the sensor container makes both the container and
        its single child process exit.
        """
        container = self._start_sensor_container()

        # Allow the container enough time to boot
        concurrency.sleep(7)

        # The container must be up before we inspect it
        self.assertProcessIsRunning(process=container)

        # Container process should run the default command and have exactly one child
        # (sensor / wrapper process)
        parent_proc = psutil.Process(container.pid)
        child_procs = parent_proc.children()
        self.assertEqual(parent_proc.cmdline()[1:], DEFAULT_CMD[1:])
        self.assertEqual(len(child_procs), 1)

        container.send_signal(signal.SIGINT)

        # SIGINT triggers a graceful shutdown, so wait a bit longer than the exit timeout
        # to let the sensor child processes shut down
        concurrency.sleep(PROCESS_EXIT_TIMEOUT + 1)

        # Neither the parent nor the child should be alive any more
        self.assertProcessExited(proc=parent_proc)
        self.assertProcessExited(proc=child_procs[0])

        self.remove_process(process=container)
Exemplo n.º 6
0
    def run(self):
        """
        Start all the sensors and keep polling them until the container is stopped.

        :return: ``SUCCESS_EXIT_CODE`` when the loop is interrupted by the greenlet exit
                 exception, ``FAILURE_EXIT_CODE`` when anything else is raised, otherwise
                 ``self._exit_code`` (or ``SUCCESS_EXIT_CODE`` if that value is falsy).
        """
        self._run_all_sensors()

        greenlet_exit_cls = concurrency.get_greenlet_exit_exception_class()

        try:
            while not self._stopped:
                # Snapshot of the sensors which are tracked right now
                active_sensor_ids = list(self._sensors.keys())

                if active_sensor_ids:
                    LOG.debug('%d active sensor(s)' % (len(active_sensor_ids)))
                    self._poll_sensors_for_results(active_sensor_ids)
                else:
                    LOG.debug('No active sensors')

                concurrency.sleep(self._poll_interval)
        except greenlet_exit_cls:
            # Raised when the sensor container manager kills the thread which runs the
            # process container. This is treated as a regular, successful exit.
            self._stopped = True
            return SUCCESS_EXIT_CODE
        except:
            # Intentionally catches everything else so the failure is always logged
            LOG.exception('Container failed to run sensors.')
            self._stopped = True
            return FAILURE_EXIT_CODE

        self._stopped = True
        LOG.error('Process container stopped.')

        return self._exit_code or SUCCESS_EXIT_CODE
Exemplo n.º 7
0
    def _perform_garbage_collection(self):
        """
        Run one garbage collection pass over every supported object type.

        Each object type is only processed when it is enabled via the corresponding
        instance setting; otherwise a debug message is logged and the type is skipped.
        """
        LOG.info("Performing garbage collection...")

        proc_message = "Performing garbage collection for %s."
        skip_message = "Skipping garbage collection for %s since it's not configured."

        def collect(obj_type, enabled, purge_func):
            # Shared "purge or skip" step which is used for every object type
            if not enabled:
                LOG.debug(skip_message, obj_type)
                return

            LOG.info(proc_message, obj_type)
            purge_func()
            # Note: We sleep for a bit after each object type to prevent busy waiting
            concurrency.sleep(self._sleep_delay)

        collect(
            "action executions",
            self._action_executions_ttl
            and self._action_executions_ttl >= MINIMUM_TTL_DAYS,
            self._purge_action_executions,
        )

        collect(
            "action executions output",
            self._action_executions_output_ttl
            and self._action_executions_output_ttl >= MINIMUM_TTL_DAYS_EXECUTION_OUTPUT,
            self._purge_action_executions_output,
        )

        collect(
            "trigger instances",
            self._trigger_instances_ttl
            and self._trigger_instances_ttl >= MINIMUM_TTL_DAYS,
            self._purge_trigger_instances,
        )

        collect("inquiries", self._purge_inquiries, self._timeout_inquiries)

        collect(
            "orphaned workflow executions",
            self._workflow_execution_max_idle > 0,
            self._purge_orphaned_workflow_executions,
        )
Exemplo n.º 8
0
 def test_no_sensors_dont_quit(self):
     """
     A container which is started without any sensors should keep running instead of
     stopping on its own.
     """
     container = ProcessSensorContainer(None, poll_interval=0.1)
     runner_thread = concurrency.spawn(container.run)

     # Let the container go through a couple of poll intervals
     concurrency.sleep(0.5)

     self.assertEqual(container.running(), 0)
     self.assertEqual(container.stopped(), False)

     # Clean up
     container.shutdown()
     runner_thread.kill()
Exemplo n.º 9
0
    def _main_loop(self):
        """
        Perform garbage collection over and over for as long as ``self._running`` is
        truthy, sleeping ``self._collection_interval`` seconds after every pass.
        """
        while self._running:
            self._perform_garbage_collection()

            sleep_message = (
                "Sleeping for %s seconds before next garbage collection..."
                % (self._collection_interval)
            )
            LOG.info(sleep_message)
            concurrency.sleep(self._collection_interval)
Exemplo n.º 10
0
    def _execute_in_pool(self, execute_method, **kwargs):
        """
        Run ``execute_method`` in the green pool once per host in ``self._hosts_client``
        and return the collected results.

        :param execute_method: Callable which is spawned with ``host``, ``results`` and
                               any additional keyword arguments.
        :param kwargs: Extra keyword arguments passed through to ``execute_method``.

        :return: Results dictionary. It is pre-populated with the entries from
                 ``self._bad_hosts`` and shared with every spawned call.
        :rtype: ``dict``
        """
        results = {}

        # Entries recorded for bad hosts are reported as-is
        for bad_host, bad_host_result in self._bad_hosts.items():
            results[bad_host] = bad_host_result

        for hostname in self._hosts_client:
            # Wait for a free slot in the pool before scheduling the next host
            while not self._pool.free():
                concurrency_lib.sleep(self._scan_interval)

            self._pool.spawn(execute_method, host=hostname, results=results, **kwargs)

        # Block until all the spawned calls have finished
        concurrency_lib.green_pool_wait_all(self._pool)

        return results
Exemplo n.º 11
0
    def _respawn_sensor(self, sensor_id, sensor, exit_code):
        """
        Method for respawning a sensor which died with a non-zero exit code.

        No respawn happens in single sensor mode (the container is marked as stopped and
        the exit code is recorded instead), when the container is already stopped, or
        when ``_should_respawn_sensor`` says no. Before respawning, the method sleeps for
        ``SENSOR_RESPAWN_DELAY`` multiplied by the number of respawns of this sensor.
        """
        log_extra = {"sensor_id": sensor_id, "sensor": sensor}

        if self._single_sensor_mode:
            # Single sensor mode: a failure means the whole container exits right away
            LOG.info(
                "Not respawning a sensor since running in single sensor mode",
                extra=log_extra,
            )
            self._stopped = True
            self._exit_code = exit_code
            return

        if self._stopped:
            LOG.debug("Stopped, not respawning a dead sensor", extra=log_extra)
            return

        if not self._should_respawn_sensor(
            sensor_id=sensor_id, sensor=sensor, exit_code=exit_code
        ):
            LOG.debug("Not respawning a dead sensor", extra=log_extra)
            return

        LOG.debug("Respawning dead sensor", extra=log_extra)

        # The delay grows with every respawn of the same sensor
        self._sensor_respawn_counts[sensor_id] += 1
        respawn_count = self._sensor_respawn_counts[sensor_id]
        concurrency.sleep(SENSOR_RESPAWN_DELAY * respawn_count)

        try:
            self._spawn_sensor_process(sensor=sensor)
        except Exception as spawn_error:
            LOG.warning(six.text_type(spawn_error), exc_info=True)

            # Stop tracking a sensor which we are unable to start
            del self._sensors[sensor_id]
Exemplo n.º 12
0
    def test_inquiry_garbage_collection(self):
        """
        Verify that the garbage collector process only cleans up the expired Inquiries.
        """
        now = date_utils.get_datetime_utc_now()

        # Every mock Inquiry uses the same timestamp (3 minutes ago) - only the TTL differs
        inquiry_timestamp = now - datetime.timedelta(minutes=3)

        old_inquiry_count = 15
        disabled_inquiry_count = 3
        new_inquiry_count = 5

        fixtures = (
            # Inquiries which are older than their TTL
            (old_inquiry_count, 2),
            # Inquiries with TTL set to a "disabled" value
            (disabled_inquiry_count, 0),
            # Inquiries which are younger than their TTL
            (new_inquiry_count, 15),
        )

        for inquiry_count, inquiry_ttl in fixtures:
            for _ in range(inquiry_count):
                self._create_inquiry(ttl=inquiry_ttl, timestamp=inquiry_timestamp)

        filters = {"status": action_constants.LIVEACTION_STATUS_PENDING}

        # Sanity check - everything we have inserted is pending
        pending = list(ActionExecution.query(**filters))
        self.assertEqual(
            len(pending),
            (old_inquiry_count + new_inquiry_count + disabled_inquiry_count),
        )

        gc_process = self._start_garbage_collector()

        # Let the collector work for a while and then kill it
        concurrency.sleep(15)
        gc_process.send_signal(signal.SIGKILL)
        self.remove_process(process=gc_process)

        # Only the expired Inquiries should be gone
        pending = list(ActionExecution.query(**filters))
        self.assertEqual(len(pending), new_inquiry_count + disabled_inquiry_count)
Exemplo n.º 13
0
    def run(self):
        """
        Entry point of the garbage collector: register the signal handlers, wait for a
        random startup delay and run the main loop.

        :return: ``FAILURE_EXIT_CODE`` if the main loop raised an ``Exception``,
                 ``SUCCESS_EXIT_CODE`` otherwise (including when it was interrupted by the
                 greenlet exit exception).
        """
        self._running = True

        self._register_signal_handlers()

        # Random delay of up to 3 seconds before the initial collection. This prevents
        # thundering herd effect when multiple services are restarted at the same time.
        startup_delay = random.uniform(0, 3)
        concurrency.sleep(startup_delay)

        greenlet_exit_cls = concurrency.get_greenlet_exit_exception_class()

        try:
            self._main_loop()
        except greenlet_exit_cls:
            exit_code = SUCCESS_EXIT_CODE
        except Exception as error:
            LOG.exception("Exception in the garbage collector: %s" % (six.text_type(error)))
            exit_code = FAILURE_EXIT_CODE
        else:
            # Main loop returned on its own
            return SUCCESS_EXIT_CODE

        # Only reached when the main loop was interrupted by an exception
        self._running = False
        return exit_code
Exemplo n.º 14
0
    def test_child_processes_are_killed_on_sigkill(self):
        """
        Verify that sending SIGKILL to the sensor container makes both the container and
        its single child process exit.
        """
        container = self._start_sensor_container()

        # Allow the container enough time to boot
        concurrency.sleep(5)

        # Container process should run the default command and have exactly one child
        # (sensor / wrapper process)
        parent_proc = psutil.Process(container.pid)
        child_procs = parent_proc.children()
        self.assertEqual(parent_proc.cmdline()[1:], DEFAULT_CMD[1:])
        self.assertEqual(len(child_procs), 1)

        container.send_signal(signal.SIGKILL)

        # Note: SIGKILL should take the processes down instantly, so a short wait is enough
        concurrency.sleep(1)

        # Neither the parent nor the child should be alive any more
        self.assertProcessExited(proc=parent_proc)
        self.assertProcessExited(proc=child_procs[0])

        self.remove_process(process=container)
Exemplo n.º 15
0
    def test_single_sensor_mode(self):
        """
        Verify behavior of the sensor container in single sensor mode.

        1. Without ``--sensor-ref`` the container prints an error and exits.
        2. With ``--sensor-ref`` pointing to a sensor which exits, the container doesn't
           respawn the sensor and exits as well.
        """
        # 1. --sensor-ref not provided
        cmd = [
            PYTHON_BINARY,
            BINARY,
            "--config-file",
            ST2_CONFIG_PATH,
            "--single-sensor-mode",
        ]

        process = self._start_sensor_container(cmd=cmd)
        pp = psutil.Process(process.pid)

        # Give it some time to start up
        concurrency.sleep(5)

        stdout = process.stdout.read()
        self.assertIn(
            (
                b"--sensor-ref argument must be provided when running in single sensor "
                b"mode"
            ),
            stdout,
        )
        self.assertProcessExited(proc=pp)
        self.remove_process(process=process)

        # 2. sensor ref provided
        # NOTE(review): Unlike the command above, this one doesn't start with
        # PYTHON_BINARY - confirm that this is intentional.
        cmd = [
            BINARY,
            "--config-file",
            ST2_CONFIG_PATH,
            "--single-sensor-mode",
            "--sensor-ref=examples.SampleSensorExit",
        ]

        process = self._start_sensor_container(cmd=cmd)
        pp = psutil.Process(process.pid)

        # Give it some time to start up
        concurrency.sleep(1)

        # Container should exit and not respawn a sensor in single sensor mode
        stdout = process.stdout.read()

        # NOTE: These checks used to be assertTrue(<non-empty bytes literal>) which can
        # never fail. They now actually verify that the messages are part of the output.
        self.assertIn(
            b"Process for sensor examples.SampleSensorExit has exited with code 110",
            stdout,
        )
        self.assertIn(
            b"Not respawning a sensor since running in single sensor mode", stdout
        )
        self.assertIn(b"Process container quit with exit_code 110.", stdout)

        concurrency.sleep(2)
        self.assertProcessExited(proc=pp)

        self.remove_process(process=process)
Exemplo n.º 16
0
    def run(self,
            cmd,
            timeout=None,
            quote=False,
            call_line_handler_func=False):
        """
        Execute ``cmd`` on a new session channel of the client's transport and collect
        its stdout, stderr and exit status.

        Note: This function is based on paramiko's exec_command()
        method.

        :param cmd: Command to execute. If it starts with ``sudo``, a pty is requested
                    for the channel.
        :type cmd: ``str``

        :param timeout: How long to wait (in seconds) for the command to finish (optional).
                        A falsy value disables the timeout check.
        :type timeout: ``float``

        :param quote: True to pass the command through ``quote_unix`` before executing it.
        :type quote: ``bool``

        :param call_line_handler_func: True to call handle_stdout_line_func function for each line
                                       of received stdout and handle_stderr_line_func for each
                                       line of stderr.
        :type call_line_handler_func: ``bool``

        :raises SSHCommandTimeoutError: If ``timeout`` is set and the command is still
                                        running after that many seconds. The exception
                                        receives the sanitized output collected so far.

        :return: ``[stdout, stderr, status]`` where stdout and stderr are the sanitized
                 outputs and status is the exit status received from the channel.
        :rtype: ``list``
        """

        if quote:
            cmd = quote_unix(cmd)

        extra = {'_cmd': cmd}
        self.logger.info('Executing command', extra=extra)

        # Use the system default buffer size
        bufsize = -1

        transport = self.client.get_transport()
        chan = transport.open_session()

        # The timeout is measured from this point, i.e. before the command is submitted
        start_time = time.time()
        if cmd.startswith('sudo'):
            # Note that fabric does this as well. If you set pty, stdout and stderr
            # streams will be combined into one.
            # NOTE: If pty is used, every new line character \n will be converted to \r\n which
            # isn't desired. Because of that we sanitize the output and replace \r\n with \n at the
            # bottom of this method
            uses_pty = True
            chan.get_pty()
        else:
            uses_pty = False
        chan.exec_command(cmd)

        # Buffers which accumulate everything consumed from the channel
        stdout = StringIO()
        stderr = StringIO()

        # Create a stdin file and immediately close it to prevent any
        # interactive script from hanging the process.
        stdin = chan.makefile('wb', bufsize)
        stdin.close()

        # Receive all the output
        # Note #1: This is used instead of chan.makefile approach to prevent
        # buffering issues and hanging if the executed command produces a lot
        # of output.
        #
        # Note #2: If you are going to remove "ready" checks inside the loop
        # you are going to have a bad time. Trying to consume from a channel
        # which is not ready will block indefinitely.
        exit_status_ready = chan.exit_status_ready()

        if exit_status_ready:
            # Command has already finished - the loop below is skipped entirely, so the
            # output needs to be consumed here.
            stdout_data = self._consume_stdout(
                chan=chan, call_line_handler_func=call_line_handler_func)
            stdout_data = stdout_data.getvalue()

            stderr_data = self._consume_stderr(
                chan=chan, call_line_handler_func=call_line_handler_func)
            stderr_data = stderr_data.getvalue()

            stdout.write(stdout_data)
            stderr.write(stderr_data)

        while not exit_status_ready:
            current_time = time.time()
            elapsed_time = (current_time - start_time)

            if timeout and (elapsed_time > timeout):
                # TODO: Is this the right way to clean up?
                chan.close()

                # Hand the partial output which was collected so far over to the caller
                stdout = sanitize_output(stdout.getvalue(), uses_pty=uses_pty)
                stderr = sanitize_output(stderr.getvalue(), uses_pty=uses_pty)
                raise SSHCommandTimeoutError(cmd=cmd,
                                             timeout=timeout,
                                             stdout=stdout,
                                             stderr=stderr)

            stdout_data = self._consume_stdout(
                chan=chan, call_line_handler_func=call_line_handler_func)
            stdout_data = stdout_data.getvalue()

            stderr_data = self._consume_stderr(
                chan=chan, call_line_handler_func=call_line_handler_func)
            stderr_data = stderr_data.getvalue()

            stdout.write(stdout_data)
            stderr.write(stderr_data)

            # We need to check the exit status here, because the command could
            # print some output and exit during this sleep below.
            exit_status_ready = chan.exit_status_ready()

            if exit_status_ready:
                break

            # Short sleep to prevent busy waiting
            concurrency.sleep(self.SLEEP_DELAY)

        # Receive the exit status code of the command we ran.
        status = chan.recv_exit_status()

        # From here on stdout / stderr refer to the sanitized output, not to the buffers
        stdout = sanitize_output(stdout.getvalue(), uses_pty=uses_pty)
        stderr = sanitize_output(stderr.getvalue(), uses_pty=uses_pty)

        extra = {'_status': status, '_stdout': stdout, '_stderr': stderr}
        self.logger.debug('Command finished', extra=extra)

        return [stdout, stderr, status]
Exemplo n.º 17
0
    def test_garbage_collection(self):
        """
        Verify that the garbage collector process removes old executions and old
        execution output objects while leaving the recent ones in place.
        """
        now = date_utils.get_datetime_utc_now()
        status = action_constants.LIVEACTION_STATUS_SUCCEEDED

        def insert_executions(count, age_days):
            # Insert "count" mock executions which are "age_days" days old, each with one
            # stdout and one stderr output object
            timestamp = (now - datetime.timedelta(days=age_days))

            for _ in range(count):
                execution_db = ActionExecutionDB(
                    start_timestamp=timestamp,
                    end_timestamp=timestamp,
                    status=status,
                    action={'ref': 'core.local'},
                    runner={'name': 'local-shell-cmd'},
                    liveaction={'ref': 'foo'})
                ActionExecution.add_or_update(execution_db)

                for output_type in ('stdout', 'stderr'):
                    output_db = ActionExecutionOutputDB(
                        execution_id=str(execution_db.id),
                        action_ref='core.local',
                        runner_ref='dummy',
                        timestamp=timestamp,
                        output_type=output_type,
                        data=output_type)
                    ActionExecutionOutput.add_or_update(output_db)

        # Mock objects which are older than the execution TTL defined in the config
        # (30 days > 20)
        old_executions_count = 15
        insert_executions(old_executions_count, 30)

        # Mock objects which are younger than both TTLs defined in the config
        # (2 days < 20)
        new_executions_count = 5
        insert_executions(new_executions_count, 2)

        # Mock objects which are only older than action_executions_output_ttl
        # (15 days: > 10 and < 20)
        new_output_count = 5
        insert_executions(new_output_count, 15)

        total_count = (old_executions_count + new_executions_count + new_output_count)

        # Sanity check - everything has been inserted
        self.assertEqual(len(ActionExecution.get_all()), total_count)
        self.assertEqual(
            len(ActionExecutionOutput.query(output_type='stdout')), total_count)
        self.assertEqual(
            len(ActionExecutionOutput.query(output_type='stderr')), total_count)

        gc_process = self._start_garbage_collector()

        # Let the collector work for a while and then kill it
        concurrency.sleep(15)
        gc_process.send_signal(signal.SIGKILL)
        self.remove_process(process=gc_process)

        # Old executions and corresponding objects should have been garbage collected
        remaining_execs = ActionExecution.get_all()
        self.assertEqual(len(remaining_execs),
                         (new_executions_count + new_output_count))

        # Collection for output objects older than 10 days is also enabled, so those
        # objects should be deleted as well
        remaining_stdout = ActionExecutionOutput.query(output_type='stdout')
        self.assertEqual(len(remaining_stdout), (new_executions_count))

        remaining_stderr = ActionExecutionOutput.query(output_type='stderr')
        self.assertEqual(len(remaining_stderr), (new_executions_count))
Exemplo n.º 18
0
 def on_iteration(self):
     """
     Run the parent ``on_iteration`` hook and then sleep for ``self.sleep_interval``
     seconds.
     """
     parent = super(TriggerWatcher, self)
     parent.on_iteration()

     concurrency.sleep(seconds=self.sleep_interval)
Exemplo n.º 19
0
 def on_consume_end(self, connection, channel):
     """
     Run the parent ``on_consume_end`` hook with the same connection and channel and
     then sleep for ``self.sleep_interval`` seconds.
     """
     parent = super(TriggerWatcher, self)
     parent.on_consume_end(connection=connection, channel=channel)

     concurrency.sleep(seconds=self.sleep_interval)
Exemplo n.º 20
0
 def run(self):
     """
     Call ``self.poll()`` forever, sleeping ``self._poll_interval`` seconds after each
     call. The loop only ends if ``poll`` (or the sleep) raises.
     """
     keep_polling = True

     while keep_polling:
         self.poll()
         concurrency.sleep(self._poll_interval)
Exemplo n.º 21
0
    def run(self, connection, wrapped_callback):
        """
        Run the wrapped_callback in a protective covering of retries and error handling.

        A new channel is opened for every attempt. On connection / channel errors the
        retry context decides whether to give up (the error is re-raised) or how long to
        wait before the connection is re-established and the callback is tried again.
        Any other exception is logged and re-raised right away.

        :param connection: Connection to messaging service
        :type connection: kombu.connection.Connection

        :param wrapped_callback: Callback that will be wrapped by all the fine handling in this
                                 method. Expected signature of callback -
                                 ``def func(connection, channel)``
        """
        should_stop = False
        channel = None
        while not should_stop:
            try:
                channel = connection.channel()
                wrapped_callback(connection=connection, channel=channel)
                should_stop = True
            except connection.connection_errors + connection.channel_errors as e:
                should_stop, wait = self._retry_context.test_should_stop(e)
                # reset channel to None to avoid any channel closing errors. At this point
                # in case of an exception there should be no channel but that is better to
                # guarantee.
                channel = None
                # All attempts to re-establish connections have failed. This error needs to
                # be notified so raise.
                if should_stop:
                    raise

                # -1, 0 and 1+ are handled properly by eventlet.sleep
                self._logger.debug(
                    'Received RabbitMQ server error, sleeping for %s seconds '
                    'before retrying: %s' % (wait, six.text_type(e)))
                concurrency.sleep(wait)

                connection.close()

                # ensure_connection will automatically switch to an alternate. Other connections
                # in the pool will be fixed independently. It would be nice to cut-over the
                # entire ConnectionPool simultaneously but that would require writing our own
                # ConnectionPool. If a server recovers it could happen that the same process
                # ends up talking to separate nodes in a cluster.

                def log_error_on_conn_failure(exc, interval):
                    # Log the error of this particular reconnect attempt ("exc") and not
                    # the original error which has triggered the reconnect.
                    self._logger.debug(
                        'Failed to re-establish connection to RabbitMQ server, '
                        'retrying in %s seconds: %s' %
                        (interval, six.text_type(exc)))

                try:
                    # NOTE: This function blocks and tries to re-establish a connection
                    # indefinitely if "max_retries" argument is not specified
                    connection.ensure_connection(
                        max_retries=self._ensure_max_retries,
                        errback=log_error_on_conn_failure)
                except Exception:
                    self._logger.exception(
                        'Connections to RabbitMQ cannot be re-established: %s',
                        six.text_type(e))
                    raise
            except Exception as e:
                self._logger.exception(
                    'Connections to RabbitMQ cannot be re-established: %s',
                    six.text_type(e))
                # Not being able to publish a message could be a significant issue for an app.
                raise
            finally:
                # The channel is only closed once we are done (callback succeeded). On the
                # retry path it has already been reset to None above.
                if should_stop and channel:
                    try:
                        channel.close()
                    except Exception:
                        self._logger.warning('Error closing channel.',
                                             exc_info=True)