示例#1
0
def test_chained_health_interface():
    """Check ChainedStatusChecker start/stop fan-out and status propagation.

    Covers an empty chain, a chain holding only ``Healthy()``, and a chain of
    two ``EventHealth`` checkers where the second one reports a failure.
    """
    # Both of these chains are asserted to report no status at all.
    assert ChainedStatusChecker([]).status is None
    assert ChainedStatusChecker([Healthy()]).status is None

    first, second = EventHealth(), EventHealth()
    members = (first, second)
    chain = ChainedStatusChecker([first, second])

    # start() on the chain must reach every member.
    assert not any(member.started.is_set() for member in members)
    chain.start()
    assert all(member.started.is_set() for member in members)

    # Nothing reported yet; then the second member reports a failure.
    assert chain.status is None
    failure = StatusResult('derp', TaskState.Value('TASK_FAILED'))
    second.set_status(failure)
    assert chain.status == failure
    assert chain.status.reason == 'derp'
    assert TaskState.Name(chain.status.status) == 'TASK_FAILED'

    # stop() on the chain must reach every member as well.
    assert not any(member.stopped.is_set() for member in members)
    chain.stop()
    assert all(member.stopped.is_set() for member in members)
示例#2
0
 def status(self):
     """Map the threaded health checker's flags onto a StatusResult.

     Returns:
       A TASK_FAILED result (carrying the checker's ``reason``) when the
       checker is not healthy; a TASK_RUNNING result when it is healthy and
       running; a TASK_STARTING result with no reason otherwise.
     """
     checker = self.threaded_health_checker
     if not checker.healthy:
         failure_text = 'Failed health check! %s' % checker.reason
         return StatusResult(failure_text, TaskState.Value('TASK_FAILED'))
     if checker.running:
         return StatusResult('Task is healthy.',
                             TaskState.Value('TASK_RUNNING'))
     return StatusResult(None, TaskState.Value('TASK_STARTING'))
示例#3
0
 def run(self):
     """Poll the status checker and dispatch callbacks until an unhealthy status shows up.

     On every iteration the checker's status is read.  ``None`` and
     TASK_STARTING results just lead to another sleep; the first
     TASK_RUNNING result triggers the running callback (once only); any
     other result triggers the unhealthy callback and ends the loop.
     """
     while True:
         result = self._status_checker.status
         if result is not None:
             log.info('Status manager got %s' % result)
             is_running = result.status == TaskState.Value('TASK_RUNNING')
             if is_running:
                 # The running callback is fired at most once per manager.
                 if not self._running_callback_dispatched:
                     self._running_callback(result)
                     self._running_callback_dispatched = True
             elif result.status != TaskState.Value('TASK_STARTING'):
                 # Neither running nor starting: report it and stop polling.
                 self._unhealthy_callback(result)
                 return
         self._clock.sleep(self.POLL_WAIT.as_(Time.SECONDS))
示例#4
0
  def test_integration_failed(self):
    """A task exiting with code 1 must report TASK_FAILED, before and after stop()."""

    def assert_failed(runner):
      assert runner.status is not None
      assert TaskState.Name(runner.status.status) == 'TASK_FAILED'

    with self.yield_sleepy(ThermosTaskRunner, sleep=0, exit_code=1) as runner:
      runner.start()
      runner.forked.wait()

      self.run_to_completion(runner)
      assert_failed(runner)

      # stop() is expected to be a no-op here: the status must not change.
      runner.stop()
      assert_failed(runner)
示例#5
0
  def test_consecutive_failures(self):
    '''Verify that a task is unhealthy only after max_consecutive_failures is exceeded'''
    initial_interval_secs, interval_secs = 2, 1
    self.append_health_checks(False, num_calls=2)
    self.append_health_checks(True)
    self.append_health_checks(False, num_calls=3)
    hct = HealthChecker(
        self._checker.health,
        interval_secs=interval_secs,
        initial_interval_secs=initial_interval_secs,
        max_consecutive_failures=2,
        clock=self._clock)
    hct.start()

    # Ticks after which no status may be reported yet: two failures followed
    # by a success, then the first two failures of the final run of three.
    quiet_ticks = [initial_interval_secs] + [interval_secs] * 4
    for tick_secs in quiet_ticks:
      self._clock.tick(tick_secs)
      assert hct.status is None

    # The third consecutive failure exceeds max_consecutive_failures.
    self._clock.tick(interval_secs)
    thread_yield()
    assert hct.status.status == TaskState.Value('TASK_FAILED')
    hct.stop()
    assert self._checker.health.call_count == 6
 def test_initial_interval_whatev(self):
     """With initial_interval_secs=0 a failing check is reported right after start()."""
     self.append_health_checks(False)
     checker = HealthChecker(
         self._checker.health,
         interval_secs=5,
         initial_interval_secs=0,
         clock=self._clock)
     checker.start()
     failed = TaskState.Value('TASK_FAILED')
     assert checker.status.status == failed
     checker.stop()
     assert self._checker.health.call_count == 1
示例#7
0
  def test_integration_stop(self):
    """Stopping a still-running task must turn its status from None into TASK_KILLED."""
    with self.yield_sleepy(ThermosTaskRunner, sleep=1000, exit_code=0) as runner:
      runner.start()
      runner.forked.wait()

      # The task sleeps for 1000s, so no status is expected before stop().
      assert runner.status is None

      runner.stop()

      assert runner.status is not None
      state_name = TaskState.Name(runner.status.status)
      assert state_name == 'TASK_KILLED'
示例#8
0
    def test_consecutive_failures(self):
        '''Verify that a task is unhealthy only after max_consecutive_failures is exceeded'''
        initial_interval_secs, interval_secs = 2, 1
        epsilon = 0.001

        self.append_health_checks(False, num_calls=2)
        self.append_health_checks(True)
        self.append_health_checks(False, num_calls=3)
        hct = HealthChecker(
            self._checker.health,
            interval_secs=interval_secs,
            initial_interval_secs=initial_interval_secs,
            max_consecutive_failures=2,
            clock=self._clock)
        hct.start()
        self._clock.converge(threads=[hct.threaded_health_checker])

        def advance(secs):
            # Move the test clock slightly past ``secs``, let the checker
            # thread converge, and assert it is waiting again (amount=1).
            self._clock.tick(secs + epsilon)
            self._clock.converge(threads=[hct.threaded_health_checker])
            self._clock.assert_waiting(hct.threaded_health_checker, amount=1)

        def consecutive_failures():
            return hct.metrics.sample()['consecutive_failures']

        # (seconds to advance, expected 'consecutive_failures' metric) for
        # every step after which the checker must still report no status:
        # 2 consecutive failures, a success that resets the counter, then
        # the first 2 failures of the final run of 3.
        quiet_steps = (
            (initial_interval_secs, 1),
            (interval_secs, 2),
            (interval_secs, 0),
            (interval_secs, 1),
            (interval_secs, 2),
        )
        for secs, expected_failures in quiet_steps:
            advance(secs)
            assert hct.status is None
            assert consecutive_failures() == expected_failures

        # Third consecutive failure: max_consecutive_failures=2 is exceeded.
        advance(interval_secs)
        assert hct.status.status == TaskState.Value('TASK_FAILED')
        assert consecutive_failures() == 3
        hct.stop()
        assert self._checker.health.call_count == 6
示例#9
0
    def status(self):
        """Return the status computed from the statuses of the StatusCheckers.

        The computed status is based on the priority given below (in
        increasing order of priority).

          None             -> healthy (lowest-priority)
          TASK_RUNNING     -> healthy and running
          TASK_STARTING    -> healthy but still in starting
          Otherwise        -> unhealthy (highest-priority)

        When ``self._in_terminal_state()`` is already true, the checkers are
        not polled again and the stored status is returned as is.

        Returns:
          The StatusResult selected by the priority rules above, or None when
          no checker reported anything.  The value is also stored in
          ``self._status``.

        Raises:
          TypeError: if a checker reports something that is neither None nor
            a StatusResult.
        """
        if self._in_terminal_state():
            return self._status

        starting = TaskState.Value('TASK_STARTING')
        running = TaskState.Value('TASK_RUNNING')

        cur_status = None
        for status_checker in self._status_checkers:
            status_result = status_checker.status
            if status_result is None:
                continue
            log.info(
                '%s reported %s' %
                (status_checker.__class__.__name__, status_result))
            if not isinstance(status_result, StatusResult):
                raise TypeError(
                    'StatusChecker returned something other than a StatusResult: got %s'
                    % type(status_result))
            if status_result.status == starting:
                # TASK_STARTING overrides other statuses
                cur_status = status_result
            elif status_result.status == running:
                # TASK_RUNNING needs consensus (None is also included).
                # Compare the task state held by cur_status, not the
                # StatusResult object itself: TaskState.Value() yields a
                # plain enum number, so comparing it against the object could
                # never match.
                if cur_status is None or cur_status.status == running:
                    cur_status = status_result
            else:
                # Any other status leads to a terminal state
                self._status = status_result
                return self._status
        self._status = cur_status
        return self._status
示例#10
0
 def test_initial_interval_2x(self):
   """A failing check must stay invisible until 14s of ticks (6 + 3 + 5) have passed.

   No initial_interval_secs is passed, so the HealthChecker default applies
   (presumably twice interval_secs, going by the test name -- confirm
   against HealthChecker).
   """
   self.append_health_checks(False)
   checker = HealthChecker(self._checker.health, interval_secs=5, clock=self._clock)
   checker.start()
   thread_yield()
   assert checker.status is None
   for elapsed_secs in (6, 3):
     self._clock.tick(elapsed_secs)
     assert checker.status is None
   self._clock.tick(5)
   thread_yield()
   assert checker.status.status == TaskState.Value('TASK_FAILED')
   checker.stop()
   assert self._checker.health.call_count == 1
示例#11
0
 def test_initial_interval_whatev(self):
     """With initial_interval_secs=0 a failing check is reported once the checker thread settles."""
     self.append_health_checks(False, 2)
     checker = HealthChecker(
         self._checker.health,
         interval_secs=5,
         initial_interval_secs=0,
         clock=self._clock)
     checker.start()
     worker = checker.threaded_health_checker
     self._clock.converge(threads=[worker])
     self._clock.assert_waiting(worker, amount=5)
     assert checker.status.status == TaskState.Value('TASK_FAILED')
     checker.stop()
     # this is an implementation detail -- we healthcheck in the initializer and
     # healthcheck in the run loop.  if we ever change the implementation, expect
     # this to break.
     assert self._checker.health.call_count == 2
示例#12
0
  def test_integration_quitquitquit(self):
    """A task that ignores SIGTERM must still end up TASK_KILLED after stop()."""
    # One-line Python program that ignores SIGTERM and then sleeps.
    statements = (
        'import time, signal',
        'signal.signal(signal.SIGTERM, signal.SIG_IGN)',
        'time.sleep(1000)',
    )
    ignorant_script = ';'.join(statements)
    command = "%s -c '%s'" % (sys.executable, ignorant_script)

    class ShortPreemptionThermosTaskRunner(ThermosTaskRunner):
      # Shortened to one second for this test.
      THERMOS_PREEMPTION_WAIT = Amount(1, Time.SECONDS)

    with self.yield_runner(ShortPreemptionThermosTaskRunner, command=command) as runner:
      runner.start()
      runner.forked.wait()
      runner.stop(timeout=Amount(5, Time.SECONDS))
      assert runner.status is not None
      assert TaskState.Name(runner.status.status) == 'TASK_KILLED'
示例#13
0
 def test_initial_interval_2x(self):
     """A failing check must stay invisible until 14s of ticks (6 + 3 + 5) have passed.

     No initial_interval_secs is passed; right after start() the checker
     thread is asserted to be waiting on the clock for 10 (twice interval_secs).
     """
     self.append_health_checks(False)
     checker = HealthChecker(
         self._checker.health, interval_secs=5, clock=self._clock)
     checker.start()

     def settle():
         # converge() must return a truthy value every time.
         assert self._clock.converge(threads=[checker.threaded_health_checker])

     settle()
     self._clock.assert_waiting(checker.threaded_health_checker, 10)
     assert checker.status is None

     for elapsed_secs in (6, 3):
         self._clock.tick(elapsed_secs)
         settle()
         assert checker.status is None

     self._clock.tick(5)
     settle()
     assert checker.status.status == TaskState.Value('TASK_FAILED')
     checker.stop()
     assert self._checker.health.call_count == 1
示例#14
0
 def status(self):
     """Return a TASK_FAILED StatusResult while the health checker is unhealthy, else None."""
     checker = self.threaded_health_checker
     if checker.healthy:
         return None
     message = 'Failed health check! %s' % checker.reason
     return StatusResult(message, TaskState.Value('TASK_FAILED'))
示例#15
0
 def __init__(self, reason, status):
   """Store ``reason`` and ``status`` after validating the status.

   Raises:
     ValueError: if ``status`` is not one of ``TaskState.values()``.
   """
   self._reason = reason
   if status in TaskState.values():
     self._status = status
   else:
     raise ValueError('Unknown task state: %r' % status)
示例#16
0
 def __init__(self, reason, status):
     """Record the reason and the task state of this result.

     Raises:
       ValueError: if ``status`` is not a member of ``TaskState.values()``.
     """
     self._reason = reason
     known_states = TaskState.values()
     if not (status in known_states):
         raise ValueError('Unknown task state: %r' % status)
     self._status = status
示例#17
0
 def __repr__(self):
     """Return ``ClassName(<reason repr>, status=<state name repr>)``."""
     class_name = self.__class__.__name__
     reason = self._reason
     state_name = TaskState.Name(self._status)
     return '%s(%r, status=%r)' % (class_name, reason, state_name)
示例#18
0
 def unhealthy_callback(result):
     """Check the reported result is the fake TASK_KILLED one, then flag the call on ``self``."""
     expected = StatusResult('Fake reason', TaskState.Value('TASK_KILLED'))
     assert result == expected
     self.unhealthy_callback_called = True
示例#19
0
 def _in_terminal_state(self):
     """Tell whether a status is stored that is neither TASK_RUNNING nor TASK_STARTING."""
     if self._status is None:
         return False
     if self._status.status == TaskState.Value('TASK_RUNNING'):
         return False
     return self._status.status != TaskState.Value('TASK_STARTING')
示例#20
0
 def test_run_with_running_status(self):
     """Run the shared status scenario with a TASK_RUNNING result and an expected value of 1."""
     running = StatusResult(None, TaskState.Value('TASK_RUNNING'))
     self.do_test_run_with_status(running, 1)
示例#21
0
 def status(self):
     """Return a TASK_KILLED StatusResult carrying ``self._reason`` once killed, else None."""
     if not self._killed:
         return None
     killed_state = TaskState.Value('TASK_KILLED')
     return StatusResult(self._reason, killed_state)
示例#22
0
 def __init__(self):
     """Start out with a fixed TASK_RUNNING status, as no health check is defined."""
     running_state = TaskState.Value('TASK_RUNNING')
     self._status = StatusResult(
         'No health-check defined, task is assumed healthy.', running_state)
示例#23
0
 def status(self):
   """Return None for the first two reads, then the TASK_KILLED state value.

   Every read that returns None increments ``call_count``; once the counter
   is 2 it is no longer incremented.
   """
   if self.call_count != 2:
     self.call_count += 1
     return None
   return TaskState.Value('TASK_KILLED')
示例#24
0
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import threading

import pytest
from mesos.interface.mesos_pb2 import TaskState

from apache.aurora.executor.common.status_checker import (ChainedStatusChecker,
                                                          Healthy,
                                                          StatusChecker,
                                                          StatusResult)

# Reason-less StatusResult fixtures, one per task state used in this module.
TASK_STARTING, TASK_RUNNING, TASK_FAILED = [
    StatusResult(None, TaskState.Value(_state_name))
    for _state_name in ('TASK_STARTING', 'TASK_RUNNING', 'TASK_FAILED')
]


class EventHealth(StatusChecker):
    def __init__(self, status=None):
        """Set up the ``started``/``stopped`` events and the initial status.

        Args:
          status: value the ``status`` property reports initially (default None).
        """
        self._status = status
        self.started, self.stopped = threading.Event(), threading.Event()

    @property
    def status(self):
        """Return the status currently stored on this checker (None by default)."""
        return self._status

    def set_status(self, status):
示例#25
0
 def status(self):
     """Return ``self._status`` for the first two reads, then a fake TASK_KILLED result.

     Every read that returns ``self._status`` increments ``call_count``; once
     the counter is 2 it is no longer incremented.
     """
     if self.call_count != 2:
         self.call_count += 1
         return self._status
     return StatusResult('Fake reason', TaskState.Value('TASK_KILLED'))
示例#26
0
 def test_run_with_starting_status(self):
     """Run the shared status scenario with a TASK_STARTING result and an expected value of 0."""
     starting = StatusResult(None, TaskState.Value('TASK_STARTING'))
     self.do_test_run_with_status(starting, 0)
示例#27
0
 def callback(result):
   """Check the reported result is the TASK_KILLED state value, then flag the call on ``self``."""
   expected_state = TaskState.Value('TASK_KILLED')
   assert result == expected_state
   self.callback_called = True
示例#28
0
 def running_callback(result):
     """Check the reported result is the reason-less TASK_RUNNING one, then count the call on ``self``."""
     expected = StatusResult(None, TaskState.Value('TASK_RUNNING'))
     assert result == expected
     self.running_callback_called += 1