Пример #1
0
  def from_assigned_task(self, assigned_task, sandbox):
    """
    Build the HealthChecker for a task from its health check configuration.

    A shell check is used when SHELL_HEALTH_CHECK is present in the 'health_checker' section of
    the configuration; otherwise an HTTP check is issued against the task's 'health' port.

    :param assigned_task: Assigned task whose health check configuration is read.
    :param sandbox: Sandbox passed through to the HealthChecker.
    :return: Instance of a HealthChecker, or None when an HTTP check would be used but the
             resolved port map has no 'health' entry.
    """
    mesos_task = mesos_task_instance_from_assigned_task(assigned_task)
    config = mesos_task.health_check_config().get()
    checker_config = config.get('health_checker', {})
    timeout = config.get('timeout_secs')

    if SHELL_HEALTH_CHECK not in checker_config:
      # HTTP flavour: only possible when the task exposes a 'health' port.
      ports = resolve_ports(mesos_task, assigned_task.assignedPorts)
      if 'health' not in ports:
        return None

      http_settings = checker_config.get(HTTP_HEALTH_CHECK, {})
      endpoint = http_settings.get('endpoint')
      expected_body = http_settings.get('expected_response')
      expected_code = http_settings.get('expected_response_code')

      signaler = HttpSignaler(ports['health'], timeout_secs=timeout)
      run_check = lambda: signaler(
        endpoint=endpoint,
        expected_response=expected_body,
        expected_response_code=expected_code)
    else:
      raw_command = checker_config.get(SHELL_HEALTH_CHECK, {}).get('shell_command')
      # The command may reference variables such as thermos.ports[http]; expand them first.
      command = HealthCheckerProvider.interpolate_cmd(task=assigned_task, cmd=raw_command)

      # The check runs as the job's role user unless the executor was started with
      # --nosetuid-health-checks, in which case no pre-exec hook is installed.
      if self.nosetuid_health_checks:
        drop_privileges = None
      else:
        role_entry = pwd.getpwnam(assigned_task.task.job.role)

        # NOTE(review): only gid and uid are switched here; supplementary groups are left
        # untouched -- confirm that this is intended.
        def drop_privileges():
          # Group first: once the uid is dropped the process may no longer change its gid.
          os.setgid(role_entry.pw_gid)
          os.setuid(role_entry.pw_uid)

      signaler = ShellHealthCheck(
        cmd=command,
        preexec_fn=drop_privileges,
        timeout_secs=timeout)
      run_check = lambda: signaler()

    return HealthChecker(
      run_check,
      sandbox,
      interval_secs=config.get('interval_secs'),
      initial_interval_secs=config.get('initial_interval_secs'),
      max_consecutive_failures=config.get('max_consecutive_failures'))
Пример #2
0
    def _terminate_http(self):
        """POST to each escalation endpoint in turn until the runner reports a status.

        After every endpoint that reports the request as handled, sleep for
        ESCALATION_WAIT and then look at the runner status.

        :return: True as soon as the runner status is not None after a handled
                 request; otherwise the loop finishes and None is returned
                 implicitly.
        """
        signaler = HttpSignaler(self._lifecycle_port)

        for endpoint in self._escalation_endpoints:
            handled, _reason = signaler(endpoint, use_post_method=True)
            if not handled:
                # Nothing acknowledged the request; escalate to the next endpoint.
                continue

            self._clock.sleep(self.ESCALATION_WAIT.as_(Time.SECONDS))
            if self._runner.status is not None:
                return True
Пример #3
0
    def test_exception(self):
        """A SocketTimeout raised by urlopen yields a falsy first result element."""
        self._mox.StubOutWithMock(urllib_request, 'urlopen')

        # Record phase: the single GET against /health times out.
        health_url = 'http://localhost:%s/health' % self.PORT
        expectation = urllib_request.urlopen(health_url, None, timeout=1.0)
        expectation.AndRaise(SocketTimeout('Timed out'))

        self._mox.ReplayAll()

        signaler = HttpSignaler(self.PORT)
        outcome = signaler('/health', expected_response='ok')
        assert not outcome[0]
Пример #4
0
    def test_health_checks(self):
        """Exercise body and status-code matching of HttpSignaler against canned replies.

        Seven urlopen calls are recorded and then consumed, in the same order,
        by seven signaler invocations.
        """
        self._mox.StubOutWithMock(urllib_request, 'urlopen')

        def expect_get(url):
            # Record one GET (no request body) with the default 1.0s timeout.
            return urllib_request.urlopen(url, None, timeout=1.0)

        health_url = 'http://localhost:%s/health' % self.PORT

        # Record phase -- order matters, replay consumes these one by one.
        expect_get(health_url).AndReturn(OpenedURL('ok'))
        expect_get(health_url).AndReturn(OpenedURL('not ok'))
        expect_get(health_url).AndReturn(OpenedURL('not ok', code=200))
        expect_get(health_url).AndReturn(OpenedURL('ok', code=400))
        expect_get(health_url).AndRaise(
            urllib_request.HTTPError('', 501, '', None, None))
        expect_get(health_url).AndReturn(OpenedURL('ok', code=200))
        expect_get('http://localhost:%s/random/endpoint' % self.PORT).AndReturn(
            OpenedURL('ok'))

        self._mox.ReplayAll()

        healthy = (True, None)
        body_mismatch = (
            False,
            'Response differs from expected response (expected "ok", got "not ok")'
        )
        code_mismatch_400 = (
            False,
            'Response code differs from expected response (expected 200, got 400)'
        )
        code_mismatch_501 = (
            False,
            'Response code differs from expected response (expected 200, got 501)'
        )

        signaler = HttpSignaler(self.PORT)

        # Body comparison only.
        assert signaler('/health', expected_response='ok') == healthy
        assert signaler('/health', expected_response='ok') == body_mismatch

        # Status code comparison only; the third call hits the recorded HTTPError.
        assert signaler('/health', expected_response_code=200) == healthy
        assert signaler('/health', expected_response_code=200) == code_mismatch_400
        assert signaler('/health', expected_response_code=200) == code_mismatch_501

        # Body and status code together, then a different endpoint.
        assert signaler('/health',
                        expected_response='ok',
                        expected_response_code=200) == healthy
        assert signaler('/random/endpoint', expected_response='ok') == healthy
Пример #5
0
    def from_assigned_task(self, assigned_task, sandbox):
        """
        Build the HealthChecker for a task from its health check configuration.

        A shell check is used when SHELL_HEALTH_CHECK is present in the
        'health_checker' section; otherwise an HTTP check is issued against the
        task's 'health' port. HTTP settings come from the HTTP_HEALTH_CHECK
        section when it exists, else from the top-level configuration keys.

        :param assigned_task: Assigned task whose health check configuration is read.
        :param sandbox: Sandbox passed through to the HealthChecker.
        :return: Instance of a HealthChecker, or None when an HTTP check would be
                 used but the resolved port map has no 'health' entry.
        """
        mesos_task = mesos_task_instance_from_assigned_task(assigned_task)
        config = mesos_task.health_check_config().get()
        checker_config = config.get('health_checker', {})
        timeout = config.get('timeout_secs')

        if SHELL_HEALTH_CHECK not in checker_config:
            ports = resolve_ports(mesos_task, assigned_task.assignedPorts)
            if 'health' not in ports:
                return None

            if HTTP_HEALTH_CHECK in checker_config:
                # The key is present, so assume the user has moved to the new config layout.
                http_source = checker_config.get(HTTP_HEALTH_CHECK, {})
            else:
                # TODO (AURORA-1563): Remove this clause after we deprecate support for
                # following keys directly in HealthCheckConfig
                http_source = config

            endpoint = http_source.get('endpoint')
            expected_body = http_source.get('expected_response')
            expected_code = http_source.get('expected_response_code')

            signaler = HttpSignaler(ports['health'], timeout_secs=timeout)
            run_check = lambda: signaler(
                endpoint=endpoint,
                expected_response=expected_body,
                expected_response_code=expected_code)
        else:
            command = checker_config.get(SHELL_HEALTH_CHECK, {}).get('shell_command')
            signaler = ShellHealthCheck(cmd=command, timeout_secs=timeout)
            run_check = lambda: signaler()

        return HealthChecker(
            run_check,
            sandbox,
            interval_secs=config.get('interval_secs'),
            initial_interval_secs=config.get('initial_interval_secs'),
            max_consecutive_failures=config.get('max_consecutive_failures'))
Пример #6
0
  def from_assigned_task(self, assigned_task, sandbox):
    """
    Build the HealthChecker for a task from its health check configuration.

    A shell check (with variables in the command interpolated) is used when SHELL_HEALTH_CHECK
    is present in the 'health_checker' section; otherwise an HTTP check is issued against the
    task's 'health' port.

    :param assigned_task: Assigned task whose health check configuration is read.
    :param sandbox: Sandbox passed through to the HealthChecker.
    :return: Instance of a HealthChecker, or None when an HTTP check would be used but the
             resolved port map has no 'health' entry.
    """
    mesos_task = mesos_task_instance_from_assigned_task(assigned_task)
    settings = mesos_task.health_check_config().get()
    checker_settings = settings.get('health_checker', {})
    timeout = settings.get('timeout_secs')

    use_shell = SHELL_HEALTH_CHECK in checker_settings
    if use_shell:
      template = checker_settings.get(SHELL_HEALTH_CHECK, {}).get('shell_command')
      # The command may reference variables such as thermos.ports[http]; expand them first.
      command = HealthCheckerProvider.interpolate_cmd(task=assigned_task, cmd=template)
      probe = ShellHealthCheck(cmd=command, timeout_secs=timeout)
      check_fn = lambda: probe()
    else:
      port_by_name = resolve_ports(mesos_task, assigned_task.assignedPorts)
      if 'health' not in port_by_name:
        return None

      http_settings = checker_settings.get(HTTP_HEALTH_CHECK, {})
      endpoint = http_settings.get('endpoint')
      wanted_body = http_settings.get('expected_response')
      wanted_code = http_settings.get('expected_response_code')

      probe = HttpSignaler(port_by_name['health'], timeout_secs=timeout)
      check_fn = lambda: probe(
        endpoint=endpoint,
        expected_response=wanted_body,
        expected_response_code=wanted_code)

    return HealthChecker(
      check_fn,
      sandbox,
      interval_secs=settings.get('interval_secs'),
      initial_interval_secs=settings.get('initial_interval_secs'),
      max_consecutive_failures=settings.get('max_consecutive_failures'))
Пример #7
0
    def test_all_calls_ok(self):
        """Both lifecycle endpoints answer a POST and are reported as (True, None)."""
        self._mox.StubOutWithMock(urllib_request, 'urlopen')

        quit_url = 'http://localhost:%s/quitquitquit' % self.PORT
        abort_url = 'http://localhost:%s/abortabortabort' % self.PORT

        # Record phase: an empty-string body is what the recorded POST calls carry.
        for url in (quit_url, abort_url):
            urllib_request.urlopen(url, '', timeout=1.0).AndReturn(OpenedURL(''))

        self._mox.ReplayAll()

        signaler = HttpSignaler(self.PORT)
        for path in ('/quitquitquit', '/abortabortabort'):
            assert signaler(path, use_post_method=True) == (True, None)
Пример #8
0
    def _terminate_http(self):
        """POST to each (endpoint, wait_time) escalation pair until the runner has a status.

        After a handled request the runner status is polled every
        WAIT_POLL_INTERVAL until it is set or the endpoint's wait_time has
        elapsed, at which point the next endpoint is tried.

        :return: True as soon as the runner status is not None after a handled
                 request; otherwise the loop finishes and None is returned
                 implicitly.
        """
        signaler = HttpSignaler(self._lifecycle_port)

        for endpoint, wait_time in self._escalation_endpoints:
            handled, _reason = signaler(endpoint, use_post_method=True)
            log.info('Killing task, calling %s and waiting %s, handled is %s' %
                     (endpoint, str(wait_time), str(handled)))

            if not handled:
                # The request was not acknowledged: skip the wait and escalate.
                continue

            elapsed = Amount(0, Time.SECONDS)
            while True:
                if self._runner.status is not None:
                    return True
                if elapsed >= wait_time:
                    break

                self._clock.sleep(self.WAIT_POLL_INTERVAL.as_(Time.SECONDS))
                elapsed += self.WAIT_POLL_INTERVAL
Пример #9
0
  def from_assigned_task(self, assigned_task, sandbox):
    """
    Build the HealthChecker for a task from its health check configuration.

    A shell check is used when SHELL_HEALTH_CHECK is present in the 'health_checker' section of
    the configuration; otherwise an HTTP check is issued against the task's 'health' port.

    For shell checks:
      * outside a filesystem image, the command is demoted to the job's role user through a
        pre-exec hook unless setuid health checks are disabled on this provider;
      * inside a filesystem image, the command is wrapped with wrap_with_mesos_containerizer
        and the user is handed to that wrapper instead of being switched in a pre-exec hook.

    :param assigned_task: Assigned task whose health check configuration, ports and job role
                          are read.
    :param sandbox: Sandbox of the task. Its is_filesystem_image and container_root attributes
                    are consulted for shell checks, and it is passed on to the HealthChecker.
    :return: Instance of a HealthChecker, or None when an HTTP check would be used but the
             resolved port map has no 'health' entry.
    :raises KeyError: from pwd.getpwnam when the job's role has no password database entry
                      (only on the demoting shell path).
    """
    mesos_task = mesos_task_instance_from_assigned_task(assigned_task)
    health_check_config = mesos_task.health_check_config().get()
    health_checker = health_check_config.get('health_checker', {})
    timeout_secs = health_check_config.get('timeout_secs')
    if SHELL_HEALTH_CHECK in health_checker:
      shell_command = health_checker.get(SHELL_HEALTH_CHECK, {}).get('shell_command')

      # Filling in variables e.g. thermos.ports[http] that could have been passed in as part of
      # shell_command.
      interpolated_command = HealthCheckerProvider.interpolate_cmd(
        task=assigned_task,
        cmd=shell_command)

      # If we do not want the health check to execute as the user from the job's role
      # --nosetuid-health-checks should be passed as an argument to the executor.
      demote_to_job_role_user = None
      if not self._nosetuid_health_checks and not sandbox.is_filesystem_image:
        pw_entry = pwd.getpwnam(assigned_task.task.job.role)
        def demote_to_job_role_user():
          # Group first: once the uid is dropped the process may no longer change its gid.
          os.setgid(pw_entry.pw_gid)
          os.setuid(pw_entry.pw_uid)

      # If the task is executing in an isolated filesystem we'll want to wrap the health check
      # command within a mesos-containerizer invocation so that it's executed within that
      # filesystem.
      wrapper = None
      if sandbox.is_filesystem_image:
        # The os module has no getusername(); the previous call raised AttributeError whenever
        # setuid health checks were disabled for a filesystem-image task. getpass.getuser()
        # returns the login name of the current user. Imported locally to keep this method
        # self-contained.
        import getpass
        health_check_user = (getpass.getuser() if self._nosetuid_health_checks
            else assigned_task.task.job.role)
        def wrapper(cmd):
          return wrap_with_mesos_containerizer(
              cmd,
              health_check_user,
              sandbox.container_root,
              self._mesos_containerizer_path)

      shell_signaler = ShellHealthCheck(
        cmd=interpolated_command,
        preexec_fn=demote_to_job_role_user,
        timeout_secs=timeout_secs,
        wrapper_fn=wrapper)
      a_health_checker = lambda: shell_signaler()
    else:
      # HTTP flavour: only possible when the task exposes a 'health' port.
      portmap = resolve_ports(mesos_task, assigned_task.assignedPorts)
      if 'health' not in portmap:
        return None
      http_config = health_checker.get(HTTP_HEALTH_CHECK, {})
      http_endpoint = http_config.get('endpoint')
      http_expected_response = http_config.get('expected_response')
      http_expected_response_code = http_config.get('expected_response_code')

      http_signaler = HttpSignaler(
        portmap['health'],
        timeout_secs=timeout_secs)
      a_health_checker = lambda: http_signaler(
        endpoint=http_endpoint,
        expected_response=http_expected_response,
        expected_response_code=http_expected_response_code
      )

    return HealthChecker(
      a_health_checker,
      sandbox,
      interval_secs=health_check_config.get('interval_secs'),
      initial_interval_secs=health_check_config.get('initial_interval_secs'),
      max_consecutive_failures=health_check_config.get('max_consecutive_failures'))