def test_from_assigned_task_shell(self):
    """A shell health-check config yields a checker with the configured timings."""
    expected_interval = 17
    expected_initial_interval = 3
    expected_max_failures = 2
    check_timeout = 5

    health_check_config = HealthCheckConfig(
        health_checker=HealthCheckerConfig(
            shell=ShellHealthChecker(shell_command='failed command')),
        interval_secs=expected_interval,
        initial_interval_secs=expected_initial_interval,
        max_consecutive_failures=expected_max_failures,
        timeout_secs=check_timeout,
    )
    job_json = MESOS_JOB(
        task=HELLO_WORLD, health_check_config=health_check_config).json_dumps()
    executor_config = ExecutorConfig(name='thermos-generic', data=job_json)
    assigned_task = AssignedTask(
        task=TaskConfig(executorConfig=executor_config), instanceId=1)

    # The serialized executor config must carry the shell command verbatim.
    parsed = json.loads(assigned_task.task.executorConfig.data)
    shell_section = parsed['health_check_config']['health_checker']['shell']
    assert shell_section['shell_command'] == 'failed command'

    provider = HealthCheckerProvider()
    health_checker = provider.from_assigned_task(assigned_task, None)
    threaded = health_checker.threaded_health_checker
    assert threaded.interval == expected_interval
    assert threaded.initial_interval == expected_initial_interval
    assert threaded.max_consecutive_failures == expected_max_failures
def test_from_assigned_task_shell_no_demotion(self, mock_getpwnam):
    """With nosetuid_health_checks=True the provider must not call getpwnam.

    mock_getpwnam is supplied from outside this block (presumably by a
    patch decorator -- confirm against the enclosing class).
    """
    expected_interval = 17
    expected_initial_interval = 3
    expected_max_failures = 2
    check_timeout = 5

    health_check_config = HealthCheckConfig(
        health_checker=HealthCheckerConfig(
            shell=ShellHealthChecker(shell_command='failed command')),
        interval_secs=expected_interval,
        initial_interval_secs=expected_initial_interval,
        max_consecutive_failures=expected_max_failures,
        timeout_secs=check_timeout,
    )
    job_json = MESOS_JOB(
        task=HELLO_WORLD, health_check_config=health_check_config).json_dumps()
    task_config = TaskConfig(
        job=JobKey(role='role', environment='env', name='name'),
        executorConfig=ExecutorConfig(name='thermos-generic', data=job_json))
    assigned_task = AssignedTask(
        task=task_config, instanceId=1, assignedPorts={'foo': 9001})

    # The serialized executor config must carry the shell command verbatim.
    parsed = json.loads(assigned_task.task.executorConfig.data)
    shell_section = parsed['health_check_config']['health_checker']['shell']
    assert shell_section['shell_command'] == 'failed command'

    provider = HealthCheckerProvider(nosetuid_health_checks=True)
    health_checker = provider.from_assigned_task(assigned_task, None)
    threaded = health_checker.threaded_health_checker
    assert threaded.interval == expected_interval
    assert threaded.initial_interval == expected_initial_interval
    assert threaded.max_consecutive_failures == expected_max_failures

    # Should not be trying to access role's user info.
    assert not mock_getpwnam.called
def test_basic_as_job(self):
    """Run HELLO_WORLD as a job and expect STARTING, RUNNING, FINISHED updates."""
    proxy_driver = ProxyDriver()

    with temporary_dir() as tempdir:
        executor = AuroraExecutor(
            runner_provider=make_provider(tempdir),
            sandbox_provider=DefaultTestSandboxProvider(),
            status_providers=[HealthCheckerProvider()])
        task = make_task(MESOS_JOB(task=HELLO_WORLD), instanceId=0)
        executor.launchTask(proxy_driver, task)
        executor.runner_started.wait()
        # Poll until the executor has created its status manager.
        while executor._status_manager is None:
            time.sleep(0.1)
        executor.terminated.wait()
        monitor = TaskMonitor(tempdir, task_id=HELLO_WORLD_TASK_ID)
        runner_state = monitor.get_state()

    assert 'hello_world_hello_world-001' in runner_state.processes, (
        'Could not find processes, got: %s' % ' '.join(runner_state.processes))

    updates = proxy_driver.method_calls['sendStatusUpdate']
    assert len(updates) == 3
    first, second, third = [call_args[0][0] for call_args in updates]
    assert first.state == mesos_pb2.TASK_STARTING
    assert second.state == mesos_pb2.TASK_RUNNING
    assert third.state == mesos_pb2.TASK_FINISHED
def test_from_assigned_task_http_endpoint_style_config(self):
    """An HTTP health-check config round-trips and yields the configured intervals."""
    expected_interval = 17
    expected_initial_interval = 3
    expected_max_failures = 2

    http_checker = HttpHealthChecker(
        endpoint='/foo', expected_response='bar', expected_response_code=201)
    health_check_config = HealthCheckConfig(
        health_checker=HealthCheckerConfig(http=http_checker),
        interval_secs=expected_interval,
        initial_interval_secs=expected_initial_interval,
        max_consecutive_failures=expected_max_failures,
        timeout_secs=7)
    job_json = MESOS_JOB(
        task=HELLO_WORLD, health_check_config=health_check_config).json_dumps()
    task_config = TaskConfig(
        executorConfig=ExecutorConfig(name='thermos', data=job_json))
    assigned_task = AssignedTask(
        task=task_config, instanceId=1, assignedPorts={'health': 9001})

    # The HTTP settings must be present in the serialized executor config.
    parsed = json.loads(assigned_task.task.executorConfig.data)
    http_section = parsed['health_check_config']['health_checker']['http']
    assert http_section['endpoint'] == '/foo'
    assert http_section['expected_response'] == 'bar'
    assert http_section['expected_response_code'] == 201

    provider = HealthCheckerProvider()
    health_checker = provider.from_assigned_task(assigned_task, None)
    threaded = health_checker.threaded_health_checker
    assert threaded.interval == expected_interval
    assert threaded.initial_interval == expected_initial_interval
def test_interpolate_cmd(self):
    """Making sure thermos.ports[foo] gets correctly substituted with assignedPorts info."""
    shell_cmd = 'FOO_PORT={{thermos.ports[foo]}} failed command'

    health_check_config = HealthCheckConfig(
        health_checker=HealthCheckerConfig(
            shell=ShellHealthChecker(shell_command=shell_cmd)),
        interval_secs=17,
        initial_interval_secs=3,
        max_consecutive_failures=2,
        timeout_secs=5,
    )
    job_json = MESOS_JOB(
        task=HELLO_WORLD, health_check_config=health_check_config).json_dumps()
    task_config = TaskConfig(
        executorConfig=ExecutorConfig(name='thermos-generic', data=job_json))
    # The port named 'foo' is what the template in shell_cmd refers to.
    assigned_task = AssignedTask(
        task=task_config, instanceId=1, assignedPorts={'foo': 9001})

    result = HealthCheckerProvider.interpolate_cmd(assigned_task, cmd=shell_cmd)
    assert result == 'FOO_PORT=9001 failed command'
def test_interpolate_cmd(self):
    """Making sure thermos.ports[foo] gets correctly substituted with assignedPorts info."""
    command_template = 'FOO_PORT={{thermos.ports[foo]}} failed command'
    timings = dict(
        interval_secs=17,
        initial_interval_secs=3,
        max_consecutive_failures=2,
        timeout_secs=5,
    )

    shell_checker = ShellHealthChecker(shell_command=command_template)
    job = MESOS_JOB(
        task=HELLO_WORLD,
        health_check_config=HealthCheckConfig(
            health_checker=HealthCheckerConfig(shell=shell_checker), **timings))
    executor_config = ExecutorConfig(name='thermos-generic', data=job.json_dumps())
    # assignedPorts supplies the value for the 'foo' port in the template.
    assigned_task = AssignedTask(
        task=TaskConfig(executorConfig=executor_config),
        instanceId=1,
        assignedPorts={'foo': 9001})

    interpolated = HealthCheckerProvider.interpolate_cmd(
        assigned_task, cmd=command_template)
    assert interpolated == 'FOO_PORT=9001 failed command'
def initialize(options):
    """Build and return the AuroraExecutor described by the parsed options.

    The runner provider class and the sandbox provider depend on whether
    options.execute_as_user or options.nosetuid is set.
    """
    cwd_path = os.path.abspath(CWD)
    checkpoint_root = os.path.join(cwd_path, MesosPathDetector.DEFAULT_SANDBOX_PATH)

    # status providers:
    status_providers = [
        HealthCheckerProvider(),
        ResourceManagerProvider(checkpoint_root=checkpoint_root),
    ]

    if options.announcer_enable:
        log.warn('Please remove the deprecated and no-op --announcer-enable flag in scheduler config!')

    if options.announcer_ensemble is not None:
        announcer_provider = DefaultAnnouncerCheckerProvider(
            options.announcer_ensemble,
            options.announcer_serverset_path,
            options.announcer_allow_custom_serverset_path,
            options.announcer_hostname)
        status_providers.append(announcer_provider)

    # Create executor stub
    # If nosetuid is set, execute_as_user is also None
    override_user = options.execute_as_user or options.nosetuid
    if override_user:
        provider_cls = UserOverrideThermosTaskRunnerProvider
    else:
        provider_cls = DefaultThermosTaskRunnerProvider

    # Both provider classes take the same constructor arguments here.
    runner_provider = provider_cls(
        dump_runner_pex(),
        checkpoint_root,
        artifact_dir=cwd_path,
        process_logger_destination=options.runner_logger_destination,
        process_logger_mode=options.runner_logger_mode,
        rotate_log_size_mb=options.runner_rotate_log_size_mb,
        rotate_log_backups=options.runner_rotate_log_backups,
        preserve_env=options.preserve_env)

    if not override_user:
        return AuroraExecutor(
            runner_provider=runner_provider,
            status_providers=status_providers)

    runner_provider.set_role(None)
    return AuroraExecutor(
        runner_provider=runner_provider,
        status_providers=status_providers,
        sandbox_provider=UserOverrideDirectorySandboxProvider(options.execute_as_user))
def initialize(options):
    """Build and return the AuroraExecutor described by the parsed options.

    When the announcer is enabled, an announcer status provider is added;
    app.error is called first if no ensemble was given.
    """
    cwd_path = os.path.abspath(CWD)
    checkpoint_root = os.path.join(cwd_path, MesosPathDetector.DEFAULT_SANDBOX_PATH)

    # status providers:
    status_providers = [
        HealthCheckerProvider(),
        ResourceManagerProvider(checkpoint_root=checkpoint_root),
    ]

    if options.announcer_enable:
        if options.announcer_ensemble is None:
            app.error('Must specify --announcer-ensemble if the announcer is enabled.')
        announcer_provider = DefaultAnnouncerCheckerProvider(
            options.announcer_ensemble, options.announcer_serverset_path)
        status_providers.append(announcer_provider)

    # Create executor stub
    # If nosetuid is set, execute_as_user is also None
    override_user = options.execute_as_user or options.nosetuid
    provider_cls = (
        UserOverrideThermosTaskRunnerProvider if override_user
        else DefaultThermosTaskRunnerProvider)

    # Both provider classes take the same constructor arguments here.
    runner_provider = provider_cls(
        dump_runner_pex(),
        checkpoint_root,
        artifact_dir=cwd_path,
        process_logger_mode=options.runner_logger_mode,
        rotate_log_size_mb=options.runner_rotate_log_size_mb,
        rotate_log_backups=options.runner_rotate_log_backups,
        preserve_env=options.preserve_env)

    executor_kwargs = dict(
        runner_provider=runner_provider,
        status_providers=status_providers)
    if override_user:
        runner_provider.set_role(None)
        executor_kwargs['sandbox_provider'] = UserOverrideDirectorySandboxProvider(
            options.execute_as_user)

    return AuroraExecutor(**executor_kwargs)
def main(args, options):
    """Assemble the executor from options, attach it to a Mesos driver and run it."""
    if MesosExecutorDriver is None:
        app.error('Could not load MesosExecutorDriver!')

    # status providers:
    status_providers = [
        HealthCheckerProvider(),
        ResourceManagerProvider(checkpoint_root=options.checkpoint_root),
    ]

    if options.announcer_enable:
        if options.announcer_ensemble is None:
            app.error('Must specify --announcer-ensemble if the announcer is enabled.')
        announcer_provider = DefaultAnnouncerCheckerProvider(
            options.announcer_ensemble, options.announcer_serverset_path)
        status_providers.append(announcer_provider)

    # Create executor stub
    # If nosetuid is set, execute_as_user is also None
    override_user = options.execute_as_user or options.nosetuid
    provider_cls = (
        UserOverrideThermosTaskRunnerProvider if override_user
        else DefaultThermosTaskRunnerProvider)
    runner_provider = provider_cls(dump_runner_pex(), artifact_dir=os.path.abspath(CWD))

    executor_kwargs = dict(
        runner_provider=runner_provider,
        status_providers=status_providers)
    if override_user:
        runner_provider.set_role(None)
        executor_kwargs['sandbox_provider'] = UserOverrideDirectorySandboxProvider(
            options.execute_as_user)
    thermos_executor = AuroraExecutor(**executor_kwargs)

    # Create driver stub
    driver = MesosExecutorDriver(thermos_executor)

    # This is an ephemeral executor -- shutdown if we receive no tasks within a certain
    # time period
    timeout_watcher = ExecutorTimeout(thermos_executor.launched, driver)
    timeout_watcher.start()

    # Start executor
    driver.run()

    log.info('MesosExecutorDriver.run() has finished.')
def test_from_assigned_task_shell_filesystem_image(self, mock_getpwnam):
    """For a filesystem-image sandbox, ShellHealthCheck gets a wrapper_fn and no preexec_fn.

    mock_getpwnam is supplied from outside this block (presumably by a
    patch decorator -- confirm against the enclosing class); it is not
    inspected here.
    """
    health_check_config = HealthCheckConfig(
        health_checker=HealthCheckerConfig(
            shell=ShellHealthChecker(shell_command='failed command')),
        interval_secs=17,
        initial_interval_secs=3,
        max_consecutive_failures=2,
        timeout_secs=5,
    )
    job_json = MESOS_JOB(
        task=HELLO_WORLD, health_check_config=health_check_config).json_dumps()
    task_config = TaskConfig(
        job=JobKey(role='role', environment='env', name='name'),
        executorConfig=ExecutorConfig(name='thermos-generic', data=job_json))
    assigned_task = AssignedTask(
        task=task_config, instanceId=1, assignedPorts={'foo': 9001})

    # The serialized executor config must carry the shell command verbatim.
    parsed = json.loads(assigned_task.task.executorConfig.data)
    shell_section = parsed['health_check_config']['health_checker']['shell']
    assert shell_section['shell_command'] == 'failed command'

    # Sandbox stub that reports itself as a filesystem image rooted at /some/path.
    mock_sandbox = mock.Mock(spec_set=SandboxInterface)
    sandbox_type = type(mock_sandbox)
    sandbox_type.root = mock.PropertyMock(return_value='/some/path')
    sandbox_type.is_filesystem_image = mock.PropertyMock(return_value=True)

    class NotNone(object):
        """Compares equal to anything that is not None."""

        def __eq__(self, other):
            return other is not None

    shell_check_target = 'apache.aurora.executor.common.health_checker.ShellHealthCheck'
    with mock.patch(shell_check_target) as mock_shell:
        provider = HealthCheckerProvider(
            nosetuid_health_checks=False,
            mesos_containerizer_path='/some/path/mesos-containerizer')
        provider.from_assigned_task(assigned_task, mock_sandbox)

        expected_calls = [
            mock.call(
                cmd='failed command',
                wrapper_fn=NotNone(),
                preexec_fn=None,
                timeout_secs=5.0),
        ]
        assert mock_shell.mock_calls == expected_calls
def make_executor(proxy_driver, checkpoint_root, task, ports=None, fast_status=False,
                  runner_class=ThermosTaskRunner, status_providers=None,
                  assert_task_is_running=True):
    """Launch `task` on a FastThermosExecutor and wait for its first two status updates.

    Args:
        proxy_driver: driver whose recorded 'sendStatusUpdate' calls are inspected.
        checkpoint_root: passed to make_provider and TaskRunner.get.
        task: task definition handed to make_task.
        ports: assigned ports for the task; defaults to a fresh empty dict.
        fast_status: use FastStatusManager instead of StatusManager when true.
        runner_class: runner class handed to make_provider.
        status_providers: status providers for the executor; defaults to a
            fresh [HealthCheckerProvider()] per call.
        assert_task_is_running: when true, assert the second update is
            TASK_RUNNING and wait until a runner is available for the task.

    Returns:
        (runner, executor); runner is None when assert_task_is_running is false.
    """
    # Build the defaults per call: mutable defaults in the signature would be
    # created once at definition time and shared by every caller.
    if ports is None:
        ports = {}
    if status_providers is None:
        status_providers = [HealthCheckerProvider()]

    status_manager_class = FastStatusManager if fast_status else StatusManager
    runner_provider = make_provider(checkpoint_root, runner_class)
    te = FastThermosExecutor(
        runner_provider=runner_provider,
        status_manager_class=status_manager_class,
        sandbox_provider=DefaultTestSandboxProvider(),
        status_providers=status_providers,
    )

    ExecutorTimeout(te.launched, proxy_driver, timeout=Amount(100, Time.MILLISECONDS)).start()
    task_description = make_task(task, assigned_ports=ports, instanceId=0)
    te.launchTask(proxy_driver, task_description)

    te.status_manager_started.wait()

    # Poll until two status updates have been recorded on the driver.
    while len(proxy_driver.method_calls['sendStatusUpdate']) < 2:
        time.sleep(0.1)

    # make sure startup was kosher
    updates = proxy_driver.method_calls['sendStatusUpdate']
    assert len(updates) == 2
    status_updates = [arg_tuple[0][0] for arg_tuple in updates]
    assert status_updates[0].state == mesos_pb2.TASK_STARTING

    runner = None
    if assert_task_is_running:
        assert status_updates[1].state == mesos_pb2.TASK_RUNNING

        # wait for the runner to bind to a task
        while True:
            runner = TaskRunner.get(task_description.task_id.value, checkpoint_root)
            if runner:
                break
            time.sleep(0.1)

    assert te.launched.is_set()
    return runner, te
def test_task_health_ok(self):
    """A task with a healthy endpoint ends with TASK_FINISHED after three updates."""
    proxy_driver = ProxyDriver()

    with SignalServer(HealthyHandler) as port, temporary_dir() as checkpoint_root:
        # Short intervals keep the health checks frequent for the test.
        job = MESOS_JOB(
            task=SLEEP2,
            health_check_config=HealthCheckConfig(
                initial_interval_secs=0.1, interval_secs=0.1))
        executor = make_executor(
            proxy_driver,
            checkpoint_root,
            job,
            ports={'health': port},
            fast_status=True,
            status_providers=(HealthCheckerProvider(),))[1]
        executor.terminated.wait()

    updates = proxy_driver.method_calls['sendStatusUpdate']
    assert len(updates) == 3
    final_update = updates[-1][0][0]
    assert final_update.state == mesos_pb2.TASK_FINISHED
def test_from_assigned_task(self):
    """A config with no explicit checker but a 'health' port yields the configured timings."""
    expected_interval = 17
    expected_initial_interval = 3
    expected_max_failures = 2

    health_check_config = HealthCheckConfig(
        interval_secs=expected_interval,
        initial_interval_secs=expected_initial_interval,
        max_consecutive_failures=expected_max_failures,
        timeout_secs=7)
    job_json = MESOS_JOB(
        task=HELLO_WORLD, health_check_config=health_check_config).json_dumps()
    task_config = TaskConfig(
        executorConfig=ExecutorConfig(name='thermos', data=job_json))
    assigned_task = AssignedTask(
        task=task_config, instanceId=1, assignedPorts={'health': 9001})

    provider = HealthCheckerProvider()
    health_checker = provider.from_assigned_task(assigned_task, None)
    threaded = health_checker.threaded_health_checker
    assert threaded.interval == expected_interval
    assert threaded.initial_interval == expected_initial_interval
    assert threaded.max_consecutive_failures == expected_max_failures
def test_from_assigned_task_no_health_port(self):
    """With no 'health' port and no shell command, no health checker is returned."""
    health_check_config = HealthCheckConfig(
        interval_secs=17,
        initial_interval_secs=3,
        max_consecutive_failures=2,
        timeout_secs=5,
    )
    job_json = MESOS_JOB(
        task=HELLO_WORLD, health_check_config=health_check_config).json_dumps()
    task_config = TaskConfig(
        executorConfig=ExecutorConfig(name='thermos-generic', data=job_json))

    # No health port and we don't have a shell_command.
    assigned_task = AssignedTask(
        task=task_config, instanceId=1, assignedPorts={'http': 9001})

    provider = HealthCheckerProvider()
    result = provider.from_assigned_task(assigned_task, None)
    self.assertIsNone(result)
def test_from_assigned_task_shell(self, mock_getpwnam):
    """A shell check on a non-image sandbox keeps its timings and looks up the job role.

    mock_getpwnam is supplied from outside this block (presumably by a
    patch decorator -- confirm against the enclosing class).
    """
    expected_interval = 17
    expected_initial_interval = 3
    expected_max_failures = 2
    check_timeout = 5

    health_check_config = HealthCheckConfig(
        health_checker=HealthCheckerConfig(
            shell=ShellHealthChecker(shell_command='failed command')),
        interval_secs=expected_interval,
        initial_interval_secs=expected_initial_interval,
        max_consecutive_failures=expected_max_failures,
        timeout_secs=check_timeout,
    )
    job_json = MESOS_JOB(
        task=HELLO_WORLD, health_check_config=health_check_config).json_dumps()
    task_config = TaskConfig(
        job=JobKey(role='role', environment='env', name='name'),
        executorConfig=ExecutorConfig(name='thermos-generic', data=job_json))
    assigned_task = AssignedTask(
        task=task_config, instanceId=1, assignedPorts={'foo': 9001})

    # The serialized executor config must carry the shell command verbatim.
    parsed = json.loads(assigned_task.task.executorConfig.data)
    shell_section = parsed['health_check_config']['health_checker']['shell']
    assert shell_section['shell_command'] == 'failed command'

    # Sandbox stub that is not a filesystem image, rooted at /some/path.
    mock_sandbox = mock.Mock(spec_set=SandboxInterface)
    sandbox_type = type(mock_sandbox)
    sandbox_type.root = mock.PropertyMock(return_value='/some/path')
    sandbox_type.is_filesystem_image = mock.PropertyMock(return_value=False)

    provider = HealthCheckerProvider()
    health_checker = provider.from_assigned_task(assigned_task, mock_sandbox)
    threaded = health_checker.threaded_health_checker
    assert threaded.interval == expected_interval
    assert threaded.initial_interval == expected_initial_interval
    assert threaded.max_consecutive_failures == expected_max_failures

    mock_getpwnam.assert_called_once_with(task_config.job.role)
def main(args, options):
    """Assemble the executor from options, attach it to a Mesos driver and run it."""
    runner_provider = DefaultThermosTaskRunnerProvider(
        dump_runner_pex(),
        artifact_dir=os.path.realpath('.'),
    )

    # status providers:
    providers = [HealthCheckerProvider()]

    if options.announcer_enable:
        if options.announcer_ensemble is None:
            app.error('Must specify --announcer-ensemble if the announcer is enabled.')
        announcer_provider = DefaultAnnouncerCheckerProvider(
            options.announcer_ensemble, options.announcer_serverset_path)
        providers.append(announcer_provider)

    # Create executor stub
    executor = AuroraExecutor(
        runner_provider=runner_provider,
        status_providers=providers,
    )

    # Create driver stub
    driver = MesosExecutorDriver(executor)

    # This is an ephemeral executor -- shutdown if we receive no tasks within a certain
    # time period
    timeout_watcher = ExecutorTimeout(executor.launched, driver)
    timeout_watcher.start()

    # Start executor
    driver.run()

    log.info('MesosExecutorDriver.run() has finished.')
def initialize(options):
    """Build and return the AuroraExecutor described by the parsed options.

    The runner provider class and the sandbox provider depend on whether
    options.execute_as_user or options.nosetuid is set.
    """
    cwd_path = os.path.abspath(CWD)
    checkpoint_root = os.path.join(cwd_path, MesosPathDetector.DEFAULT_SANDBOX_PATH)

    # status providers:
    health_provider = HealthCheckerProvider(
        nosetuid_health_checks=options.nosetuid_health_checks,
        mesos_containerizer_path=options.mesos_containerizer_path)
    status_providers = [
        health_provider,
        ResourceManagerProvider(checkpoint_root=checkpoint_root),
    ]

    if options.announcer_ensemble is not None:
        announcer_provider = DefaultAnnouncerCheckerProvider(
            options.announcer_ensemble,
            options.announcer_serverset_path,
            options.announcer_allow_custom_serverset_path,
            options.announcer_hostname,
            make_zk_auth(options.announcer_zookeeper_auth_config))
        status_providers.append(announcer_provider)

    # Create executor stub
    # If nosetuid is set, execute_as_user is also None
    override_user = options.execute_as_user or options.nosetuid
    if override_user:
        provider_cls = UserOverrideThermosTaskRunnerProvider
    else:
        provider_cls = DefaultThermosTaskRunnerProvider

    # Both provider classes take the same constructor arguments here.
    runner_provider = provider_cls(
        dump_runner_pex(),
        checkpoint_root,
        artifact_dir=cwd_path,
        process_logger_destination=options.runner_logger_destination,
        process_logger_mode=options.runner_logger_mode,
        rotate_log_size_mb=options.runner_rotate_log_size_mb,
        rotate_log_backups=options.runner_rotate_log_backups,
        preserve_env=options.preserve_env,
        mesos_containerizer_path=options.mesos_containerizer_path)

    if not override_user:
        return AuroraExecutor(
            runner_provider=runner_provider,
            status_providers=status_providers,
            no_sandbox_create_user=options.no_create_user,
            sandbox_mount_point=options.sandbox_mount_point)

    runner_provider.set_role(None)
    return AuroraExecutor(
        runner_provider=runner_provider,
        status_providers=status_providers,
        sandbox_provider=UserOverrideDirectorySandboxProvider(options.execute_as_user),
        no_sandbox_create_user=options.no_create_user,
        sandbox_mount_point=options.sandbox_mount_point)