def test_from_assigned_task_http_endpoint_style_config(self):
    """Build a health checker from a task whose HTTP check uses the nested config style.

    The HTTP settings are supplied through ``HealthCheckerConfig(http=...)``.
    The test verifies that they survive serialization into the executor config
    and that the checker built by ``HealthCheckerProvider`` carries the
    configured interval, initial interval and max-consecutive-failures values.
    """
    interval_secs = 17
    initial_interval_secs = 3
    max_consecutive_failures = 2
    http_config = HttpHealthChecker(
        endpoint='/foo',
        expected_response='bar',
        expected_response_code=201)
    health_check_config = HealthCheckConfig(
        health_checker=HealthCheckerConfig(http=http_config),
        interval_secs=interval_secs,
        initial_interval_secs=initial_interval_secs,
        max_consecutive_failures=max_consecutive_failures,
        timeout_secs=7)
    task_config = TaskConfig(
        executorConfig=ExecutorConfig(
            name='thermos',
            data=MESOS_JOB(
                task=HELLO_WORLD,
                health_check_config=health_check_config).json_dumps()))
    assigned_task = AssignedTask(
        task=task_config,
        instanceId=1,
        assignedPorts={'health': 9001})

    # The HTTP settings must round-trip through the JSON executor config.
    execconfig_data = json.loads(assigned_task.task.executorConfig.data)
    http_exec_config = execconfig_data['health_check_config']['health_checker']['http']
    assert http_exec_config['endpoint'] == '/foo'
    assert http_exec_config['expected_response'] == 'bar'
    assert http_exec_config['expected_response_code'] == 201

    health_checker = HealthCheckerProvider().from_assigned_task(assigned_task, None)
    threaded = health_checker.threaded_health_checker
    assert threaded.interval == interval_secs
    assert threaded.initial_interval == initial_interval_secs
    # The failure threshold is configured above, so verify it as well, the same
    # way the other from_assigned_task tests do.
    assert threaded.max_consecutive_failures == max_consecutive_failures
def test_from_assigned_task_shell(self):
    """Build a health checker from a task configured with a shell health check.

    Confirms the shell command appears in the serialized executor config and
    that ``threaded_health_checker`` carries the configured interval, initial
    interval and max-consecutive-failures values.
    """
    interval_secs = 17
    initial_interval_secs = 3
    max_consecutive_failures = 2
    timeout_secs = 5

    checker_config = HealthCheckerConfig(
        shell=ShellHealthChecker(shell_command='failed command'))
    job_json = MESOS_JOB(
        task=HELLO_WORLD,
        health_check_config=HealthCheckConfig(
            health_checker=checker_config,
            interval_secs=interval_secs,
            initial_interval_secs=initial_interval_secs,
            max_consecutive_failures=max_consecutive_failures,
            timeout_secs=timeout_secs,
        )).json_dumps()
    task_config = TaskConfig(
        executorConfig=ExecutorConfig(name='thermos-generic', data=job_json))
    assigned_task = AssignedTask(task=task_config, instanceId=1)

    # The shell command must be present in the JSON executor config.
    serialized = json.loads(assigned_task.task.executorConfig.data)
    shell_section = serialized['health_check_config']['health_checker']['shell']
    assert shell_section['shell_command'] == 'failed command'

    health_checker = HealthCheckerProvider().from_assigned_task(assigned_task, None)
    assert health_checker.threaded_health_checker.interval == interval_secs
    assert health_checker.threaded_health_checker.initial_interval == initial_interval_secs
    assert (health_checker.threaded_health_checker.max_consecutive_failures
            == max_consecutive_failures)
def test_interpolate_cmd(self):
    """Check that ``{{thermos.ports[foo]}}`` is replaced using the task's assignedPorts."""
    interval_secs = 17
    initial_interval_secs = 3
    max_consecutive_failures = 2
    timeout_secs = 5
    shell_cmd = 'FOO_PORT={{thermos.ports[foo]}} failed command'

    health_check_config = HealthCheckConfig(
        health_checker=HealthCheckerConfig(
            shell=ShellHealthChecker(shell_command=shell_cmd)),
        interval_secs=interval_secs,
        initial_interval_secs=initial_interval_secs,
        max_consecutive_failures=max_consecutive_failures,
        timeout_secs=timeout_secs,
    )
    executor_config = ExecutorConfig(
        name='thermos-generic',
        data=MESOS_JOB(
            task=HELLO_WORLD,
            health_check_config=health_check_config).json_dumps())
    assigned_task = AssignedTask(
        task=TaskConfig(executorConfig=executor_config),
        instanceId=1,
        assignedPorts={'foo': 9001})

    # Port 'foo' is assigned 9001 above, so that is what the template must yield.
    expected_cmd = 'FOO_PORT=9001 failed command'
    assert HealthCheckerProvider.interpolate_cmd(assigned_task, cmd=shell_cmd) == expected_cmd
def test_announcer_provider_with_timeout(mock_client_provider, mock_serverset_provider):
    """Create an announcer checker through the default provider and start it.

    Both arguments are mocks supplied from outside this function (the patch
    targets are not visible here). The ZooKeeper client's ``start_async``
    returns an event that this test never sets; the checker is still expected
    to start and report a non-None status.
    """
    zk_client = create_autospec(spec=KazooClient, instance=True)
    zk_client.start_async.return_value = threading.Event()
    mock_client_provider.return_value = zk_client

    serverset = create_autospec(spec=ServerSet, instance=True)
    mock_serverset_provider.return_value = serverset

    provider = DefaultAnnouncerCheckerProvider('zookeeper.example.com', root='/aurora')
    portmap = {
        'http': 80,
        'admin': 'primary'
    }
    job = make_job('aurora', 'prod', 'proxy', 'primary', portmap=portmap)
    job = job(health_check_config=HealthCheckConfig(initial_interval_secs=0.1,
                                                    interval_secs=0.1))
    assigned_task = make_assigned_task(job, assigned_ports={'primary': 12345})

    checker = provider.from_assigned_task(assigned_task, None)

    zk_client.start_async.assert_called_once_with()
    mock_serverset_provider.assert_called_once_with(zk_client, '/aurora/aurora/prod/proxy')

    checker.start()
    checker.start_event.wait()
    assert checker.status is not None
def test_from_assigned_task_shell_no_demotion(self, mock_getpwnam):
    """Shell health check built with ``nosetuid_health_checks=True``.

    ``mock_getpwnam`` is a mock injected from outside this function (its patch
    target is not visible here). With setuid disabled, the provider must build
    the checker without ever calling it.
    """
    interval_secs = 17
    initial_interval_secs = 3
    max_consecutive_failures = 2
    timeout_secs = 5

    health_check_config = HealthCheckConfig(
        health_checker=HealthCheckerConfig(
            shell=ShellHealthChecker(shell_command='failed command')),
        interval_secs=interval_secs,
        initial_interval_secs=initial_interval_secs,
        max_consecutive_failures=max_consecutive_failures,
        timeout_secs=timeout_secs,
    )
    task_config = TaskConfig(
        job=JobKey(role='role', environment='env', name='name'),
        executorConfig=ExecutorConfig(
            name='thermos-generic',
            data=MESOS_JOB(
                task=HELLO_WORLD,
                health_check_config=health_check_config).json_dumps()))
    assigned_task = AssignedTask(
        task=task_config,
        instanceId=1,
        assignedPorts={'foo': 9001})

    serialized = json.loads(assigned_task.task.executorConfig.data)
    shell_section = serialized['health_check_config']['health_checker']['shell']
    assert shell_section['shell_command'] == 'failed command'

    provider = HealthCheckerProvider(nosetuid_health_checks=True)
    health_checker = provider.from_assigned_task(assigned_task, None)
    assert health_checker.threaded_health_checker.interval == interval_secs
    assert health_checker.threaded_health_checker.initial_interval == initial_interval_secs
    assert (health_checker.threaded_health_checker.max_consecutive_failures
            == max_consecutive_failures)
    # Should not be trying to access role's user info.
    assert not mock_getpwnam.called
def task_instance_from_job(job, instance):
    """Build the interpolated ``MesosTaskInstance`` for ``job``.

    Args:
        job: object exposing ``task()``, ``role()`` and the ``has_*`` / accessor
            pairs used below.
        instance: value passed as ``instance`` to both ``MesosContext`` and
            ``MesosTaskInstance``.

    Returns:
        The result of binding the ``MesosContext`` under ``mesos`` and calling
        ``interpolate()`` on the task instance.
    """
    mesos_context = MesosContext(instance=instance)

    # TODO(Sathya): Remove health_check_interval_secs references after deprecation cycle is complete.
    # The top-level interval takes precedence over health_check_config when both are set.
    if job.has_health_check_interval_secs():
        hc_config = HealthCheckConfig(interval_secs=job.health_check_interval_secs().get())
    elif job.has_health_check_config():
        hc_config = job.health_check_config()
    else:
        hc_config = HealthCheckConfig()

    task_instance = MesosTaskInstance(
        task=job.task(),
        role=job.role(),
        health_check_interval_secs=hc_config.interval_secs().get(),
        health_check_config=hc_config,
        instance=instance)

    # Optional job attributes are copied over only when the job defines them.
    if job.has_announce():
        task_instance = task_instance(announce=job.announce())
    if job.has_environment():
        task_instance = task_instance(environment=job.environment())

    bound = task_instance.bind(mesos=mesos_context)
    return bound.interpolate()
def test_update_config_passes_with_min_consecutive_successes_zero():
    """``_validate_update_config`` accepts a job with ``min_consecutive_successes=0``."""
    resources = Resources(cpu=0.1, ram=64 * MB, disk=64 * MB)
    task = Task(name='main', processes=[], resources=resources)
    base_job = Job(
        name='hello_world',
        role='john_doe',
        cluster='test-cluster',
        health_check_config=HealthCheckConfig(min_consecutive_successes=0),
        task=task)

    # Passing means the validator returns without raising.
    config._validate_update_config(AuroraConfig(base_job))
def task_instance_from_job(job, instance):
    """Create the ``MesosTaskInstance`` for ``job`` and interpolate it.

    Args:
        job: object exposing ``task()``, ``role()`` and the ``has_*`` / accessor
            pairs used below.
        instance: value passed as ``instance`` to both ``MesosContext`` and
            ``MesosTaskInstance``.

    Returns:
        The task instance after binding the ``MesosContext`` under ``mesos``
        and calling ``interpolate()``.
    """
    context = MesosContext(instance=instance)

    # TODO(Sathya): Remove health_check_interval_secs references after deprecation cycle is complete.
    # Start from the default config; the top-level interval, if present, wins
    # over a full health_check_config.
    health_config = HealthCheckConfig()
    if job.has_health_check_interval_secs():
        interval = job.health_check_interval_secs().get()
        health_config = HealthCheckConfig(interval_secs=interval)
    elif job.has_health_check_config():
        health_config = job.health_check_config()

    task_instance = MesosTaskInstance(
        task=job.task(),
        role=job.role(),
        health_check_interval_secs=health_config.interval_secs().get(),
        health_check_config=health_config,
        instance=instance)

    # Optional job attributes are copied over only when the job defines them.
    if job.has_announce():
        task_instance = task_instance(announce=job.announce())
    if job.has_environment():
        task_instance = task_instance(environment=job.environment())

    return task_instance.bind(mesos=context).interpolate()
def test_update_config_passes_with_min_requirement_values():
    """``_validate_update_config`` accepts ``watch_secs=26`` with ``max_consecutive_failures=1``."""
    job_kwargs = dict(
        name='hello_world',
        role='john_doe',
        cluster='test-cluster',
        update_config=UpdateConfig(watch_secs=26),
        health_check_config=HealthCheckConfig(max_consecutive_failures=1),
        task=Task(
            name='main',
            processes=[],
            resources=Resources(cpu=0.1, ram=64 * MB, disk=64 * MB)))
    base_job = Job(**job_kwargs)

    # Passing means the validator returns without raising.
    config._validate_update_config(AuroraConfig(base_job))
def test_health_check_config_http_ok():
    """``_validate_health_check_config`` accepts a config with no explicit health_checker."""
    health_check_config = HealthCheckConfig(
        max_consecutive_failures=1,
    )
    main_task = Task(
        name='main',
        processes=[],
        resources=Resources(cpu=0.1, ram=64 * MB, disk=64 * MB))
    base_job = Job(
        name='hello_bond',
        role='james',
        cluster='marine-cluster',
        health_check_config=health_check_config,
        task=main_task)

    # Passing means the validator returns without raising.
    config._validate_health_check_config(AuroraConfig(base_job))
def test_update_config_fails_with_min_consecutive_successes_negative():
    """``_validate_update_config`` exits when ``min_consecutive_successes`` is -1."""
    resources = Resources(cpu=0.1, ram=64 * MB, disk=64 * MB)
    task = Task(name='main', processes=[], resources=resources)
    base_job = Job(
        name='hello_world',
        role='john_doe',
        cluster='test-cluster',
        health_check_config=HealthCheckConfig(min_consecutive_successes=-1),
        task=task)

    # Only the validation call itself is expected to raise SystemExit.
    with pytest.raises(SystemExit):
        config._validate_update_config(AuroraConfig(base_job))
def test_update_config_fails_insufficient_watch_secs_equal_to_target():
    """``_validate_update_config`` exits for ``watch_secs=25`` with ``max_consecutive_failures=1``.

    Per the test name, 25 equals the validator's target value and is therefore
    still insufficient.
    """
    job_kwargs = dict(
        name='hello_world',
        role='john_doe',
        cluster='test-cluster',
        update_config=UpdateConfig(watch_secs=25),
        health_check_config=HealthCheckConfig(max_consecutive_failures=1),
        task=Task(
            name='main',
            processes=[],
            resources=Resources(cpu=0.1, ram=64 * MB, disk=64 * MB)))
    base_job = Job(**job_kwargs)

    # Only the validation call itself is expected to raise SystemExit.
    with pytest.raises(SystemExit):
        config._validate_update_config(AuroraConfig(base_job))
def test_health_check_config_invalid_type():
    """Constructing a job whose ``health_checker`` is a bare string raises ``ValueError``."""
    # Must be 'shell' or 'http' type of config.
    # Every constructor stays inside the raises block: any of them may be the
    # one that rejects the value.
    with pytest.raises(ValueError):
        bad_health_check_config = HealthCheckConfig(
            max_consecutive_failures=1,
            health_checker='foo',
        )
        main_task = Task(
            name='main',
            processes=[],
            resources=Resources(cpu=0.1, ram=64 * MB, disk=64 * MB))
        Job(
            name='hello_bond',
            role='james',
            cluster='marine-cluster',
            health_check_config=bad_health_check_config,
            task=main_task)
def test_health_check_config_shell_no_command():
    """``_validate_health_check_config`` exits for a shell checker without ``shell_command``."""
    # If we chose shell config, we must define shell_command.
    empty_shell_checker = HealthCheckerConfig(shell=ShellHealthChecker())
    health_check_config = HealthCheckConfig(
        max_consecutive_failures=1,
        health_checker=empty_shell_checker)
    main_task = Task(
        name='main',
        processes=[],
        resources=Resources(cpu=0.1, ram=64 * MB, disk=64 * MB))
    base_job = Job(
        name='hello_bond',
        role='james',
        cluster='marine-cluster',
        health_check_config=health_check_config,
        task=main_task)

    # Only the validation call itself is expected to raise SystemExit.
    with pytest.raises(SystemExit):
        config._validate_health_check_config(AuroraConfig(base_job))
def test_from_assigned_task_shell_filesystem_image(self, mock_getpwnam):
    """Shell health check for a task whose sandbox reports a filesystem image.

    ``ShellHealthCheck`` is patched so its constructor arguments can be
    inspected: the command and timeout are passed through, ``preexec_fn`` is
    ``None`` and ``wrapper_fn`` is set to something non-None.
    ``mock_getpwnam`` is a mock injected from outside this function and is not
    inspected here.
    """
    interval_secs = 17
    initial_interval_secs = 3
    max_consecutive_failures = 2
    timeout_secs = 5

    class _AnythingButNone(object):
        """Placeholder that compares equal to every value except ``None``."""

        def __eq__(self, other):
            return other is not None

    health_check_config = HealthCheckConfig(
        health_checker=HealthCheckerConfig(
            shell=ShellHealthChecker(shell_command='failed command')),
        interval_secs=interval_secs,
        initial_interval_secs=initial_interval_secs,
        max_consecutive_failures=max_consecutive_failures,
        timeout_secs=timeout_secs,
    )
    task_config = TaskConfig(
        job=JobKey(role='role', environment='env', name='name'),
        executorConfig=ExecutorConfig(
            name='thermos-generic',
            data=MESOS_JOB(
                task=HELLO_WORLD,
                health_check_config=health_check_config).json_dumps()))
    assigned_task = AssignedTask(
        task=task_config,
        instanceId=1,
        assignedPorts={'foo': 9001})

    serialized = json.loads(assigned_task.task.executorConfig.data)
    shell_section = serialized['health_check_config']['health_checker']['shell']
    assert shell_section['shell_command'] == 'failed command'

    # Properties have to be attached to the mock's type to behave as attributes.
    mock_sandbox = mock.Mock(spec_set=SandboxInterface)
    type(mock_sandbox).root = mock.PropertyMock(return_value='/some/path')
    type(mock_sandbox).is_filesystem_image = mock.PropertyMock(return_value=True)

    provider_kwargs = dict(
        nosetuid_health_checks=False,
        mesos_containerizer_path='/some/path/mesos-containerizer')
    patch_target = 'apache.aurora.executor.common.health_checker.ShellHealthCheck'
    with mock.patch(patch_target) as mock_shell:
        HealthCheckerProvider(**provider_kwargs).from_assigned_task(
            assigned_task, mock_sandbox)

    expected_calls = [
        mock.call(
            cmd='failed command',
            wrapper_fn=_AnythingButNone(),
            preexec_fn=None,
            timeout_secs=5.0)
    ]
    assert mock_shell.mock_calls == expected_calls
def task_instance_from_job(job, instance, hostname):
    """Build the ``MesosTaskInstance`` for ``job`` with a bound ``mesos`` context.

    Args:
        job: object exposing ``task()``, ``role()`` and the ``has_*`` / accessor
            pairs used below.
        instance: value passed as ``instance`` to both ``MesosContext`` and
            ``MesosTaskInstance``.
        hostname: value passed as ``hostname`` to ``MesosContext``.

    Returns:
        The task instance with the ``MesosContext`` bound under ``mesos``;
        ``interpolate()`` is not called here.
    """
    mesos_context = MesosContext(instance=instance, hostname=hostname)

    if job.has_health_check_config():
        hc_config = job.health_check_config()
    else:
        hc_config = HealthCheckConfig()

    task_instance = MesosTaskInstance(
        task=job.task(),
        role=job.role(),
        health_check_config=hc_config,
        instance=instance)

    # Optional job attributes are copied over only when the job defines them.
    if job.has_announce():
        task_instance = task_instance(announce=job.announce())
    if job.has_environment():
        task_instance = task_instance(environment=job.environment())
    if job.has_lifecycle():
        task_instance = task_instance(lifecycle=job.lifecycle())

    return task_instance.bind(mesos=mesos_context)
def test_task_health_ok(self):
    """Run a task against ``SignalServer(HealthyHandler)`` and expect TASK_FINISHED.

    The executor is given a ``HealthCheckerProvider`` and a 'health' port
    pointing at the signal server. After it terminates, exactly three status
    updates must have been sent and the last one must be ``TASK_FINISHED``.
    """
    proxy_driver = ProxyDriver()
    with SignalServer(HealthyHandler) as port, temporary_dir() as checkpoint_root:
        health_check_config = HealthCheckConfig(initial_interval_secs=0.1, interval_secs=0.1)
        job = MESOS_JOB(task=SLEEP2, health_check_config=health_check_config)
        _, executor = make_executor(
            proxy_driver,
            checkpoint_root,
            job,
            ports={'health': port},
            fast_status=True,
            status_providers=(HealthCheckerProvider(),))
        # Wait while the server and the checkpoint directory are still available.
        executor.terminated.wait()

    status_updates = proxy_driver.method_calls['sendStatusUpdate']
    assert len(status_updates) == 3
    final_update = status_updates[-1]
    assert final_update[0][0].state == mesos_pb2.TASK_FINISHED
def test_health_check_config_deprecate_message(monkeypatch):
    """Setting ``endpoint`` directly on ``HealthCheckConfig`` logs the HTTP deprecation warning.

    ``log.warn`` is replaced so its messages can be captured; exactly one
    message, ``HTTP_DEPRECATION_WARNING``, is expected from validation.
    """
    captured = []
    monkeypatch.setattr(log, 'warn', lambda msg: captured.append(msg))

    deprecated_style_config = HealthCheckConfig(
        max_consecutive_failures=1,
        endpoint='/to_be_deprecated')
    main_task = Task(
        name='main',
        processes=[],
        resources=Resources(cpu=0.1, ram=64 * MB, disk=64 * MB))
    base_job = Job(
        name='hello_bond',
        role='james',
        cluster='marine-cluster',
        health_check_config=deprecated_style_config,
        task=main_task)

    config._validate_health_check_config(AuroraConfig(base_job))
    assert captured == [HTTP_DEPRECATION_WARNING]
def test_from_assigned_task(self):
    """Build a health checker from a task with a 'health' port and no explicit checker.

    The checker returned by ``HealthCheckerProvider`` must carry the configured
    interval, initial interval and max-consecutive-failures values.
    """
    interval_secs = 17
    initial_interval_secs = 3
    max_consecutive_failures = 2

    health_check_config = HealthCheckConfig(
        interval_secs=interval_secs,
        initial_interval_secs=initial_interval_secs,
        max_consecutive_failures=max_consecutive_failures,
        timeout_secs=7)
    executor_config = ExecutorConfig(
        name='thermos',
        data=MESOS_JOB(
            task=HELLO_WORLD,
            health_check_config=health_check_config).json_dumps())
    assigned_task = AssignedTask(
        task=TaskConfig(executorConfig=executor_config),
        instanceId=1,
        assignedPorts={'health': 9001})

    health_checker = HealthCheckerProvider().from_assigned_task(assigned_task, None)
    assert health_checker.threaded_health_checker.interval == interval_secs
    assert health_checker.threaded_health_checker.initial_interval == initial_interval_secs
    assert (health_checker.threaded_health_checker.max_consecutive_failures
            == max_consecutive_failures)
def test_from_assigned_task_no_health_port(self):
    """The provider returns ``None`` for a task with no 'health' port and no shell command."""
    interval_secs = 17
    initial_interval_secs = 3
    max_consecutive_failures = 2
    timeout_secs = 5

    health_check_config = HealthCheckConfig(
        interval_secs=interval_secs,
        initial_interval_secs=initial_interval_secs,
        max_consecutive_failures=max_consecutive_failures,
        timeout_secs=timeout_secs,
    )
    executor_config = ExecutorConfig(
        name='thermos-generic',
        data=MESOS_JOB(
            task=HELLO_WORLD,
            health_check_config=health_check_config).json_dumps())
    # No health port and we don't have a shell_command.
    assigned_task = AssignedTask(
        task=TaskConfig(executorConfig=executor_config),
        instanceId=1,
        assignedPorts={'http': 9001})

    result = HealthCheckerProvider().from_assigned_task(assigned_task, None)
    self.assertIsNone(result)
def test_from_assigned_task_shell(self, mock_getpwnam):
    """Shell health check built by a default provider with a non-image sandbox.

    ``mock_getpwnam`` is a mock injected from outside this function (its patch
    target is not visible here). Besides the interval and failure-threshold
    checks, it must have been called exactly once with the job's role.
    """
    interval_secs = 17
    initial_interval_secs = 3
    max_consecutive_failures = 2
    timeout_secs = 5

    health_check_config = HealthCheckConfig(
        health_checker=HealthCheckerConfig(
            shell=ShellHealthChecker(shell_command='failed command')),
        interval_secs=interval_secs,
        initial_interval_secs=initial_interval_secs,
        max_consecutive_failures=max_consecutive_failures,
        timeout_secs=timeout_secs,
    )
    task_config = TaskConfig(
        job=JobKey(role='role', environment='env', name='name'),
        executorConfig=ExecutorConfig(
            name='thermos-generic',
            data=MESOS_JOB(
                task=HELLO_WORLD,
                health_check_config=health_check_config).json_dumps()))
    assigned_task = AssignedTask(
        task=task_config,
        instanceId=1,
        assignedPorts={'foo': 9001})

    serialized = json.loads(assigned_task.task.executorConfig.data)
    shell_section = serialized['health_check_config']['health_checker']['shell']
    assert shell_section['shell_command'] == 'failed command'

    # Properties have to be attached to the mock's type to behave as attributes.
    mock_sandbox = mock.Mock(spec_set=SandboxInterface)
    type(mock_sandbox).root = mock.PropertyMock(return_value='/some/path')
    type(mock_sandbox).is_filesystem_image = mock.PropertyMock(return_value=False)

    health_checker = HealthCheckerProvider().from_assigned_task(assigned_task, mock_sandbox)
    assert health_checker.threaded_health_checker.interval == interval_secs
    assert health_checker.threaded_health_checker.initial_interval == initial_interval_secs
    assert (health_checker.threaded_health_checker.max_consecutive_failures
            == max_consecutive_failures)

    mock_getpwnam.assert_called_once_with(task_config.job.role)
def test_task_instance_from_job():
    """Smoke test: ``task_instance_from_job`` yields a non-None result for a minimal job."""
    minimal_job = Job(health_check_config=HealthCheckConfig(interval_secs=30))
    task_instance = task_instance_from_job(minimal_job, 0)
    assert task_instance is not None