def test_vip_mesos_cmd(marathon_service_name):
    """Validates the creation of an app with a VIP label and the accessibility of the service via the VIP."""

    app_def = apps.http_server()
    app_id = app_def["id"]

    vip_name = app_id.lstrip("/")
    fqn = '{}.{}.l4lb.thisdcos.directory'.format(vip_name, marathon_service_name)

    app_def['portDefinitions'] = [{
        "port": 0,
        "protocol": "tcp",
        "name": "{}".format(vip_name),
        "labels": {
            "VIP_0": "/{}:10000".format(vip_name)
        }
    }]

    client = marathon.create_client()
    client.add_app(app_def)

    deployment_wait(service_id=app_id)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def http_output_check():
        time.sleep(1)
        common.assert_http_code('{}:{}'.format(fqn, 10000))

    http_output_check()
def test_vip_mesos_cmd(marathon_service_name):
    """Validates the creation of an app with a VIP label and the accessibility of the service via the VIP."""

    app_def = apps.http_server()
    vip_name = app_def["id"].lstrip("/")
    fqn = '{}.{}.l4lb.thisdcos.directory'.format(vip_name, marathon_service_name)

    app_def['portDefinitions'] = [{
        "port": 0,
        "protocol": "tcp",
        "name": "{}".format(vip_name),
        "labels": {
            "VIP_0": "/{}:10000".format(vip_name)
        }
    }]

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def http_output_check():
        time.sleep(1)
        common.assert_http_code('{}:{}'.format(fqn, 10000))

    http_output_check()
def test_failing_health_check_results_in_unhealthy_app():
    """Tests failed health checks of an app. The health check is meant to never pass."""

    app_def = apps.http_server()
    app_def['healthChecks'] = [
        common.health_check('/bad-url', 'HTTP', failures=0, timeout=3)
    ]

    client = marathon.create_client()
    client.add_app(app_def)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_failure_message():
        app = client.get_app(app_def["id"])
        print("{}, {}, {}".format(app['tasksRunning'], app['tasksHealthy'], app['tasksUnhealthy']))
        assert app['tasksRunning'] == 1, \
            "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
        assert app['tasksHealthy'] == 0, \
            "The number of healthy tasks is {}, but 0 was expected".format(app['tasksHealthy'])
        assert app['tasksUnhealthy'] == 1, \
            "The number of unhealthy tasks is {}, but 1 was expected".format(app['tasksUnhealthy'])

    check_failure_message()
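# For orientation: `common.health_check('/bad-url', 'HTTP', failures=0, timeout=3)` is assumed to
# build a standard Marathon health-check entry roughly like the sketch below. Field names follow
# the Marathon app-definition schema; the helper's exact defaults (grace period, interval) are
# assumptions, not taken from the suite.
assumed_health_check = {
    "protocol": "HTTP",
    "path": "/bad-url",            # the http server never serves this path, so the check never passes
    "portIndex": 0,
    "gracePeriodSeconds": 5,       # illustrative value
    "intervalSeconds": 2,          # illustrative value
    "timeoutSeconds": 3,
    "maxConsecutiveFailures": 0    # 0 means Marathon never kills the task; it just stays unhealthy
}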
def test_task_gets_restarted_due_to_network_split():
    """Verifies that a health check fails in presence of a network partition."""

    app_def = apps.http_server()
    app_id = app_def["id"]
    app_def['healthChecks'] = [common.health_check()]
    common.pin_to_host(app_def, common.ip_other_than_mom())

    client = marathon.create_client()
    client.add_app(app_def)
    common.deployment_wait(service_id=app_id)

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
    assert app['tasksHealthy'] == 1, \
        "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    tasks = client.get_tasks(app_id)
    task_id = tasks[0]['id']
    host = tasks[0]['host']
    port = tasks[0]['ports'][0]

    # introduce a network partition
    common.block_iptable_rules_for_seconds(host, port, sleep_seconds=10, block_input=True, block_output=False)

    common.deployment_wait(service_id=app_id)

    app = client.get_app(app_id)
    tasks = client.get_tasks(app_id)
    new_task_id = tasks[0]['id']
    assert task_id != new_task_id, "The task didn't get killed because of a failed health check"

    assert app['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
    assert app['tasksHealthy'] == 1, \
        "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    # network partition should cause a task restart
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_health_message():
        tasks = client.get_tasks(app_id)
        new_task_id = tasks[0]['id']
        assert task_id != new_task_id, "The task has not been restarted: {}".format(task_id)

        app = client.get_app(app_id)
        assert app['tasksRunning'] == 1, \
            "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
        assert app['tasksHealthy'] == 1, \
            "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    check_health_message()
def test_failing_health_check_results_in_unhealthy_app():
    """Tests failed health checks of an app. The health check is meant to never pass."""

    app_def = apps.http_server()
    app_def['healthChecks'] = [common.health_check('/bad-url', 'HTTP', failures=0, timeout=3)]

    client = marathon.create_client()
    client.add_app(app_def)

    assert_that(lambda: client.get_app(app_def["id"]),
                eventually(has_values(tasksRunning=1, tasksHealthy=0, tasksUnhealthy=1), max_attempts=30))
def test_task_gets_restarted_due_to_network_split():
    """Verifies that a health check fails in presence of a network partition."""

    app_def = apps.http_server()
    app_def['healthChecks'] = [common.health_check()]
    common.pin_to_host(app_def, common.ip_other_than_mom())

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_def["id"])
    assert app['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
    assert app['tasksHealthy'] == 1, \
        "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    tasks = client.get_tasks(app_def["id"])
    task_id = tasks[0]['id']
    host = tasks[0]['host']
    port = tasks[0]['ports'][0]

    # introduce a network partition
    with shakedown.iptable_rules(host):
        common.block_port(host, port)
        time.sleep(10)

    shakedown.deployment_wait()

    app = client.get_app(app_def["id"])
    tasks = client.get_tasks(app_def["id"])
    new_task_id = tasks[0]['id']
    assert task_id != new_task_id, "The task didn't get killed because of a failed health check"

    assert app['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
    assert app['tasksHealthy'] == 1, \
        "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    # network partition should cause a task restart
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_health_message():
        tasks = client.get_tasks(app_def["id"])
        new_task_id = tasks[0]['id']
        assert task_id != new_task_id, "The task has not been restarted: {}".format(task_id)

        app = client.get_app(app_def["id"])
        assert app['tasksRunning'] == 1, \
            "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
        assert app['tasksHealthy'] == 1, \
            "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    check_health_message()
def test_task_gets_restarted_due_to_network_split():
    """Verifies that a health check fails in presence of a network partition."""

    app_def = apps.http_server("app-network-split")
    app_id = app_def["id"]
    app_def['healthChecks'] = [common.health_check()]
    common.pin_to_host(app_def, common.ip_other_than_mom())

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
    assert app['tasksHealthy'] == 1, \
        "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    tasks = client.get_tasks(app_id)
    task_id = tasks[0]['id']
    host = tasks[0]['host']
    port = tasks[0]['ports'][0]

    # introduce a network partition
    common.block_iptable_rules_for_seconds(host, port, sleep_seconds=10, block_input=True, block_output=False)

    # Network partition should cause the task to restart N times until the partition is resolved (since we
    # pinned the task to the split agent). A new task with a new taskId should eventually be running and healthy.
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_health_message():
        tasks = client.get_tasks(app_id)
        new_task_id = tasks[0]['id']
        assert task_id != new_task_id, "The task has not been restarted: {}".format(task_id)

        app = client.get_app(app_id)
        assert app['tasksRunning'] == 1, \
            "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
        assert app['tasksHealthy'] == 1, \
            "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    check_health_message()
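# A minimal local sketch, assuming `common.block_iptable_rules_for_seconds` works by temporarily
# installing an iptables DROP rule for the task's port on the agent (the helper's real mechanism,
# e.g. whether it runs the commands over SSH or also blocks outbound traffic, is an assumption
# and not shown in this file):
import subprocess
import time


def block_inbound_port_for_seconds(port, seconds):
    rule = ["iptables", "-I", "INPUT", "-p", "tcp", "--dport", str(port), "-j", "DROP"]
    subprocess.check_call(["sudo"] + rule)    # start dropping inbound traffic on the port
    try:
        time.sleep(seconds)                   # keep the partition in place
    finally:
        # remove the rule again so traffic (and the health check) can recover
        subprocess.check_call(["sudo", "iptables", "-D", "INPUT", "-p", "tcp", "--dport", str(port), "-j", "DROP"])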
def test_failing_health_check_results_in_unhealthy_app():
    """Tests failed health checks of an app. The health check is meant to never pass."""

    app_def = apps.http_server()
    app_def['healthChecks'] = [common.health_check('/bad-url', 'HTTP', failures=0, timeout=3)]

    client = marathon.create_client()
    client.add_app(app_def)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_failure_message():
        app = client.get_app(app_def["id"])
        print("{}, {}, {}".format(app['tasksRunning'], app['tasksHealthy'], app['tasksUnhealthy']))
        assert app['tasksRunning'] == 1, \
            "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
        assert app['tasksHealthy'] == 0, \
            "The number of healthy tasks is {}, but 0 was expected".format(app['tasksHealthy'])
        assert app['tasksUnhealthy'] == 1, \
            "The number of unhealthy tasks is {}, but 1 was expected".format(app['tasksUnhealthy'])

    check_failure_message()
def test_http_health_check_healthy(protocol):
    """Tests HTTP, MESOS_HTTP, TCP and MESOS_TCP health checks against a web-server in Python."""

    app_def = apps.http_server()
    client = marathon.create_client()
    assert_app_healthy(client, app_def, common.health_check(protocol=protocol))
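# `protocol` is injected by the test harness. A minimal sketch, assuming plain pytest
# parametrization were used to supply the four protocols named in the docstring (the fixture or
# hook the suite actually uses is not shown here and is an assumption):
#
#     @pytest.mark.parametrize("protocol", ["HTTP", "MESOS_HTTP", "TCP", "MESOS_TCP"])
#     def test_http_health_check_healthy(protocol):
#         ...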