def wait_for_service_endpoint(service_name, timeout_sec=120, path=""):
    """Wait until `/service/<service_name>/<path>` answers 200 on every master.

    For each IP returned by `dcos_masters_public_ips()` the service URL is
    queried through the `eventually` matcher until the HTTP status code
    equals 200. The scheme is `https` when `ee_version()` reports 'strict'
    or 'permissive', and `http` otherwise.

    :param service_name: name of the service below `/service/`.
    :param timeout_sec: polling budget; `timeout_sec / 5` is handed to the
        matcher as `max_attempts`.
    :param path: optional path appended after the service name.

    Nothing is returned. If a master never answers 200 the `assert_that`
    call fails.
    """

    def fetch_status_code(url):
        # Each request carries the DC/OS ACS token and a 5 second timeout.
        logger.info('Querying %s', url)
        response = requests.get(
            url=url,
            timeout=5,
            auth=DCOSAcsAuth(authentication.dcos_acs_token()),
            verify=verify_ssl())
        return response.status_code

    if ee_version() == 'strict' or ee_version() == 'permissive':
        schema = 'https'
    else:
        schema = 'http'

    announcement = 'Waiting for service /service/{}/{} to become available on all masters'.format(
        service_name, path)
    logger.info(announcement)

    for master_ip in dcos_masters_public_ips():
        url = "{}://{}/service/{}/{}".format(schema, master_ip, service_name, path)
        # NOTE(review): under Python 3 `/` produces a float attempt count;
        # confirm that `eventually` accepts a non-integer `max_attempts`.
        answers_ok = eventually(equal_to(200), max_attempts=timeout_sec / 5)
        assert_that(lambda: fetch_status_code(url), answers_ok)
def test_launch_docker_grace_period(marathon_service_name):
    """Checks that 'taskKillGracePeriodSeconds' is honoured for a Docker app.

    The app is deployed with a 20 second grace period and then scaled to 0.
    Its task must still be reported right after scaling and again after the
    default grace period (3 seconds) plus one second has passed. After
    sleeping for the configured grace period the task lookup must
    eventually return None.

    More background is given in
    `test_root_marathon.py::test_launch_mesos_root_marathon_grace_period`.
    """
    default_grace_period = 3
    grace_period = 20

    app_id = '/launch-docker-grace-period-app'
    app_def = apps.docker_http_server(app_id)
    app_def['container']['docker']['image'] = 'kensipe/python-test'
    app_def['taskKillGracePeriodSeconds'] = grace_period
    app_def['cmd'] = 'python test.py'

    task_name = app_id.lstrip('/')

    def lookup_task():
        # Single place for the task lookup that is repeated throughout the test.
        return get_service_task(marathon_service_name, task_name)

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)
    assert lookup_task() is not None

    client.scale_app(app_id, 0)
    assert lookup_task() is not None

    # The task has to outlive the default grace period ...
    time.sleep(default_grace_period + 1)
    assert lookup_task() is not None

    # ... but has to be gone once the configured grace period is over.
    time.sleep(grace_period)
    task_is_gone = eventually(equal_to(None), max_attempts=30)
    assert_that(lookup_task, task_is_gone)
def deployment_wait(service_id=None, deployment_id=None, wait_fixed=2000, max_attempts=60, client=None):
    """Wait until the matching deployments have finished.

    Either `service_id` (an app/pod id) or `deployment_id` may be given,
    never both. When neither is given, all current deployments are awaited.
    `deployments_for` is polled until it yields an empty collection; the
    matcher retries after `wait_fixed` milliseconds and gives up after
    `max_attempts` tries.

    :param service_id: id of the app/pod whose deployments are awaited.
    :param deployment_id: id of one specific deployment to await.
    :param wait_fixed: pause between two polls, in milliseconds.
    :param max_attempts: number of polls before giving up.
    :param client: passed through unchanged to `deployments_for`.
    """
    assert not (service_id and deployment_id), "Use either deployment_id or service_id, but not both."

    if deployment_id:
        message = "Waiting for the deployment_id {} to finish".format(deployment_id)
    elif service_id:
        message = 'Waiting for {} to deploy successfully'.format(service_id)
    else:
        message = 'Waiting for all current deployments to finish'
    logger.info(message)

    no_deployments_left = eventually(has_len(0), wait_fixed=wait_fixed, max_attempts=max_attempts)
    assert_that(lambda: deployments_for(service_id, deployment_id, client), no_deployments_left)
def test_run_app_with_non_existing_user():
    """Runs an app as a non-existing user, which is expected to fail.

    The app's `lastTaskFailure.message` must eventually contain a
    "No such user" message that names the configured user.
    """
    # The expected failure message is derived from the same user name the app
    # is started with, so the two cannot drift apart.
    non_existing_user = 'bad'

    app_def = apps.sleep_app()
    app_def['user'] = non_existing_user

    client = marathon.create_client()
    client.add_app(app_def)

    expected_message = "No such user '{}'".format(non_existing_user)
    assert_that(
        lambda: client.get_app(app_def["id"]),
        eventually(
            prop(['lastTaskFailure', 'message'], contains_string(expected_message)),
            max_attempts=30))
def test_run_app_with_non_downloadable_artifact():
    """Starts an app whose fetch URI cannot be downloaded.

    The app's `lastTaskFailure.message` must eventually contain
    "Failed to fetch all URIs for container".
    """
    app_def = apps.sleep_app()
    app_def['fetch'] = [{"uri": "http://localhost/missing-artifact"}]

    client = marathon.create_client()
    client.add_app(app_def)

    def fetch_app():
        return client.get_app(app_def["id"])

    failure_message = contains_string("Failed to fetch all URIs for container")
    fetch_failure_reported = eventually(
        prop(['lastTaskFailure', 'message'], failure_message),
        max_attempts=30)
    assert_that(fetch_app, fetch_failure_reported)
def test_failing_health_check_results_in_unhealthy_app():
    """Deploys an HTTP server with a health check against '/bad-url'.

    The health check is meant to never pass, so the app must eventually
    report one running task that is unhealthy and none that is healthy.
    """
    failing_check = common.health_check('/bad-url', 'HTTP', failures=0, timeout=3)

    app_def = apps.http_server()
    app_def['healthChecks'] = [failing_check]

    client = marathon.create_client()
    client.add_app(app_def)

    def fetch_app():
        return client.get_app(app_def["id"])

    unhealthy_counters = has_values(tasksRunning=1, tasksHealthy=0, tasksUnhealthy=1)
    assert_that(fetch_app, eventually(unhealthy_counters, max_attempts=30))
def test_run_app_with_non_downloadable_artifact():
    """Starts an app whose fetch URI cannot be downloaded.

    The app's `lastTaskFailure.message` must eventually contain
    "Failed to fetch all URIs for container".
    """
    app_def = apps.sleep_app()
    app_def['fetch'] = [{"uri": "http://localhost/missing-artifact"}]

    client = marathon.create_client()
    client.add_app(app_def)

    def fetch_app():
        return client.get_app(app_def["id"])

    message_matcher = prop(
        ['lastTaskFailure', 'message'],
        contains_string("Failed to fetch all URIs for container"))
    assert_that(fetch_app, eventually(message_matcher, max_attempts=30))
def test_run_app_with_non_existing_user():
    """Runs an app as a non-existing user, which is expected to fail.

    The app's `lastTaskFailure.message` must eventually contain a
    "No such user" message that names the configured user.
    """
    # The expected failure message is derived from the same user name the app
    # is started with, so the two cannot drift apart.
    non_existing_user = 'bad'

    app_def = apps.sleep_app()
    app_def['user'] = non_existing_user

    client = marathon.create_client()
    client.add_app(app_def)

    expected_message = "No such user '{}'".format(non_existing_user)
    assert_that(
        lambda: client.get_app(app_def["id"]),
        eventually(
            prop(['lastTaskFailure', 'message'], contains_string(expected_message)),
            max_attempts=30))
def test_launch_app_timed():
    """Verifies that a task is spawned shortly after an app is added.

    Most tests wait for a task without any reference to time. Here the task
    list is polled right after `add_app`, without waiting for the
    deployment, and must reach exactly one task within 10 attempts.
    """
    app_def = apps.mesos_app(app_id='/timed-launch-app')
    app_id = app_def["id"]

    client = marathon.create_client()
    client.add_app(app_def)

    # Fail if no task was launched within 10 polls
    # (presumably about one per second -- confirm `eventually`'s default wait).
    one_task_present = eventually(has_len(equal_to(1)), max_attempts=10)
    assert_that(lambda: client.get_tasks(app_id), one_task_present)
def test_launch_mesos_container_with_docker_image():
    """Deploys a Docker image in a Mesos container.

    After the deployment the app must eventually have exactly one task, and
    the app's container type must be 'MESOS'.
    """
    app_def = apps.ucr_docker_http_server(app_id='/launch-mesos-container-with-docker-image-app')
    app_id = app_def["id"]

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    one_task_present = eventually(has_len(equal_to(1)), max_attempts=30)
    assert_that(lambda: client.get_tasks(app_id), one_task_present)

    container_type = client.get_app(app_id)['container']['type']
    assert container_type == 'MESOS', "The container type is not MESOS"
def test_failing_health_check_results_in_unhealthy_app():
    """Deploys an HTTP server with a health check against '/bad-url'.

    The health check is meant to never pass, so the app must eventually
    report one running task that is unhealthy and none that is healthy.
    """
    app_def = apps.http_server()
    never_passing_check = common.health_check('/bad-url', 'HTTP', failures=0, timeout=3)
    app_def['healthChecks'] = [never_passing_check]

    client = marathon.create_client()
    client.add_app(app_def)

    expected_state = eventually(
        has_values(tasksRunning=1, tasksHealthy=0, tasksUnhealthy=1),
        max_attempts=30)
    assert_that(lambda: client.get_app(app_def["id"]), expected_state)
def test_health_check_works_with_resident_task():
    """Checks that a resident task (common for Persistent Volumes) becomes healthy.

    The resident Docker app is deployed with an extended deployment wait
    (500 attempts). It must then have exactly one task and eventually
    report `tasksHealthy` equal to 1.

    Marathon bug: https://jira.mesosphere.com/browse/MARATHON-7050
    """
    app_def = apps.resident_docker_app()
    app_id = app_def["id"]

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id, max_attempts=500)

    task_count = len(client.get_tasks(app_def["id"]))
    assert task_count == 1, "The number of tasks is {}, but 1 was expected".format(task_count)

    one_healthy_task = eventually(has_value('tasksHealthy', 1), max_attempts=30)
    assert_that(lambda: client.get_app(app_def['id']), one_healthy_task)
def test_restart_container_with_persistent_volume():
    """Checks that data in a persistent volume survives an app restart.

    A task with a persistent volume is launched and the file served under
    `/data/foo` is expected to contain 'hello\\n'. The app is then restarted
    and the same file is expected to contain 'hello\\nhello\\n', read from
    the task that is running after the restart.
    """

    def curl_data_cmd(task):
        # Command that reads the volume file through the task's first port.
        return "curl {}:{}/data/foo".format(task['host'], task['ports'][0])

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_task(cmd, target_data):
        # Retried up to 30 times, one second apart, until the output matches.
        run, data = run_command_on_master(cmd)
        assert run, "{} did not succeed".format(cmd)
        assert data == target_data, "'{}' was not equal to {}".format(data, target_data)

    app_def = apps.persistent_volume_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1, "The number of tasks is {} after deployment, but 1 was expected".format(len(tasks))

    check_task(curl_data_cmd(tasks[0]), target_data='hello\n')

    client.restart_app(app_id)
    deployment_wait(service_id=app_id)

    assert_that(lambda: client.get_tasks(app_id), eventually(has_len(equal_to(1)), max_attempts=30))

    # Re-read the tasks: the list fetched before the restart is stale, and the
    # restarted task may report a different host or port.
    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1, "The number of tasks is {} after restart, but 1 was expected".format(len(tasks))

    check_task(curl_data_cmd(tasks[0]), target_data='hello\nhello\n')
def test_launch_mesos_container_with_docker_image():
    """Deploys a Docker image in a Mesos container.

    After the deployment the app must eventually have exactly one task, and
    the app's container type must be 'MESOS'.
    """
    app_def = apps.ucr_docker_http_server(
        app_id='/launch-mesos-container-with-docker-image-app')
    app_id = app_def["id"]

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    def fetch_tasks():
        return client.get_tasks(app_id)

    assert_that(fetch_tasks, eventually(has_len(equal_to(1)), max_attempts=30))

    deployed_app = client.get_app(app_id)
    container_type = deployed_app['container']['type']
    assert container_type == 'MESOS', "The container type is not MESOS"
def test_healtchcheck_and_volume():
    """Deploys the health-check-and-volume app and checks its state.

    After the deployment the app must have exactly one task and two
    container volumes, and must eventually report `tasksHealthy` equal to 1.
    """
    app_def = apps.healthcheck_and_volume()
    app_id = app_def["id"]

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    app = client.get_app(app_id)

    task_count = len(tasks)
    assert task_count == 1, \
        "The number of tasks is {} after deployment, but only 1 was expected".format(task_count)

    volume_count = len(app['container']['volumes'])
    assert volume_count == 2, "The container does not have the correct amount of volumes"

    # Finally the app has to become healthy.
    one_healthy_task = eventually(has_value('tasksHealthy', 1), max_attempts=30)
    assert_that(lambda: client.get_app(app_id), one_healthy_task)
def test_health_check_works_with_resident_task():
    """Checks that a resident task (common for Persistent Volumes) becomes healthy.

    The resident Docker app is deployed with an extended deployment wait
    (500 attempts). It must then have exactly one task and eventually
    report `tasksHealthy` equal to 1.

    Marathon bug: https://jira.mesosphere.com/browse/MARATHON-7050
    """
    app_def = apps.resident_docker_app()
    app_id = app_def["id"]

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id, max_attempts=500)

    tasks = client.get_tasks(app_def["id"])
    found = len(tasks)
    assert found == 1, "The number of tasks is {}, but 1 was expected".format(found)

    def fetch_app():
        return client.get_app(app_def['id'])

    assert_that(fetch_app, eventually(has_value('tasksHealthy', 1), max_attempts=30))
def test_task_failure_recovers():
    """Checks that a killed task is replaced by a task with a different id.

    The app runs 'sleep 1000'. After the deployment that process is killed
    on the task's host, and the first task reported for the app must
    eventually carry an id other than the original one.
    """
    app_def = apps.sleep_app()
    app_def['cmd'] = 'sleep 1000'
    app_id = app_def["id"]

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    original_task = client.get_tasks(app_id)[0]
    old_task_id = original_task['id']
    host = original_task['host']

    # NOTE(review): the '[s]leep' spelling presumably keeps the pattern from
    # matching the search command itself -- confirm in kill_process_on_host.
    common.kill_process_on_host(host, '[s]leep 1000')

    has_new_id = has_value('id', not_(equal_to(old_task_id)))
    assert_that(lambda: client.get_tasks(app_id)[0], eventually(has_new_id, max_attempts=30))
def test_task_failure_recovers():
    """Checks that a killed task is replaced by a task with a different id.

    The app runs 'sleep 1000'. After the deployment that process is killed
    on the task's host, and the first task reported for the app must
    eventually carry an id other than the original one.
    """
    app_def = apps.sleep_app()
    app_def['cmd'] = 'sleep 1000'
    app_id = app_def["id"]

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    first_task = client.get_tasks(app_id)[0]
    old_task_id, host = first_task['id'], first_task['host']

    common.kill_process_on_host(host, '[s]leep 1000')

    def first_reported_task():
        return client.get_tasks(app_id)[0]

    replaced = eventually(has_value('id', not_(equal_to(old_task_id))), max_attempts=30)
    assert_that(first_reported_task, replaced)
def test_app_update_rollback():
    """Checks that an app update can be rolled back to the previous version.

    The app is deployed with one instance, updated to two instances, and
    then updated back to one instance. That last deployment is rolled back
    immediately, so the app must end up with two tasks again.
    """
    app_def = apps.readiness_and_health_app("app-update-rollback")
    app_id = app_def["id"]
    client = marathon.create_client()

    def assert_task_count(expected):
        # Compares the app's current number of tasks with `expected`.
        assert_that(client.get_tasks(app_id), has_len(equal_to(expected)))

    # Deployment 1: initial launch.
    client.add_app(app_def)
    deployment_wait(service_id=app_id)
    assert_task_count(1)

    # Deployment 2: scale up to two instances.
    app_def['instances'] = 2
    client.update_app(app_id, app_def)
    deployment_wait(service_id=app_id)
    assert_task_count(2)

    # Deployment 3: scale down and roll back right away. The longer readiness
    # check interval provides a delay in which the rollback can be issued.
    app_def['readinessChecks'][0]['intervalSeconds'] = 30
    app_def['instances'] = 1
    deployment_id = client.update_app(app_id, app_def)
    client.rollback_deployment(deployment_id)
    deployment_wait(service_id=app_id)

    # The update to 1 instance was rolled back to 2.
    assert_task_count(2)
def test_restart_container_with_persistent_volume():
    """Checks that data in a persistent volume survives an app restart.

    A task with a persistent volume is launched and the file served under
    `/data/foo` is expected to contain 'hello\\n'. The app is then restarted
    and the same file is expected to contain 'hello\\nhello\\n', read from
    the task that is running after the restart.
    """

    def curl_data_cmd(task):
        # Command that reads the volume file through the task's first port.
        return "curl {}:{}/data/foo".format(task['host'], task['ports'][0])

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_task(cmd, target_data):
        # Retried up to 30 times, one second apart, until the output matches.
        run, data = run_command_on_master(cmd)
        assert run, "{} did not succeed".format(cmd)
        assert data == target_data, "'{}' was not equal to {}".format(data, target_data)

    app_def = apps.persistent_volume_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1, "The number of tasks is {} after deployment, but 1 was expected".format(len(tasks))

    check_task(curl_data_cmd(tasks[0]), target_data='hello\n')

    client.restart_app(app_id)
    deployment_wait(service_id=app_id)

    assert_that(lambda: client.get_tasks(app_id), eventually(has_len(equal_to(1)), max_attempts=30))

    # Re-read the tasks: the list fetched before the restart is stale, and the
    # restarted task may report a different host or port.
    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1, "The number of tasks is {} after restart, but 1 was expected".format(len(tasks))

    check_task(curl_data_cmd(tasks[0]), target_data='hello\nhello\n')
def test_healtchcheck_and_volume():
    """Deploys the health-check-and-volume app and checks its state.

    After the deployment the app must have exactly one task and two
    container volumes, and must eventually report `tasksHealthy` equal to 1.
    """
    app_def = apps.healthcheck_and_volume()
    app_id = app_def["id"]

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    app = client.get_app(app_id)

    number_of_tasks = len(tasks)
    assert number_of_tasks == 1, \
        "The number of tasks is {} after deployment, but only 1 was expected".format(number_of_tasks)

    volumes = app['container']['volumes']
    assert len(volumes) == 2, "The container does not have the correct amount of volumes"

    def fetch_app():
        return client.get_app(app_id)

    # Finally the app has to become healthy.
    assert_that(fetch_app, eventually(has_value('tasksHealthy', 1), max_attempts=30))