def test_pod_health_failed_check():
    """Deploys a pod with correct health checks, then partitions the network and verifies that
       the tasks get restarted with new task IDs.
    """

    pod_def = pods.ports_pod()
    pod_id = pod_def['id']

    host = common.ip_other_than_mom()
    common.pin_pod_to_host(pod_def, host)

    client = marathon.create_client()
    client.add_pod(pod_def)
    deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    initial_id1 = tasks[0]['id']
    initial_id2 = tasks[1]['id']

    pod = client.list_pod()[0]
    container1 = pod['instances'][0]['containers'][0]
    port = container1['endpoints'][0]['allocatedHostPort']

    common.block_iptable_rules_for_seconds(host, port, 7, block_input=True, block_output=False)
    deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    for new_task in tasks:
        new_task_id = new_task['id']
        assert new_task_id != initial_id1, f"Task {new_task_id} has not been restarted" # NOQA E999
        assert new_task_id != initial_id2, f"Task {new_task_id} has not been restarted"
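# The partition above comes from common.block_iptable_rules_for_seconds(). A minimal
# sketch of how such a helper can be emulated (an assumption, not the shakedown
# implementation): add an iptables DROP rule for the health-check port on the agent,
# keep it in place long enough for the checks to fail, then remove it again.
# `run_shell_on_agent` is a hypothetical stand-in for whatever SSH/exec primitive the
# test harness provides.
def block_port_for_seconds(host, port, seconds, run_shell_on_agent):
    import time
    block = "sudo iptables -I INPUT -p tcp --dport {} -j DROP".format(port)
    unblock = "sudo iptables -D INPUT -p tcp --dport {} -j DROP".format(port)
    run_shell_on_agent(host, block)        # start the partition
    try:
        time.sleep(seconds)                # let health checks fail in the meantime
    finally:
        run_shell_on_agent(host, unblock)  # heal the partition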
def test_pod_with_container_network():
    """Tests creation of a pod with a "container" network, and its HTTP endpoint accessibility."""

    pod_def = pods.container_net_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write to /var/log within the container.
    if shakedown.dcos.cluster.ee_version() == 'strict':
        pod_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    deployment_wait(service_id=pod_id)

    task = common.task_by_name(common.get_pod_tasks(pod_id), "nginx")

    network_info = common.running_status_network_info(task['statuses'])
    assert network_info['name'] == "dcos", \
        "The network name is {}, but 'dcos' was expected".format(network_info['name'])

    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert container_ip is not None, "No IP address has been assigned to the pod's container"

    url = "http://{}:80/".format(container_ip)
    common.assert_http_code(url)
def test_pod_with_container_bridge_network():
    """Tests creation of a pod with a "container/bridge" network, and its HTTP endpoint accessibility."""

    pod_def = pods.container_bridge_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write to /var/log within the container.
    if shakedown.dcos.cluster.ee_version() == 'strict':
        pod_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    deployment_wait(service_id=pod_id)

    task = common.task_by_name(common.get_pod_tasks(pod_id), "nginx")
    network_info = common.running_status_network_info(task['statuses'])
    assert network_info['name'] == "mesos-bridge", \
        "The network is {}, but mesos-bridge was expected".format(network_info['name'])

    # get the port on the host
    port = task['discovery']['ports']['ports'][0]['number']

    # the agent IP:port will be routed to the bridge IP:port
    # test against the agent IP; it is not directly exposed, so translate it
    # from the task's slave_id
    agent_ip = common.agent_hostname_by_id(task['slave_id'])
    assert agent_ip is not None, "Failed to get the agent IP address"
    container_ip = network_info['ip_addresses'][0]['ip_address']
    assert agent_ip != container_ip, "The container IP address is the same as the agent one"

    url = "http://{}:{}/".format(agent_ip, port)
    common.assert_http_code(url)
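# For reference, the "container" and "container/bridge" pods exercised above differ mainly
# in their `networks` stanza. A rough sketch of the relevant JSON (an assumption about what
# pods.container_net_pod() and pods.container_bridge_pod() return, not their actual output):
CONTAINER_NET_SKETCH = {
    # overlay networking: the pod gets its own IP on the "dcos" network and serves port 80 directly
    "networks": [{"mode": "container", "name": "dcos"}],
}
BRIDGE_NET_SKETCH = {
    # bridge networking: a hostPort of 0 lets Mesos allocate the host port that is later read
    # from task['discovery']['ports'] and NATed to the container port
    "networks": [{"mode": "container/bridge"}],
}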
async def test_event_channel_for_pods(sse_events):
    """Tests the Marathon event channel specific to pod events."""

    await common.assert_event('event_stream_attached', sse_events)

    pod_def = pods.simple_pod()
    pod_id = pod_def['id']

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write files.
    if shakedown.dcos.cluster.ee_version() == 'strict':
        pod_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    client = marathon.create_client()
    client.add_pod(pod_def)
    deployment_wait(service_id=pod_id)

    await common.assert_event('pod_created_event', sse_events)
    await common.assert_event('deployment_step_success', sse_events)

    pod_def["scaling"]["instances"] = 3
    client.update_pod(pod_id, pod_def)
    deployment_wait(service_id=pod_id)

    await common.assert_event('pod_updated_event', sse_events)
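# `sse_events` above is an async fixture yielding parsed events from Marathon's /v2/events
# stream, and common.assert_event() waits until an event of the given type shows up.
# A minimal sketch of that idea (an assumption, not the real helper; the 'eventType' key
# is also assumed):
async def wait_for_event(event_type, events):
    async for event in events:  # `events` is an async iterator of event dicts
        if event.get('eventType') == event_type:
            return event
    raise AssertionError('Event stream ended before {} was seen'.format(event_type))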
def test_mom_when_mom_agent_bounced():
    """Launch an app from MoM and restart the node MoM is on."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    mom_ip = common.ip_of_mom()
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    with marathon_on_marathon() as client:
        client.add_app(app_def)
        deployment_wait(service_id=app_id, client=client)
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]['id']

        restart_agent(mom_ip)

        @retrying.retry(wait_fixed=1000,
                        stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
def test_pod_with_persistent_volume():
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)

    host = common.running_status_network_info(tasks[0]['statuses'])['ip_addresses'][0]['ip_address']

    # The container named 'container1' appends its task ID to the file, so we look up the
    # task ID of that container, which is not necessarily tasks[0].
    expected_data = next((t['id'] for t in tasks if t['name'] == 'container1'), None)
    assert expected_data, f"Could not find a container named 'container1' in the pod tasks: {tasks}"

    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    port2 = tasks[1]['discovery']['ports']['ports'][0]["number"]
    path1 = tasks[0]['container']['volumes'][0]['container_path']
    path2 = tasks[1]['container']['volumes'][0]['container_path']
    logger.info('Deployed two containers on {}:{}/{} and {}:{}/{}'.format(host, port1, path1, host, port2, path2))

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=60, retry_on_exception=common.ignore_exception)
    def check_http_endpoint(port, path, expected):
        cmd = "curl {}:{}/{}/foo".format(host, port, path)
        run, data = run_command_on_master(cmd)
        assert run, "{} did not succeed".format(cmd)
        assert expected in data, "'{}' was not found in '{}'".format(expected, data)

    check_http_endpoint(port1, path1, expected_data)
    check_http_endpoint(port2, path2, expected_data)
def test_marathon_when_disconnected_from_zk():
    """Launches an app from Marathon, then knocks out access to ZK from Marathon.
       Verifies the task is preserved.
    """

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    deployment_wait(service_id=app_id)
    tasks = client.get_tasks(app_id)
    original_task_id = tasks[0]['id']

    common.block_iptable_rules_for_seconds(host, 2181, sleep_seconds=10, block_input=True, block_output=False)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_task_is_back():
        tasks = client.get_tasks(app_id)
        assert tasks[0]['id'] == original_task_id, \
            "The task {} got replaced with {}".format(original_task_id, tasks[0]['id'])

    check_task_is_back()
def test_task_gets_restarted_due_to_network_split():
    """Verifies that a health check fails in presence of a network partition."""

    app_def = apps.http_server()
    app_id = app_def["id"]
    app_def['healthChecks'] = [common.health_check()]
    common.pin_to_host(app_def, common.ip_other_than_mom())

    client = marathon.create_client()
    client.add_app(app_def)

    deployment_wait(service_id=app_id)

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
    assert app['tasksHealthy'] == 1, \
        "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    tasks = client.get_tasks(app_id)
    task_id = tasks[0]['id']
    host = tasks[0]['host']
    port = tasks[0]['ports'][0]

    # introduce a network partition
    common.block_iptable_rules_for_seconds(host,
                                           port,
                                           sleep_seconds=10,
                                           block_input=True,
                                           block_output=False)

    deployment_wait(service_id=app_id)

    app = client.get_app(app_id)
    tasks = client.get_tasks(app_id)
    new_task_id = tasks[0]['id']
    assert task_id != new_task_id, "The task didn't get killed because of a failed health check"

    assert app['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
    assert app['tasksHealthy'] == 1, \
        "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    # network partition should cause a task restart
    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_health_message():
        tasks = client.get_tasks(app_id)
        new_task_id = tasks[0]['id']
        assert task_id != new_task_id, "The task has not been restarted: {}".format(
            task_id)

        app = client.get_app(app_id)
        assert app['tasksRunning'] == 1, \
            "The number of running tasks is {}, but 1 was expected".format(app['tasksRunning'])
        assert app['tasksHealthy'] == 1, \
            "The number of healthy tasks is {}, but 1 was expected".format(app['tasksHealthy'])

    check_health_message()
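# common.health_check() above returns a Marathon health-check definition. A plausible shape
# (an assumption about the helper, built from standard Marathon health-check fields) that
# makes the partitioned task fail quickly:
def http_health_check(path='/', interval_seconds=2, max_failures=1):
    return {
        'protocol': 'HTTP',
        'path': path,
        'portIndex': 0,
        'gracePeriodSeconds': 5,
        'intervalSeconds': interval_seconds,
        'timeoutSeconds': 2,
        'maxConsecutiveFailures': max_failures,  # restart the task after the first failure
    }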
def test_pod_with_persistent_volume():
    pod_def = pods.persistent_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)

    host = common.running_status_network_info(
        tasks[0]['statuses'])['ip_addresses'][0]['ip_address']
    port1 = tasks[0]['discovery']['ports']['ports'][0]["number"]
    port2 = tasks[1]['discovery']['ports']['ports'][0]["number"]
    path1 = tasks[0]['container']['volumes'][0]['container_path']
    path2 = tasks[1]['container']['volumes'][0]['container_path']
    logger.info('Deployed two containers on {}:{}/{} and {}:{}/{}'.format(
        host, port1, path1, host, port2, path2))

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=60,
                    retry_on_exception=common.ignore_exception)
    def check_http_endpoint(port, path):
        cmd = "curl {}:{}/{}/foo".format(host, port, path)
        run, data = run_command_on_master(cmd)
        assert run, "{} did not succeed".format(cmd)
        assert data == 'hello\n', "'{}' was not equal to hello\\n".format(data)

    check_http_endpoint(port1, path1)
    check_http_endpoint(port2, path2)
def test_pod_restarts_on_nonzero_exit_code():
    """Verifies that a pod get restarted in case one of its containers exits with a non-zero code.
       As a result, after restart, there should be two new tasks for different IDs.
    """

    pod_def = pods.simple_pod()
    pod_id = pod_def['id']
    pod_def["scaling"]["instances"] = 1
    pod_def['containers'][0]['exec']['command'][
        'shell'] = 'sleep 5; echo -n leaving; exit 2'

    client = marathon.create_client()
    client.add_pod(pod_def)
    deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    initial_id1 = tasks[0]['id']
    initial_id2 = tasks[1]['id']

    time.sleep(6)  # 1 sec past the 5 sec sleep in one of the container's command
    tasks = common.get_pod_tasks(pod_id)
    for task in tasks:
        assert task['id'] != initial_id1, "Got the same task ID"
        assert task['id'] != initial_id2, "Got the same task ID"
def test_two_pods_with_shared_volume():
    """Confirms that 1 container can read data in a volume that was written from the other container.
       The reading container fails if it can't read the file. So if there are 2 tasks after
       4 seconds we are good.
    """

    pod_def = pods.ephemeral_volume_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    deployment_wait(service_id=pod_id)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, "The number of tasks is {} after deployment, but 2 was expected".format(len(tasks))

    time.sleep(4)

    tasks = common.get_pod_tasks(pod_id)
    assert len(tasks) == 2, "The number of tasks is {} after sleeping, but 2 was expected".format(len(tasks))
def test_create_and_update_pod():
    """Versions and reverting with pods"""

    pod_def = pods.simple_pod()
    pod_def["scaling"]["instances"] = 1
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    deployment_wait(service_id=pod_id)

    pod_def["scaling"]["instances"] = 3
    client.update_pod(pod_id, pod_def)
    deployment_wait(service_id=pod_id)

    versions = get_pod_versions(pod_id)
    assert len(versions) == 2, "The number of versions is {}, but 2 was expected".format(len(versions))

    version1 = get_pod_version(pod_id, versions[0])
    version2 = get_pod_version(pod_id, versions[1])
    assert version1["scaling"]["instances"] != version2["scaling"]["instances"], \
        "Two pod versions have the same number of instances: {}, but they should not".format(
            version1["scaling"]["instances"])
def test_pinned_task_scales_on_host_only():
    """Tests that a pinned app scales only on the pinned node."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1, "The number of tasks is {} after deployment, but 1 was expected".format(len(tasks))
    assert tasks[0]['host'] == host, \
        "The task is on {}, but it is supposed to be on {}".format(tasks[0]['host'], host)

    client.scale_app(app_id, 10)
    deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 10, "The number of tasks is {} after scale, but 10 was expected".format(len(tasks))
    for task in tasks:
        assert task['host'] == host, "The task is on {}, but it is supposed to be on {}".format(task['host'], host)
def test_private_repository_mesos_app():
    """Deploys an app with a private Docker image, using Mesos containerizer.
        It relies on the global `install_enterprise_cli` fixture to install the
        enterprise-cli-package.
    """

    username = os.environ['DOCKER_HUB_USERNAME']
    password = os.environ['DOCKER_HUB_PASSWORD']

    secret_name = "pullconfig"
    secret_value_json = common.create_docker_pull_config_json(username, password)
    secret_value = json.dumps(secret_value_json)

    app_def = apps.private_ucr_docker_app()
    app_id = app_def["id"]

    # In strict mode all tasks are started as user `nobody` by default and `nobody`
    # doesn't have permissions to write to /var/log within the container.
    if is_strict():
        app_def['user'] = '******'
        common.add_dcos_marathon_user_acls()

    common.create_secret(secret_name, secret_value)
    client = marathon.create_client()

    try:
        client.add_app(app_def)
        deployment_wait(service_id=app_id)

        common.assert_app_tasks_running(client, app_def)
    finally:
        common.delete_secret(secret_name)
def test_launch_docker_grace_period(marathon_service_name):
    """Tests 'taskKillGracePeriodSeconds' option using a Docker container in a Marathon environment.
       Read more details about this test in `test_root_marathon.py::test_launch_mesos_root_marathon_grace_period`
    """

    app_id = '/launch-docker-grace-period-app'
    app_def = apps.docker_http_server(app_id)
    app_def['container']['docker']['image'] = 'kensipe/python-test'

    default_grace_period = 3
    grace_period = 20
    app_def['taskKillGracePeriodSeconds'] = grace_period
    app_def['cmd'] = 'python test.py'
    task_name = app_id.lstrip('/')

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    tasks = get_service_task(marathon_service_name, task_name)
    assert tasks is not None

    client.scale_app(app_id, 0)
    tasks = get_service_task(marathon_service_name, task_name)
    assert tasks is not None

    # the task should still be here after the default grace period
    time.sleep(default_grace_period + 1)
    tasks = get_service_task(marathon_service_name, task_name)
    assert tasks is not None

    # but not after the set grace_period
    time.sleep(grace_period)
    assert_that(lambda: get_service_task(marathon_service_name, task_name),
                eventually(equal_to(None), max_attempts=30))
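# The grace-period behaviour above only shows up if the task ignores SIGTERM: Marathon sends
# SIGTERM on scale-down and Mesos escalates to SIGKILL after taskKillGracePeriodSeconds.
# A minimal stand-in for what the image's `test.py` presumably does (an assumption, not the
# actual contents of kensipe/python-test):
def ignore_sigterm_and_idle():
    import signal
    import time
    signal.signal(signal.SIGTERM, lambda signum, frame: None)  # swallow SIGTERM
    while True:
        time.sleep(1)  # stay alive until the SIGKILL escalation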
def test_app_update():
    """Tests that an app gets successfully updated."""

    app_def = apps.mesos_app(app_id='/update-app')
    app_id = app_def["id"]

    client = marathon.create_client()
    client.add_app(app_def)

    deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1, "The number of tasks is {} after deployment, but 1 was expected".format(len(tasks))

    app_def['cpus'] = 1
    app_def['instances'] = 2

    client.update_app(app_id, app_def)
    deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 2, "The number of tasks is {} after deployment, but 2 was expected".format(len(tasks))
def test_marathon_with_master_process_failure(marathon_service_name):
    """Launches an app and restarts the master. It is expected that the service endpoint eventually comes back and
       the task ID stays the same.
    """

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    original_task_id = tasks[0]['id']

    common.systemctl_master('restart')
    shakedown.dcos.service.wait_for_service_endpoint(marathon_service_name, path="ping")

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_task_recovery():
        tasks = client.get_tasks(app_id)
        assert len(tasks) == 1, "The number of tasks is {} after master restart, but 1 was expected".format(len(tasks))
        assert tasks[0]['id'] == original_task_id, \
            "Task {} has not recovered, it got replaced with another one: {}".format(original_task_id, tasks[0]['id'])

    check_task_recovery()
def test_vip_mesos_cmd(marathon_service_name):
    """Validates the creation of an app with a VIP label and the accessibility of the service via the VIP."""

    app_def = apps.http_server()
    app_id = app_def["id"]

    vip_name = app_id.lstrip("/")
    fqn = '{}.{}.l4lb.thisdcos.directory'.format(vip_name, marathon_service_name)

    app_def['portDefinitions'] = [{
        "port": 0,
        "protocol": "tcp",
        "name": "{}".format(vip_name),
        "labels": {
            "VIP_0": "/{}:10000".format(vip_name)
        }
    }]

    client = marathon.create_client()
    client.add_app(app_def)

    deployment_wait(service_id=app_id)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def http_output_check():
        time.sleep(1)
        common.assert_http_code('{}:{}'.format(fqn, 10000))

    http_output_check()
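# Small helper derived from the FQDN format used above: a VIP label value such as
# "/myapp:10000" on an app run by <framework_name> is exposed by dcos-l4lb at
# <name>.<framework_name>.l4lb.thisdcos.directory:<port> ("myapp" here is only illustrative).
def l4lb_address(vip_label, framework_name):
    name, port = vip_label.lstrip('/').split(':')
    return '{}.{}.l4lb.thisdcos.directory:{}'.format(name, framework_name, port)

# e.g. l4lb_address('/myapp:10000', 'marathon') -> 'myapp.marathon.l4lb.thisdcos.directory:10000'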
def test_create_pod_with_private_image():
    """Deploys a pod with a private Docker image, using Mesos containerizer.
        This method relies on the global `install_enterprise_cli` fixture to install the
        enterprise-cli-package.
    """

    username = os.environ['DOCKER_HUB_USERNAME']
    password = os.environ['DOCKER_HUB_PASSWORD']

    secret_name = "pullconfig"
    secret_value_json = common.create_docker_pull_config_json(username, password)
    secret_value = json.dumps(secret_value_json)

    pod_def = pods.private_docker_pod()
    pod_id = pod_def['id']
    common.create_secret(secret_name, secret_value)
    client = marathon.create_client()

    try:
        client.add_pod(pod_def)
        deployment_wait(service_id=pod_id, max_attempts=300)
        pod = client.show_pod(pod_id)
        assert pod is not None, "The pod has not been created"
    finally:
        common.delete_secret(secret_name)
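# common.create_docker_pull_config_json() above builds the standard Docker config.json
# structure that the Mesos/UCR fetcher uses to authenticate against a private registry.
# A sketch of the expected shape (assuming Docker Hub as the registry):
def docker_pull_config(username, password, registry='https://index.docker.io/v1/'):
    import base64
    auth = base64.b64encode('{}:{}'.format(username, password).encode()).decode()
    return {'auths': {registry: {'auth': auth}}}

# The secret stored above is json.dumps() of this structure, which the pod definition
# references as its pull-config secret.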
def assert_mom_ee(version, security_mode='permissive'):
    ensure_service_account()
    ensure_permissions()
    ensure_sa_secret(strict=True if security_mode == 'strict' else False)
    ensure_docker_config_secret()

    # In strict mode all tasks are started as user `nobody` by default. However we start
    # MoM-EE as 'root' and for that we need to give root marathon ACLs to start
    # tasks as 'root'.
    if security_mode == 'strict':
        common.add_dcos_marathon_user_acls()

    # Deploy MoM-EE in the requested security mode
    app_def_file = '{}/mom-ee-{}-{}.json'.format(fixtures.fixtures_dir(), security_mode, version)
    assert os.path.isfile(app_def_file), "Couldn't find appropriate MoM-EE definition: {}".format(app_def_file)

    image = mom_ee_image(version)
    logger.info('Deploying {} definition with {} image'.format(app_def_file, image))

    app_def = get_resource(app_def_file)
    app_def['container']['docker']['image'] = 'mesosphere/marathon-dcos-ee:{}'.format(image)
    app_id = app_def["id"]

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)
    shakedown.dcos.service.wait_for_service_endpoint(mom_ee_endpoint(version, security_mode), path="ping")
def test_pinned_task_recovers_on_host():
    """Tests that when a pinned task gets killed, it recovers on the node it was pinned to."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    deployment_wait(service_id=app_id)
    tasks = client.get_tasks(app_id)

    common.kill_process_on_host(host, '[s]leep')
    deployment_wait(service_id=app_id)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_for_new_task():
        new_tasks = client.get_tasks(app_id)
        assert tasks[0]['id'] != new_tasks[0]['id'], "The task did not get killed: {}".format(tasks[0]['id'])
        assert new_tasks[0]['host'] == host, \
            "The task got restarted on {}, but it was supposed to stay on {}".format(new_tasks[0]['host'], host)

    check_for_new_task()
def test_docker_dns_mapping(marathon_service_name):
    """Tests that a running Docker task is accessible via DNS."""

    app_def = apps.docker_http_server(app_id='/docker-dns-mapping-app')
    app_id = app_def["id"]

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    bad_cmd = 'ping -c 1 docker-test.marathon-user.mesos-bad'
    status, output = run_command_on_master(bad_cmd)
    assert not status

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_dns():
        dnsname = '{}.{}.mesos'.format(app_id.lstrip('/'),
                                       marathon_service_name)
        cmd = 'ping -c 1 {}'.format(dnsname)
        wait_for_dns(dnsname)
        status, output = run_command_on_master(cmd)
        assert status, "ping failed for app using DNS lookup: {}".format(
            dnsname)

    check_dns()
def test_mom_when_mom_process_killed():
    """Launched a task from MoM then killed MoM."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]
    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    with marathon_on_marathon() as client:
        client.add_app(app_def)
        deployment_wait(service_id=app_id, client=client)
        tasks = client.get_tasks(app_id)
        original_task_id = tasks[0]['id']

        common.kill_process_on_host(common.ip_of_mom(), 'marathon-assembly')
        wait_for_task('marathon', 'marathon-user', 300)
        wait_for_service_endpoint('marathon-user', path="ping")

        @retrying.retry(wait_fixed=1000,
                        stop_max_attempt_number=30,
                        retry_on_exception=common.ignore_exception)
        def check_task_is_back():
            tasks = client.get_tasks(app_id)
            assert tasks[0]['id'] == original_task_id, "The task ID has changed"

        check_task_is_back()
def test_marathon_when_task_agent_bounced():
    """Launch an app and restart the node the task is running on."""

    app_def = apps.sleep_app()
    app_id = app_def["id"]

    host = common.ip_other_than_mom()
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    deployment_wait(service_id=app_id)
    tasks = client.get_tasks(app_id)
    original_task_id = tasks[0]['id']
    restart_agent(host)

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_task_is_back():
        tasks = client.get_tasks(app_id)
        assert tasks[0]['id'] == original_task_id, \
            "The task {} got replaced with {}".format(original_task_id, tasks[0]['id'])

    check_task_is_back()
def test_vip_docker_bridge_mode(marathon_service_name):
    """Tests the creation of a VIP from a python command in a docker image using bridge mode.
       the test validates the creation of an app with the VIP label and the accessability
       of the service via the VIP.
    """

    app_def = apps.docker_http_server(app_id='vip-docker-bridge-mode-app')
    app_id = app_def["id"]

    vip_name = app_id.lstrip("/")
    fqn = '{}.{}.l4lb.thisdcos.directory'.format(vip_name, marathon_service_name)

    app_def['id'] = vip_name
    app_def['container']['docker']['portMappings'] = [{
        "containerPort": 8080,
        "hostPort": 0,
        "labels": {
            "VIP_0": "/{}:10000".format(vip_name)
        },
        "protocol": "tcp",
        "name": "{}".format(vip_name)
    }]

    client = marathon.create_client()
    client.add_app(app_def)

    deployment_wait(service_id=app_id)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def http_output_check():
        time.sleep(1)
        common.assert_http_code('{}:{}'.format(fqn, 10000))

    http_output_check()
def test_launch_and_scale_group():
    """Launches and scales a group."""

    group_def = groups.sleep_group()
    groups_id = group_def["groups"][0]["id"]
    app1_id = group_def["groups"][0]["apps"][0]["id"]
    app2_id = group_def["groups"][0]["apps"][1]["id"]

    client = marathon.create_client()
    client.create_group(group_def)

    deployment_wait(service_id=app1_id)

    group_apps = client.get_group(groups_id)
    apps = group_apps['apps']
    assert len(apps) == 2, "The number of apps is {}, but 2 was expected".format(len(apps))

    tasks1 = client.get_tasks(app1_id)
    tasks2 = client.get_tasks(app2_id)
    assert len(tasks1) == 1, "The number of tasks #1 is {} after deployment, but 1 was expected".format(len(tasks1))
    assert len(tasks2) == 1, "The number of tasks #2 is {} after deployment, but 1 was expected".format(len(tasks2))

    # scale by 2 for the entire group
    client.scale_group(groups_id, 2)
    deployment_wait(service_id=app1_id)

    tasks1 = client.get_tasks(app1_id)
    tasks2 = client.get_tasks(app2_id)
    assert len(tasks1) == 2, "The number of tasks #1 is {} after scale, but 2 was expected".format(len(tasks1))
    assert len(tasks2) == 2, "The number of tasks #2 is {} after scale, but 2 was expected".format(len(tasks2))
def test_scale_app_in_group():
    """Scales an individual app in a group."""

    group_def = groups.sleep_group()
    groups_id = group_def["groups"][0]["id"]
    app1_id = group_def["groups"][0]["apps"][0]["id"]
    app2_id = group_def["groups"][0]["apps"][1]["id"]

    client = marathon.create_client()
    client.create_group(group_def)

    deployment_wait(service_id=app1_id)

    group_apps = client.get_group(groups_id)
    apps = group_apps['apps']
    assert len(apps) == 2, "The number of apps is {}, but 2 was expected".format(len(apps))

    tasks1 = client.get_tasks(app1_id)
    tasks2 = client.get_tasks(app2_id)
    assert len(tasks1) == 1, "The number of tasks #1 is {} after deployment, but 1 was expected".format(len(tasks1))
    assert len(tasks2) == 1, "The number of tasks #2 is {} after deployment, but 1 was expected".format(len(tasks2))

    # scaling just one app in the group
    client.scale_app(app1_id, 2)
    deployment_wait(service_id=app1_id)

    tasks1 = client.get_tasks(app1_id)
    tasks2 = client.get_tasks(app2_id)
    assert len(tasks1) == 2, "The number of tasks #1 is {} after scale, but 2 was expected".format(len(tasks1))
    assert len(tasks2) == 1, "The number of tasks #2 is {} after scale, but 1 was expected".format(len(tasks2))
def test_install_marathon():
    """Install the Marathon package for DC/OS.
    """

    # Install
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def install_marathon():
        install_package_and_wait(PACKAGE_NAME)

    install_marathon()
    assert package_installed(PACKAGE_NAME), 'Package failed to install'

    # 5000ms = 5 seconds, 5 seconds * 60 attempts = 300 seconds = WAIT_TIME_IN_SECS
    @retrying.retry(wait_fixed=5000, stop_max_attempt_number=60, retry_on_exception=common.ignore_exception)
    def assert_service_registration(package, service):
        found = get_service(package) is not None
        assert found and service_healthy(service), f"Service {package} did not register with DCOS" # NOQA E999

    assert_service_registration(PACKAGE_NAME, SERVICE_NAME)
    deployment_wait(service_id=SERVICE_NAME)

    # Uninstall
    uninstall('marathon-user')
    deployment_wait(service_id=SERVICE_NAME)

    # Reinstall
    install_package_and_wait(PACKAGE_NAME)
    assert package_installed(PACKAGE_NAME), 'Package failed to reinstall'
def test_pinned_task_does_not_scale_to_unpinned_host():
    """Tests when a task lands on a pinned node (and barely fits) and it is asked to scale past
       the resources of that node, no tasks will be launched on any other node.
    """

    app_def = apps.sleep_app()
    app_id = app_def['id']

    host = common.ip_other_than_mom()
    logger.info('Constraint set to host: {}'.format(host))
    # the size of cpus is designed to be greater than 1/2 of a node
    # such that only 1 task can land on the node.
    cores = common.cpus_on_agent(host)
    app_def['cpus'] = max(0.6, cores - 0.5)
    common.pin_to_host(app_def, host)

    client = marathon.create_client()
    client.add_app(app_def)

    deployment_wait(service_id=app_id)
    client.scale_app(app_id, 2)

    time.sleep(5)
    deployments = client.get_deployments(app_id=app_id)
    tasks = client.get_tasks(app_id)

    # still deploying
    assert len(deployments) == 1, "The number of deployments is {}, but 1 was expected".format(len(deployments))
    assert len(tasks) == 1, "The number of tasks is {}, but 1 was expected".format(len(tasks))
def clear_pods():
    try:
        client = marathon.create_client()
        pods = client.list_pod()
        for pod in pods:
            client.remove_pod(pod["id"], True)
            deployment_wait(service_id=pod["id"])
    except Exception:
        pass
def test_install_universe_package(package):
    """ Marathon is responsible for installing packages from the universe.
        This test confirms that several packages are installed into a healty state.
    """

    install_package_and_wait(package)
    assert package_installed(package), 'Package failed to install'

    deployment_wait(max_attempts=300)
    assert service_healthy(package)
def test_app_secret_env_var(secret_fixture):

    secret_name, secret_value = secret_fixture

    app_id = '/app-secret-env-var-{}'.format(uuid.uuid4().hex)
    app_def = {
        "id": app_id,
        "instances": 1,
        "cpus": 0.5,
        "mem": 64,
        "cmd": "echo $SECRET_ENV >> $MESOS_SANDBOX/secret-env && /opt/mesosphere/bin/python -m http.server $PORT_API",
        "env": {
            "SECRET_ENV": {
                "secret": "secret1"
            }
        },
        "portDefinitions": [{
            "port": 0,
            "protocol": "tcp",
            "name": "api",
            "labels": {}
        }],
        "secrets": {
            "secret1": {
                "source": secret_name
            }
        }
    }

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1, 'Failed to start the secret environment variable app'

    port = tasks[0]['ports'][0]
    host = tasks[0]['host']
    cmd = "curl {}:{}/secret-env".format(host, port)

    @retrying.retry(wait_fixed=1000,
                    stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def value_check():
        status, data = run_command_on_master(cmd)
        assert status, "{} did not succeed".format(cmd)
        assert data.rstrip() == secret_value

    value_check()
def test_app_file_based_secret(secret_fixture):

    secret_name, secret_value = secret_fixture
    secret_container_path = 'mysecretpath'

    app_id = '/app-fbs-{}'.format(uuid.uuid4().hex)
    # In case you're wondering about the `cmd`: secrets are mounted via tmpfs inside
    # the container and are not visible outside, hence the intermediate file
    app_def = {
        "id": app_id,
        "instances": 1,
        "cpus": 0.5,
        "mem": 64,
        "cmd": "cat {} >> {}_file && /opt/mesosphere/bin/python -m http.server $PORT_API".format(
            secret_container_path, secret_container_path),
        "container": {
            "type": "MESOS",
            "volumes": [{
                "containerPath": secret_container_path,
                "secret": "secret1"
            }]
        },
        "portDefinitions": [{
            "port": 0,
            "protocol": "tcp",
            "name": "api",
            "labels": {}
        }],
        "secrets": {
            "secret1": {
                "source": secret_name
            }
        }
    }

    client = marathon.create_client()
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1, 'Failed to start the file based secret app'

    port = tasks[0]['ports'][0]
    host = tasks[0]['host']
    # The secret by default is saved in $MESOS_SANDBOX/.secrets/path/to/secret
    cmd = "curl {}:{}/{}_file".format(host, port, secret_container_path)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def value_check():
        status, data = run_command_on_master(cmd)
        assert status, "{} did not succeed. status = {}, data = {}".format(cmd, status, data)
        assert data.rstrip() == secret_value, "Got unexpected secret data"

    value_check()
def uninstall(service, package=PACKAGE_NAME):
    try:
        task = get_service_task(package, service)
        if task is not None:
            cosmos_pm = packagemanager.PackageManager(cosmos.get_cosmos_url())
            cosmos_pm.uninstall_app(package, True, service)
            deployment_wait()
            assert common.wait_for_service_endpoint_removal('test-marathon')
            delete_zk_node('/universe/{}'.format(service))

    except Exception:
        pass
def test_deploy_custom_framework():
    """Launches an app that has necessary elements to create a service endpoint in DCOS.
       This test confirms that the endpoint is created by the root Marathon.
    """

    client = marathon.create_client()
    app_def = apps.fake_framework()
    app_id = app_def["id"]
    client.add_app(app_def)
    deployment_wait(service_id=app_id, max_attempts=300)

    shakedown.dcos.service.wait_for_service_endpoint('pyfw', timedelta(minutes=5).total_seconds())
def test_custom_service_name():
    """  Install MoM with a custom service name.
    """
    cosmos_pm = packagemanager.PackageManager(cosmos.get_cosmos_url())
    cosmos_pm.get_package_version('marathon', None)
    options = {
        'service': {'name': "test-marathon"}
    }
    install_package('marathon', options_json=options)
    deployment_wait(service_id=options["service"]["name"], max_attempts=300)

    shakedown.dcos.service.wait_for_service_endpoint('test-marathon', timeout_sec=300, path="ping")
def test_create_pod():
    """Launch simple pod in DC/OS root marathon."""

    pod_def = pods.simple_pod()
    pod_id = pod_def['id']

    client = marathon.create_client()
    client.add_pod(pod_def)
    deployment_wait(service_id=pod_id)

    pod = client.show_pod(pod_id)
    assert pod is not None, "The pod has not been created"
def simple_sleep_app(mom_endpoint):
    # Deploy a simple sleep app in the MoM-EE
    with marathon_on_marathon(name=mom_endpoint) as client:
        app_def = apps.sleep_app()
        app_id = app_def["id"]

        client.add_app(app_def)
        deployment_wait(service_id=app_id, client=client)

        tasks = get_service_task(mom_endpoint, app_id.lstrip("/"))
        logger.info('MoM-EE tasks: {}'.format(tasks))
        return tasks is not None
def test_app_with_persistent_volume_recovers():
    """Tests that when an app task with a persistent volume gets killed,
       it recovers on the node it was launched on, and it gets attached
       to the same persistent-volume."""

    app_def = apps.persistent_volume_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)

    deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    assert len(tasks) == 1, "The number of tasks is {} after deployment, but 1 was expected".format(len(tasks))

    task_id = tasks[0]['id']
    port = tasks[0]['ports'][0]
    host = tasks[0]['host']
    cmd = "curl {}:{}/data/foo".format(host, port)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_task(cmd, target_data):
        run, data = run_command_on_master(cmd)

        assert run, "{} did not succeed".format(cmd)
        assert target_data in data, "'{}' not found in {}".format(target_data, data)

    check_task(cmd, target_data='hello\n')

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def kill_task(host, pattern):
        pids = common.kill_process_on_host(host, pattern)
        assert len(pids) != 0, "no task got killed on {} for pattern {}".format(host, pattern)

    kill_task(host, '[h]ttp\\.server')

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_task_recovery():
        tasks = client.get_tasks(app_id)
        assert len(tasks) == 1, "The number of tasks is {} after recovery, but 1 was expected".format(len(tasks))

        new_task_id = tasks[0]['id']
        assert task_id != new_task_id, "The task ID has not changed, and is still {}".format(task_id)

    check_task_recovery()

    port = tasks[0]['ports'][0]
    host = tasks[0]['host']
    cmd = "curl {}:{}/data/foo".format(host, port)

    check_task(cmd, target_data='hello\nhello\n')
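# The recovery check above relies on the app appending to a file on its persistent volume
# every time the task starts, so after one restart the file holds the line twice. A rough
# sketch of the relevant parts of apps.persistent_volume_app() (an assumption, not the
# actual fixture):
PERSISTENT_VOLUME_APP_SKETCH = {
    "cmd": "echo hello >> data/foo && /opt/mesosphere/bin/python -m http.server $PORT0",
    "container": {
        "type": "MESOS",
        "volumes": [{
            "containerPath": "data",
            "mode": "RW",
            "persistent": {"size": 10}  # local persistent volume, hence recovery on the same node
        }]
    },
    "residency": {"taskLostBehavior": "WAIT_FOREVER"}  # keep the reservation across restarts
}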
def test_https_readiness_check_ready():
    """Tests HTTPS readiness check using a prepared nginx image that enables
       SSL (using self-signed certificate) and listens on 443.
    """

    client = marathon.create_client()
    app_def = apps.app_with_https_readiness_checks()
    app_id = app_def["id"]

    client.add_app(app_def)

    # when readiness check keeps failing, the deployment will never finish
    deployment_wait(service_id=app_id, max_attempts=300)
def test_launch_app_on_public_agent():
    """ Test the successful launch of a mesos container on public agent.
        MoMs by default do not have slave_public access.
    """
    client = marathon.create_client()
    app_def = common.add_role_constraint_to_app_def(apps.mesos_app(), ['slave_public'])
    app_id = app_def["id"]
    client.add_app(app_def)
    deployment_wait(service_id=app_id)

    tasks = client.get_tasks(app_id)
    task_ip = tasks[0]['host']

    assert task_ip in get_public_agents(), "The application task got started on a private agent"