Python kill_task_with_pattern示例，sdk_cmd.kill_task_with_pattern Python示例

示例#1

0

显示文件

文件： test_zzzrecovery.py 项目： keithchambers/dcos-commons

def test_config_update_then_scheduler_died():
    """Bump the world CPU config, kill the scheduler process, then check the update.

    The 'world' task ids and the scheduler host are captured before the
    config change is applied.
    """
    ids_before = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    scheduler_host = sdk_marathon.get_scheduler_host(config.SERVICE_NAME)

    config.bump_world_cpus()
    sdk_cmd.kill_task_with_pattern('helloworld.scheduler.Main', scheduler_host)

    # The previously captured ids are handed to check_tasks_updated.
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', ids_before)
    config.check_running()

示例#2

0

显示文件

文件： test_zzzrecovery.py 项目： keithchambers/dcos-commons

def test_config_update_then_kill_task_in_node():
    """Bump the world CPU config, kill the 'world' process on world-0, then check the update."""
    # Kill 1 of the 2 world tasks: only world-0's host is targeted.
    ids_before = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    config.bump_world_cpus()

    world_0_host = 'world-0-server.{}.mesos'.format(config.SERVICE_NAME)
    sdk_cmd.kill_task_with_pattern('world', world_0_host)

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', ids_before)
    config.check_running()

示例#3

0

显示文件

文件： test_zzzrecovery.py 项目： keithchambers/dcos-commons

def test_kill_hello_node():
    """Kill the 'hello' process on the hello-0 host and check its task ids are updated."""
    config.check_running()

    ids_before = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello-0')
    target_host = 'hello-0-server.hello-world.mesos'
    sdk_cmd.kill_task_with_pattern('hello', target_host)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'hello-0', ids_before)

    config.check_running()

示例#4

0

显示文件

def test_config_update_then_kill_task_in_node():
    """Apply the world CPU bump, kill the 'world' process on world-0, then check the update."""
    # Kill 1 of the 2 world tasks: only world-0's host is targeted.
    prior_world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    config.bump_world_cpus()

    victim_host = 'world-0-server.{}.mesos'.format(config.SERVICE_NAME)
    sdk_cmd.kill_task_with_pattern('world', victim_host)

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', prior_world_ids)
    config.check_running()

示例#5

0

显示文件

def test_kill_hello_node():
    """Kill the 'hello' process on the hello-0 host and check the hello-0 ids change."""
    config.check_running()

    prior_hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello-0')
    victim_host = 'hello-0-server.hello-world.mesos'
    sdk_cmd.kill_task_with_pattern('hello', victim_host)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'hello-0', prior_hello_ids)

    config.check_running()

示例#6

0

显示文件

def test_config_update_then_scheduler_died():
    """Apply the world CPU bump, kill the scheduler process, then check the update.

    Both the 'world' task ids and the scheduler host are looked up before the
    config change is applied.
    """
    prior_world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    sched_host = sdk_marathon.get_scheduler_host(config.SERVICE_NAME)

    config.bump_world_cpus()
    sdk_cmd.kill_task_with_pattern('helloworld.scheduler.Main', sched_host)

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', prior_world_ids)
    config.check_running()

示例#7

0

显示文件

def test_config_update_then_executor_killed():
    """Bump the world CPU config, kill the executor process on world-0, then check the update."""
    ids_before = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    config.bump_world_cpus()

    world_0_host = 'world-0-server.{}.mesos'.format(config.SERVICE_NAME)
    sdk_cmd.kill_task_with_pattern('helloworld.executor.Main', world_0_host)

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', ids_before)
    config.check_running()

示例#8

0

显示文件

文件： test_zzzrecovery.py 项目： mesosphere/dcos-commons

def test_kill_hello_task():
    """Kill the hello-0 task's process on its agent and check its task id is updated."""
    matching = sdk_tasks.get_service_tasks(config.SERVICE_NAME, task_prefix="hello-0")
    victim = matching[0]

    sdk_cmd.kill_task_with_pattern(
        "hello-container-path/output", "nobody", agent_host=victim.host
    )

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, "hello-0", [victim.id])
    check_healthy()

示例#9

0

显示文件

文件： test_zzzrecovery.py 项目： mesosphere/dcos-commons

def test_kill_world_executor():
    """Kill the default executor on world-0's agent and check the task id is updated."""
    matching = sdk_tasks.get_service_tasks(config.SERVICE_NAME, task_prefix="world-0")
    victim = matching[0]

    sdk_cmd.kill_task_with_pattern(
        "mesos-default-executor", "nobody", agent_host=victim.host
    )

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, "world-0", [victim.id])
    check_healthy()

示例#10

0

显示文件

文件： test_sanity.py 项目： thebijuus/dcos-commons

def test_kill_data_node():
    """Kill the datanode process for data-0 and check that only data tasks are updated.

    Journal and name task ids are captured up front and later passed to
    check_tasks_not_updated.
    """
    service = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    data_before = sdk_tasks.get_task_ids(service, 'data-0')
    journal_before = sdk_tasks.get_task_ids(service, 'journal')
    name_before = sdk_tasks.get_task_ids(service, 'name')

    data_0_host = sdk_hosts.system_host(service, 'data-0-node')
    sdk_cmd.kill_task_with_pattern('datanode', data_0_host)
    config.expect_recovery(service_name=service)

    sdk_tasks.check_tasks_updated(service, 'data', data_before)
    sdk_tasks.check_tasks_not_updated(service, 'journal', journal_before)
    sdk_tasks.check_tasks_not_updated(service, 'name', name_before)

示例#11

0

显示文件

文件： test_sanity.py 项目： smush618/dcos-commons

def test_kill_data_node():
    """Kill the datanode process on data-0's agent and check that only data tasks are updated."""
    victim = sdk_tasks.get_service_tasks(foldered_name, "data-0")[0]
    journal_before = sdk_tasks.get_task_ids(foldered_name, "journal")
    name_before = sdk_tasks.get_task_ids(foldered_name, "name")

    sdk_cmd.kill_task_with_pattern(
        "datanode",
        "nobody",
        agent_host=victim.host,
    )

    config.expect_recovery(service_name=foldered_name)
    sdk_tasks.check_tasks_updated(foldered_name, "data", [victim.id])
    # Journal and name tasks are passed to the "not updated" check.
    sdk_tasks.check_tasks_not_updated(foldered_name, "journal", journal_before)
    sdk_tasks.check_tasks_not_updated(foldered_name, "name", name_before)

示例#12

0

显示文件

文件： test_sanity.py 项目： mesosphere/dcos-commons

def test_kill_data_node():
    """Kill the datanode process on data-0's agent; journal and name tasks go to the not-updated check."""
    killed_task = sdk_tasks.get_service_tasks(foldered_name, "data-0")[0]
    prior_journal_ids = sdk_tasks.get_task_ids(foldered_name, "journal")
    prior_name_ids = sdk_tasks.get_task_ids(foldered_name, "name")

    sdk_cmd.kill_task_with_pattern(
        "datanode",
        "nobody",
        agent_host=killed_task.host,
    )

    config.expect_recovery(service_name=foldered_name)
    sdk_tasks.check_tasks_updated(foldered_name, "data", [killed_task.id])
    sdk_tasks.check_tasks_not_updated(foldered_name, "journal", prior_journal_ids)
    sdk_tasks.check_tasks_not_updated(foldered_name, "name", prior_name_ids)

示例#13

0

显示文件

文件： test_sanity.py 项目： mesosphere/dcos-commons

def test_losing_and_regaining_index_health(default_populated_index: None) -> None:
    """Kill the data-0-node process and check index health goes green, yellow, green."""

    def _expect_health(color):
        # Thin wrapper so each health check reads as a single call.
        config.check_elasticsearch_index_health(index_name, color, service_name=service_name)

    _expect_health("green")

    data_0_task = sdk_tasks.get_service_tasks(service_name, "data-0-node")[0]
    sdk_cmd.kill_task_with_pattern(
        "data__.*Elasticsearch", "nobody", agent_host=data_0_task.host
    )

    # After the kill the index is checked for "yellow" and then "green" again.
    for color in ("yellow", "green"):
        _expect_health(color)

    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)

示例#14

0

显示文件

文件： test_zzzrecovery.py 项目： mesosphere/dcos-commons

def test_kill_all_executors():
    """Kill the default executor on every service task's agent and check all ids are updated."""
    service_tasks = sdk_tasks.get_service_tasks(config.SERVICE_NAME)

    for victim in service_tasks:
        sdk_cmd.kill_task_with_pattern(
            "mesos-default-executor", "nobody", agent_host=victim.host
        )

    # An empty prefix is passed together with every previously seen task id.
    prior_ids = [t.id for t in service_tasks]
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, "", prior_ids)
    check_healthy()

示例#15

0

显示文件

def test_kill_hello_task():
    """Kill the hello-0 task's process on its agent and check its task id is updated."""
    hello_tasks = sdk_tasks.get_service_tasks(config.SERVICE_NAME, task_prefix="hello-0")
    killed_task = hello_tasks[0]

    sdk_cmd.kill_task_with_pattern(
        "hello-container-path/output", "nobody", agent_host=killed_task.host
    )

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, "hello-0", [killed_task.id])
    check_healthy()

示例#16

0

显示文件

def test_kill_world_executor():
    """Kill the default executor on world-0's agent and check the task id is updated."""
    world_tasks = sdk_tasks.get_service_tasks(config.SERVICE_NAME, task_prefix="world-0")
    killed_task = world_tasks[0]

    sdk_cmd.kill_task_with_pattern(
        "mesos-default-executor", "nobody", agent_host=killed_task.host
    )

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, "world-0", [killed_task.id])
    check_healthy()

示例#17

0

显示文件

def test_kill_all_executors():
    """Kill the default executor on each service task's agent and check all ids are updated."""
    all_tasks = sdk_tasks.get_service_tasks(config.SERVICE_NAME)

    for current in all_tasks:
        sdk_cmd.kill_task_with_pattern(
            "mesos-default-executor", "nobody", agent_host=current.host
        )

    # An empty prefix is passed together with every previously seen task id.
    ids_before = [entry.id for entry in all_tasks]
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, "", ids_before)
    check_healthy()

示例#18

0

显示文件

def test_integrity_on_data_node_failure():
    """Write a file to HDFS, kill two DataNode processes, then read the file back."""
    filename = get_unique_filename("test_datanode_fail")

    # An HDFS write only returns successfully once data replication has taken place.
    config.write_data_to_hdfs(config.SERVICE_NAME, filename)

    # Kill the DataNode process on data-0 and then on data-1.
    for node in ('data-0-node', 'data-1-node'):
        sdk_cmd.kill_task_with_pattern("DataNode", sdk_hosts.system_host(config.SERVICE_NAME, node))

    config.read_data_from_hdfs(config.SERVICE_NAME, filename)

    config.check_healthy(service_name=config.SERVICE_NAME)

示例#19

0

显示文件

def test_master_reelection():
    """Kill the current Elasticsearch master and assert a different master node takes over."""
    old_master = config.get_elasticsearch_master(service_name=foldered_name)

    old_master_task = sdk_tasks.get_service_tasks(foldered_name, old_master)[0]
    sdk_cmd.kill_task_with_pattern(
        "master__.*Elasticsearch", "nobody", agent_host=old_master_task.host
    )

    # Wait for recovery to start and finish, then for the expected nodes to exist.
    sdk_plan.wait_for_in_progress_recovery(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
    config.wait_for_expected_nodes_to_exist(service_name=foldered_name)

    elected = config.get_elasticsearch_master(service_name=foldered_name)
    assert elected.startswith("master") and elected != old_master

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)

示例#20

0

显示文件

文件： test_sanity.py 项目： mesosphere/dcos-commons

def test_master_reelection() -> None:
    """Kill the current Elasticsearch master and assert a different master node takes over."""
    previous_master = config.get_elasticsearch_master(service_name=service_name)

    master_task = sdk_tasks.get_service_tasks(service_name, previous_master)[0]
    sdk_cmd.kill_task_with_pattern(
        "master__.*Elasticsearch", "nobody", agent_host=master_task.host
    )

    # Wait for recovery to start and finish, then for the expected nodes to exist.
    sdk_plan.wait_for_in_progress_recovery(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)
    config.wait_for_expected_nodes_to_exist(service_name=service_name)

    current_master = config.get_elasticsearch_master(service_name=service_name)
    assert current_master.startswith("master") and current_master != previous_master

    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)

示例#21

0

显示文件

def test_kill_scheduler():
    """Kill the hello-world scheduler process and check its Marathon task id is updated."""
    sched_prefix = sdk_marathon.get_scheduler_task_prefix(config.SERVICE_NAME)
    sched_ids_before = sdk_tasks.get_task_ids("marathon", sched_prefix)
    assert len(sched_ids_before) == 1, "Expected to find one scheduler task"

    sched_host = sdk_marathon.get_scheduler_host(config.SERVICE_NAME)
    sdk_cmd.kill_task_with_pattern(
        "./hello-world-scheduler/bin/helloworld", "nobody", agent_host=sched_host
    )

    sdk_tasks.check_tasks_updated("marathon", sched_prefix, sched_ids_before)
    check_healthy()

示例#22

0

显示文件

文件： test_zzzrecovery.py 项目： mesosphere/dcos-commons

def test_kill_scheduler():
    """Kill the scheduler process; its Marathon task should update while service tasks do not."""
    service_ids_before = sdk_tasks.get_task_ids(config.SERVICE_NAME, "")
    sched_prefix = sdk_marathon.get_scheduler_task_prefix(config.SERVICE_NAME)
    sched_ids_before = sdk_tasks.get_task_ids("marathon", sched_prefix)
    assert len(sched_ids_before) == 1, \
        "Expected to find ONLY one scheduler task but found {}".format(sched_ids_before)

    sched_host = sdk_marathon.get_scheduler_host(config.SERVICE_NAME)
    sdk_cmd.kill_task_with_pattern(
        "./hello-world-scheduler/bin/helloworld", "nobody", agent_host=sched_host
    )

    sdk_tasks.check_tasks_updated("marathon", sched_prefix, sched_ids_before)
    sdk_tasks.wait_for_active_framework(config.SERVICE_NAME)
    config.check_running()
    # Service task ids captured at the start go to the "not updated" check.
    sdk_tasks.check_tasks_not_updated(config.SERVICE_NAME, "", service_ids_before)

示例#23

0

显示文件

def test_kill_scheduler():
    """Kill the HDFS scheduler process; its Marathon task should update while service tasks do not."""
    service_ids_before = sdk_tasks.get_task_ids(foldered_name, "")
    sched_prefix = sdk_marathon.get_scheduler_task_prefix(foldered_name)
    sched_ids_before = sdk_tasks.get_task_ids("marathon", sched_prefix)
    assert len(sched_ids_before) == 1, "Expected to find one scheduler task"

    sched_host = sdk_marathon.get_scheduler_host(foldered_name)
    sdk_cmd.kill_task_with_pattern("./hdfs-scheduler/bin/hdfs", "nobody", agent_host=sched_host)

    # The scheduler should be restarted, but service tasks should be left as-is.
    sdk_tasks.check_tasks_updated("marathon", sched_prefix, sched_ids_before)
    sdk_tasks.check_tasks_not_updated(foldered_name, "", service_ids_before)
    config.check_healthy(service_name=foldered_name)

示例#24

0

显示文件

def test_config_updates_then_all_executors_killed():
    """Bump the world CPU config, kill the executor on every service host, then check the update.

    The 'world' task ids and the service hosts are captured before the config
    change is applied.
    """
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    hosts = shakedown.get_service_ips(config.SERVICE_NAME)
    config.bump_world_cpus()
    # Plain loop: the calls are made for their side effect, so no result list is built.
    for host in hosts:
        sdk_cmd.kill_task_with_pattern('helloworld.executor.Main', host)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()

示例#25

0

显示文件

def test_kill_essential():
    """Kill the essential task and verify that both tasks are relaunched against a matching executor."""
    verify_shared_executor('hello-0')

    ids_before = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello-0')
    assert len(ids_before) == 2

    # The pattern is hardcoded in the task cmd, see yml.
    essential_host = sdk_hosts.system_host(config.SERVICE_NAME, 'hello-0-essential')
    sdk_cmd.kill_task_with_pattern('shared-volume/essential', essential_host)

    # Wait for the ids to change...
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'hello-0', ids_before)
    # ...and for the tasks to be up and running.
    sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME)

    # The first verify_shared_executor call deleted the files; both should have
    # come back via the relaunch. Files are left as-is for the next test.
    verify_shared_executor('hello-0', delete_files=False)

示例#26

0

显示文件

文件： test_zzzrecovery.py 项目： keithchambers/dcos-commons

def test_config_updates_then_all_executors_killed():
    """Bump the world CPU config, kill the executor on every service host, then check the update.

    The 'world' task ids and the service hosts are looked up before the config
    change is applied.
    """
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    hosts = shakedown.get_service_ips(config.SERVICE_NAME)
    config.bump_world_cpus()
    # A for loop replaces the throwaway list comprehension; only the side effect matters.
    for host in hosts:
        sdk_cmd.kill_task_with_pattern('helloworld.executor.Main', host)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()

示例#27

0

显示文件

文件： test_zzzrecovery.py 项目： keithchambers/dcos-commons

def test_config_update_then_kill_all_task_in_node():
    """Bump the world CPU config, kill the 'world' process on every service host, then check the update."""
    # Kill both world tasks by targeting every host the service runs on.
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    hosts = shakedown.get_service_ips(config.SERVICE_NAME)
    config.bump_world_cpus()
    # Plain loop: the calls are made for their side effect, so no result list is built.
    for host in hosts:
        sdk_cmd.kill_task_with_pattern('world', host)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()

示例#28

0

显示文件

def test_config_update_then_kill_all_task_in_node():
    """Bump the world CPU config, kill the 'world' process on every service host, then check the update."""
    # Kill both world tasks by targeting every host the service runs on.
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    hosts = shakedown.get_service_ips(config.SERVICE_NAME)
    config.bump_world_cpus()
    # A for loop replaces the throwaway list comprehension; only the side effect matters.
    for host in hosts:
        sdk_cmd.kill_task_with_pattern('world', host)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()

示例#29

0

显示文件

文件： test_overlay.py 项目： waldonhendricks/dcos-commons

def test_integrity_on_name_node_failure(hdfs_client):
    """
    Check that HDFS writes and reads still work after the active name node is killed.

    The first name node (name-0-node) is the active name node by default when HDFS gets
    installed. The test finds whichever name node is currently active, kills its NameNode
    process, waits for the other name node to become active, then writes and reads a file.
    """
    name_nodes = ("name-0-node", "name-1-node")

    @retrying.retry(
        wait_fixed=1000,
        stop_max_delay=config.DEFAULT_HDFS_TIMEOUT * 1000
    )
    def _find_active_name_node():
        # Raising makes the retry decorator try again until the timeout is reached.
        active = next((node for node in name_nodes if is_name_node_active(node)), None)
        if active is None:
            raise Exception("Failed to determine active name node")
        return active

    killed_node = _find_active_name_node()
    killed_task = sdk_tasks.get_service_tasks(config.SERVICE_NAME, killed_node)[0]
    sdk_cmd.kill_task_with_pattern("NameNode", "nobody", agent_host=killed_task.host)

    # After the previously active name node is killed, the other one should be marked active.
    expected_active = "name-0-node" if killed_node == "name-1-node" else "name-1-node"

    @retrying.retry(
        wait_fixed=1000,
        stop_max_delay=config.DEFAULT_HDFS_TIMEOUT * 1000,
        retry_on_result=lambda res: not res,
    )
    def _wait_until_active(node):
        # A falsy result triggers another attempt via retry_on_result.
        return is_name_node_active(node)

    _wait_until_active(expected_active)

    filename = config.get_unique_filename("test_namenode_fail")

    config.hdfs_client_write_data(filename)
    config.hdfs_client_read_data(filename)

    config.check_healthy(config.SERVICE_NAME)

示例#30

0

显示文件

文件： test_overlay.py 项目： waldonhendricks/dcos-commons

def test_integrity_on_data_node_failure(hdfs_client):
    """Write a file to HDFS, kill two DataNode processes, then read the file back."""
    filename = config.get_unique_filename("test_datanode_fail")

    # An HDFS write only returns successfully once data replication has taken place.
    config.hdfs_client_write_data(filename)

    # There should be 3 data nodes (data-0,1,2); kill the first 2 returned.
    data_node_tasks = sdk_tasks.get_service_tasks(config.SERVICE_NAME, "data")
    for position in (0, 1):
        victim = data_node_tasks[position]
        sdk_cmd.kill_task_with_pattern("DataNode", "nobody", agent_host=victim.host)

    config.hdfs_client_read_data(filename)

    config.check_healthy(config.SERVICE_NAME)

示例#31

0

显示文件

文件： test_sanity.py 项目： mesosphere/dcos-commons

def test_kill_scheduler():
    """Kill the HDFS scheduler process; its Marathon task should update while service tasks do not."""
    service_ids_before = sdk_tasks.get_task_ids(foldered_name, "")
    sched_prefix = sdk_marathon.get_scheduler_task_prefix(foldered_name)
    sched_ids_before = sdk_tasks.get_task_ids("marathon", sched_prefix)
    assert len(sched_ids_before) == 1, "Expected to find one scheduler task"

    sched_host = sdk_marathon.get_scheduler_host(foldered_name)
    sdk_cmd.kill_task_with_pattern("./hdfs-scheduler/bin/hdfs", "nobody", agent_host=sched_host)

    # The scheduler should be restarted, but service tasks should be left as-is.
    sdk_tasks.check_tasks_updated("marathon", sched_prefix, sched_ids_before)
    sdk_tasks.wait_for_active_framework(foldered_name)
    sdk_tasks.check_tasks_not_updated(foldered_name, "", service_ids_before)
    config.check_healthy(service_name=foldered_name)

示例#32

0

显示文件

def test_losing_and_regaining_index_health(default_populated_index):
    """Kill the data-0-node process and check index health goes green, yellow, green."""

    def _expect_health(color):
        # Thin wrapper so each health check reads as a single call.
        config.check_elasticsearch_index_health(
            config.DEFAULT_INDEX_NAME, color, service_name=foldered_name
        )

    _expect_health("green")

    data_0_task = sdk_tasks.get_service_tasks(foldered_name, "data-0-node")[0]
    sdk_cmd.kill_task_with_pattern(
        "data__.*Elasticsearch", "nobody", agent_host=data_0_task.host
    )

    # After the kill the index is checked for "yellow" and then "green" again.
    for color in ("yellow", "green"):
        _expect_health(color)

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)

示例#33

0

显示文件

def test_kill_nonessential():
    """Kill the nonessential task and verify it is relaunched against the same executor as before."""
    verify_shared_executor('hello-0')

    essential_before = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello-0-essential')
    assert len(essential_before) == 1
    nonessential_before = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello-0-nonessential')
    assert len(nonessential_before) == 1

    # The pattern is hardcoded in the task cmd, see yml.
    nonessential_host = sdk_hosts.system_host(config.SERVICE_NAME, 'hello-0-nonessential')
    sdk_cmd.kill_task_with_pattern('shared-volume/nonessential', nonessential_host)

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'hello-0-nonessential', nonessential_before)
    sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME)
    sdk_tasks.check_tasks_not_updated(config.SERVICE_NAME, 'hello-0-essential', essential_before)

    # The first verify_shared_executor call deleted the files; only the
    # nonessential file came back via its relaunch.
    verify_shared_executor('hello-0', expected_files=['nonessential'])

示例#34

0

显示文件

文件： test_mount_volumes.py 项目： keithchambers/dcos-commons

def test_kill_agent():
    """Kill the agent task and verify it is relaunched against the same executor as before."""
    verify_shared_executor('hello-0')

    node_before = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello-0-node')
    assert len(node_before) == 1
    agent_before = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello-0-agent')
    assert len(agent_before) == 1

    # The pattern is hardcoded in the task cmd, see yml.
    agent_task_host = sdk_hosts.system_host(config.SERVICE_NAME, 'hello-0-agent')
    sdk_cmd.kill_task_with_pattern('agent-container-path/output', agent_task_host)

    sdk_tasks.check_tasks_not_updated(config.SERVICE_NAME, 'hello-0-node', node_before)
    sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'hello-0-agent', agent_before)

    # Re-run the shared-executor verification after recovery.
    # NOTE(review): presumably the first verify_shared_executor call deleted the
    # files it checked and the relaunch restored them - confirm which files are expected.
    verify_shared_executor('hello-0')

示例#35

0

显示文件

def test_integrity_on_name_node_failure():
    """
    Check that HDFS writes and reads still work after the active name node is killed.

    The first name node (name-0-node) is the active name node by default when HDFS gets
    installed. The test kills the NameNode process of the active name node, waits for
    failover to the other name node, then writes and reads a file.
    """
    killed_node = config.get_active_name_node(config.SERVICE_NAME)
    sdk_cmd.kill_task_with_pattern("NameNode", sdk_hosts.system_host(config.SERVICE_NAME, killed_node))

    # Whichever name node was not killed is expected to take over.
    expected_active = "name-0-node" if killed_node == "name-1-node" else "name-1-node"

    wait_for_failover_to_complete(expected_active)

    filename = get_unique_filename("test_namenode_fail")
    config.write_data_to_hdfs(config.SERVICE_NAME, filename)
    config.read_data_from_hdfs(config.SERVICE_NAME, filename)

    config.check_healthy(service_name=config.SERVICE_NAME)

示例#36

0

显示文件

文件： test_mount_volumes.py 项目： rishabh96b/dcos-commons

def test_kill_agent():
    """Kill the agent task and verify it is relaunched against the same executor as before."""
    verify_shared_executor("hello-0")

    pod_tasks = sdk_tasks.get_service_tasks(config.SERVICE_NAME, "hello-0")
    assert len(pod_tasks) == 2
    node_task = [task for task in pod_tasks if task.name == "hello-0-node"][0]
    agent_task = [task for task in pod_tasks if task.name == "hello-0-agent"][0]

    # The pattern is hardcoded in the task cmd, see yml.
    sdk_cmd.kill_task_with_pattern(
        "agent-container-path/output", "nobody", agent_host=agent_task.host
    )

    sdk_tasks.check_tasks_not_updated(config.SERVICE_NAME, "hello-0-node", [node_task.id])
    sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, "hello-0-agent", [agent_task.id])

    # Re-run the shared-executor verification after recovery.
    # NOTE(review): presumably the first verify_shared_executor call deleted the
    # files it checked and the relaunch restored them - confirm which files are expected.
    verify_shared_executor("hello-0")

示例#37

0

显示文件

文件： test_mount_volumes.py 项目： mesosphere/dcos-commons

def test_kill_agent():
    """Kill the agent task and verify it is relaunched against the same executor as before."""
    verify_shared_executor("hello-0")

    tasks_before = sdk_tasks.get_service_tasks(config.SERVICE_NAME, "hello-0")
    assert len(tasks_before) == 2
    node_before = [entry for entry in tasks_before if entry.name == "hello-0-node"][0]
    agent_before = [entry for entry in tasks_before if entry.name == "hello-0-agent"][0]

    # The pattern is hardcoded in the task cmd, see yml.
    sdk_cmd.kill_task_with_pattern(
        "agent-container-path/output", "nobody", agent_host=agent_before.host
    )

    sdk_tasks.check_tasks_not_updated(config.SERVICE_NAME, "hello-0-node", [node_before.id])
    sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, "hello-0-agent", [agent_before.id])

    # Re-run the shared-executor verification after recovery.
    # NOTE(review): presumably the first verify_shared_executor call deleted the
    # files it checked and the relaunch restored them - confirm which files are expected.
    verify_shared_executor("hello-0")

示例#38

0

显示文件

def test_kill_essential():
    """Kill the essential task and verify that both tasks are relaunched against a matching executor."""
    verify_shared_executor("hello-0")

    pod_tasks = sdk_tasks.get_service_tasks(config.SERVICE_NAME, "hello-0")
    assert len(pod_tasks) == 2

    # Kill the essential task process. Both tasks are in the same pod, so they
    # share a host. The pattern is hardcoded in the task cmd, see yml.
    sdk_cmd.kill_task_with_pattern(
        "shared-volume/essential", "nobody", agent_host=pod_tasks[0].host
    )

    # Wait for both task ids to change...
    ids_before = [task.id for task in pod_tasks]
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, "hello-0", ids_before)
    # ...and for the tasks to be up and running.
    sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME)

    # The first verify_shared_executor call deleted the files; both should have
    # come back via the relaunch. Files are left as-is for the next test.
    verify_shared_executor("hello-0", delete_files=False)

示例#39

0

显示文件

def test_kill_scheduler():
    """Kill the scheduler process; its Marathon task should update while service tasks do not."""
    service_ids_before = sdk_tasks.get_task_ids(config.SERVICE_NAME, "")
    sched_prefix = sdk_marathon.get_scheduler_task_prefix(config.SERVICE_NAME)
    sched_ids_before = sdk_tasks.get_task_ids("marathon", sched_prefix)
    assert len(sched_ids_before) == 1, \
        "Expected to find ONLY one scheduler task but found {}".format(sched_ids_before)

    sched_host = sdk_marathon.get_scheduler_host(config.SERVICE_NAME)
    sdk_cmd.kill_task_with_pattern(
        "./hello-world-scheduler/bin/helloworld", "nobody", agent_host=sched_host
    )

    sdk_tasks.check_tasks_updated("marathon", sched_prefix, sched_ids_before)
    sdk_tasks.wait_for_active_framework(config.SERVICE_NAME)
    config.check_running()
    # Service task ids captured at the start go to the "not updated" check.
    sdk_tasks.check_tasks_not_updated(config.SERVICE_NAME, "", service_ids_before)

示例#40

0

显示文件

def test_supervise_conflict_frameworkid():
    """Check that a supervised Spark driver restarts under a new framework id after being killed.

    Submits a supervised job, waits for it to register and run, kills the
    driver process matching its framework id, then waits for the job to
    deregister and re-register and asserts the framework id has changed.
    The driver is always killed through the Spark service at the end.
    """
    job_service_name = "MockTaskRunner"

    @retrying.retry(wait_fixed=1000,
                    stop_max_delay=600 * 1000,
                    retry_on_result=lambda res: not res)
    def wait_job_present(present):
        # Returns True once the service's presence matches `present`; a falsy
        # result makes the retry decorator poll again.
        svc = shakedown.get_service(job_service_name)
        if present:
            return svc is not None
        return svc is None

    job_args = [
        "--supervise", "--class", "MockTaskRunner", "--conf",
        "spark.cores.max=1", "--conf", "spark.executors.cores=1"
    ]

    # Submit outside the try block: if submission fails there is no driver to
    # clean up, and the finally clause would otherwise hit an unbound
    # `driver_id` and mask the real error with a NameError.
    driver_id = utils.submit_job(app_url=utils.dcos_test_jar_url(),
                                 app_args="1 1800",
                                 service_name=utils.SPARK_SERVICE_NAME,
                                 args=job_args)
    try:
        log.info("Started supervised driver {}".format(driver_id))

        wait_job_present(True)
        log.info("Job has registered")

        sdk_tasks.check_running(job_service_name, 1)
        log.info("Job has running executors")

        service_info = shakedown.get_service(job_service_name).dict()
        driver_regex = "spark.mesos.driver.frameworkId={}".format(
            service_info['id'])
        sdk_cmd.kill_task_with_pattern(driver_regex, service_info['hostname'])

        wait_job_present(False)

        wait_job_present(True)
        log.info("Job has re-registered")
        sdk_tasks.check_running(job_service_name, 1)
        log.info("Job has re-started")

        restarted_service_info = shakedown.get_service(job_service_name).dict()
        assert service_info['id'] != restarted_service_info[
            'id'], "Job has restarted with same framework Id"
    finally:
        kill_info = utils.kill_driver(driver_id, utils.SPARK_SERVICE_NAME)
        log.info("{}".format(kill_info))
        assert json.loads(kill_info)["success"], "Failed to kill spark job"
        wait_job_present(False)

示例#41

0

显示文件

def test_kill_nonessential():
    """Kill the nonessential task and verify it is relaunched against the same executor as before."""
    verify_shared_executor("hello-0")

    pod_tasks = sdk_tasks.get_service_tasks(config.SERVICE_NAME, "hello-0")
    assert len(pod_tasks) == 2
    essential_task = [task for task in pod_tasks if task.name == "hello-0-essential"][0]
    nonessential_task = [task for task in pod_tasks if task.name == "hello-0-nonessential"][0]

    # Kill the nonessential task process. Both tasks are in the same pod, so
    # they share a host. The pattern is hardcoded in the task cmd, see yml.
    sdk_cmd.kill_task_with_pattern(
        "shared-volume/nonessential", "nobody", agent_host=nonessential_task.host
    )

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, "hello-0-nonessential", [nonessential_task.id])
    sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME)
    sdk_tasks.check_tasks_not_updated(config.SERVICE_NAME, "hello-0-essential", [essential_task.id])

    # The first verify_shared_executor call deleted the files; only the
    # nonessential file came back via its relaunch.
    verify_shared_executor("hello-0", expected_files=["nonessential"])

示例#42

0

显示文件

def test_kill_agent():
    """Kill the agent task and verify it is relaunched against the same executor as before."""
    verify_shared_executor('hello-0')

    prior_node_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello-0-node')
    assert len(prior_node_ids) == 1
    prior_agent_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello-0-agent')
    assert len(prior_agent_ids) == 1

    # The pattern is hardcoded in the task cmd, see yml.
    victim_host = sdk_hosts.system_host(config.SERVICE_NAME, 'hello-0-agent')
    sdk_cmd.kill_task_with_pattern('agent-container-path/output', victim_host)

    sdk_tasks.check_tasks_not_updated(config.SERVICE_NAME, 'hello-0-node', prior_node_ids)
    sdk_plan.wait_for_completed_recovery(config.SERVICE_NAME)
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'hello-0-agent', prior_agent_ids)

    # Re-run the shared-executor verification after recovery.
    # NOTE(review): presumably the first verify_shared_executor call deleted the
    # files it checked and the relaunch restored them - confirm which files are expected.
    verify_shared_executor('hello-0')

示例#43

0

显示文件

def test_config_update_then_zk_killed():
    """Bump the hello CPU config, kill the 'zookeeper' process, then check the update."""
    ids_before = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello')

    config.bump_hello_cpus()
    # No host is passed, so the kill uses kill_task_with_pattern's default target.
    sdk_cmd.kill_task_with_pattern('zookeeper')

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'hello', ids_before)
    config.check_running()

示例#44

0

显示文件

def test_config_update_then_master_killed():
    """Bump the world CPU config, kill the 'mesos-master' process, then check the update."""
    ids_before = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')

    config.bump_world_cpus()
    # No host is passed, so the kill uses kill_task_with_pattern's default target.
    sdk_cmd.kill_task_with_pattern('mesos-master')

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', ids_before)
    config.check_running()

示例#45

0

显示文件

def test_zk_killed():
    """Kill the 'zookeeper' process and check the service is still running."""
    zk_pattern = 'zookeeper'
    sdk_cmd.kill_task_with_pattern(zk_pattern)

    config.check_running()

示例#46

0

显示文件

文件： test_zzzrecovery.py 项目： keithchambers/dcos-commons

def test_all_executors_killed():
    """Kill the executor process on every service host and check the service is running."""
    service_hosts = shakedown.get_service_ips(config.SERVICE_NAME)
    for address in service_hosts:
        sdk_cmd.kill_task_with_pattern('helloworld.executor.Main', address)

    config.check_running()

示例#47

0

显示文件

文件： test_zzzrecovery.py 项目： keithchambers/dcos-commons

def test_master_killed():
    """Kill the 'mesos-master' process and check the service is still running."""
    master_pattern = 'mesos-master'
    sdk_cmd.kill_task_with_pattern(master_pattern)

    config.check_running()

示例#48

0

显示文件

文件： test_zzzrecovery.py 项目： keithchambers/dcos-commons

def test_zk_killed():
    """Kill the 'zookeeper' process, then run the service's running check."""
    pattern = 'zookeeper'
    sdk_cmd.kill_task_with_pattern(pattern)

    config.check_running()

示例#49

0

显示文件

文件： test_zzzrecovery.py 项目： keithchambers/dcos-commons

def test_config_update_then_zk_killed():
    """Apply the hello CPU bump, kill the 'zookeeper' process, then check the update."""
    prior_hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello')

    config.bump_hello_cpus()
    # No host is passed, so the kill uses kill_task_with_pattern's default target.
    sdk_cmd.kill_task_with_pattern('zookeeper')

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'hello', prior_hello_ids)
    config.check_running()

示例#50

0

显示文件

文件： test_zzzrecovery.py 项目： keithchambers/dcos-commons

def test_config_update_then_master_killed():
    """Apply the world CPU bump, kill the 'mesos-master' process, then check the update."""
    prior_world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')

    config.bump_world_cpus()
    # No host is passed, so the kill uses kill_task_with_pattern's default target.
    sdk_cmd.kill_task_with_pattern('mesos-master')

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', prior_world_ids)
    config.check_running()

示例#51

0

显示文件

文件： test_zzzrecovery.py 项目： keithchambers/dcos-commons

def test_config_update_then_executor_killed():
    """Apply the world CPU bump, kill the executor process on world-0, then check the update."""
    prior_world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    config.bump_world_cpus()

    victim_host = 'world-0-server.{}.mesos'.format(config.SERVICE_NAME)
    sdk_cmd.kill_task_with_pattern('helloworld.executor.Main', victim_host)

    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', prior_world_ids)
    config.check_running()