def get_tasks_avoiding_scheduler(service_name, task_name_pattern):
    '''Returns a list of tasks which are not located on the Scheduler's machine.

    Also avoid killing the system that the scheduler is on. This is just to speed up testing.
    In practice, the scheduler would eventually get relaunched on a different node by Marathon and
    we'd be able to proceed with repairing the service from there. However, it takes 5-20 minutes
    for Mesos to decide that the agent is dead. This is also why we perform a manual 'ls' check to
    verify the host is down, rather than waiting for Mesos to tell us.
    '''
    skip_tasks = {sdk_package_registry.PACKAGE_REGISTRY_SERVICE_NAME}
    server_tasks = [
        task for task in get_summary()
        if task.name not in skip_tasks and task_name_pattern.match(task.name)
    ]

    scheduler_ip = shakedown.get_service_ips('marathon', service_name).pop()
    log.info('Scheduler IP: {}'.format(scheduler_ip))

    # Always avoid package registry (if present)
    registry_ips = shakedown.get_service_ips(
        'marathon', sdk_package_registry.PACKAGE_REGISTRY_SERVICE_NAME)
    log.info('Package Registry [{}] IP(s): {}'.format(
        sdk_package_registry.PACKAGE_REGISTRY_SERVICE_NAME, registry_ips))
    skip_ips = {scheduler_ip} | set(registry_ips)
    avoid_tasks = [task for task in server_tasks if task.host not in skip_ips]
    log.info('Found tasks avoiding scheduler and {} at {}: {}'.format(
        sdk_package_registry.PACKAGE_REGISTRY_SERVICE_NAME, skip_ips,
        avoid_tasks))
    return avoid_tasks
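The docstring above mentions a manual 'ls' check to confirm a host is really down instead of waiting for Mesos; a minimal sketch of such a check, assuming shakedown.run_command_on_agent as used in the shutdown tests later in this listing (the helper name is hypothetical):

def host_appears_down(host_ip):
    # Hypothetical helper, not from the original source: run a trivial command
    # on the agent; a failed status suggests the host is unreachable/down.
    status, _ = shakedown.run_command_on_agent(host_ip, 'ls')
    return status is not True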
Example #2
def test_cleanup_then_all_executors_killed(install_framework):
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        run_cleanup(), lambda:
        [kill_task_with_pattern('cassandra.executor.Main', h) for h in hosts])

    check_health()
def test_kill_all_task_in_node():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    for host in hosts:
        kill_task_with_pattern('CassandraDaemon', host)

    recover_failed_agents(hosts)
    check_health()
def test_all_executors_killed():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    for host in hosts:
        kill_task_with_pattern('cassandra.executor.Main', host)

    recover_failed_agents(hosts)
    check_health()
Example #5
def get_pod_to_replace():
    '''Also avoid killing the system that the scheduler is on. This is just to speed up testing.
    In practice, the scheduler would eventually get relaunched on a different node by Marathon and
    we'd be able to proceed with repairing the service from there. However, it takes 5-20 minutes
    for Mesos to decide that the agent is dead. This is also why we perform a manual 'ls' check to
    verify the host is down, rather than waiting for Mesos to tell us.
    '''
    scheduler_ip = shakedown.get_service_ips('marathon',
                                             config.SERVICE_NAME).pop()
    log.info('Scheduler IP: {}'.format(scheduler_ip))

    pods = {}
    for pod_id in range(0, config.DEFAULT_TASK_COUNT):
        pod_name = 'node-{}'.format(pod_id)
        pods[pod_name] = {
            'name': pod_name,
            'host': get_pod_host(pod_name),
            'agent': get_pod_agent(pod_name)
        }
    log.info('Pods:\n{}'.format(pprint.pformat(pods)))

    replace_pod = None
    for key, value in pods.items():
        if value['host'] != scheduler_ip:
            replace_pod = value
            log.info('Found pod avoiding scheduler at {}: {}'.format(
                scheduler_ip, value))
            break
    return replace_pod
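A hedged usage sketch for the helper above (the wrapper name is an assumption; cmd.svc_cli and sdk_tasks are used as in Example #48 below): replace the selected pod and wait for its tasks to be relaunched.

def replace_chosen_pod():
    # Hypothetical wrapper, not part of the original source.
    pod = get_pod_to_replace()
    assert pod is not None, 'Could not find a pod away from the scheduler'
    task_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, pod['name'])
    cmd.svc_cli(config.PACKAGE_NAME, config.SERVICE_NAME,
                'pod replace {}'.format(pod['name']))
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, pod['name'], task_ids)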
def test_config_update_eventually_succeeds_after_all_brokers_fail():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        increment_broker_port_config,
        lambda: [kill_task_with_pattern('kafka.Kafka', h) for h in hosts])

    check_health()
Example #7
def test_repair_then_kill_all_task_in_node():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        run_repair,
        lambda: [kill_task_with_pattern('CassandraDaemon', h) for h in hosts])

    check_health()
def test_data_survives_crash():
    # Generate SQL Commands
    cmd_drop_database = cockroach_cmd('DROP DATABASE IF EXISTS bank;')
    cmd_create_database = cockroach_cmd('CREATE DATABASE bank;')
    cmd_create_table = cockroach_cmd(
        'CREATE TABLE accounts (id INT PRIMARY KEY, balance INT);', 'bank')
    cmd_insert = cockroach_cmd(
        'INSERT INTO accounts (id, balance) VALUES (1, 1000), (2, 250);',
        'bank')
    cmd_select = cockroach_cmd('SELECT id, balance FROM accounts;', 'bank')

    # Run SQL Commands (except cmd_select)
    cmd.run_cli(cmd_drop_database)
    out_create_database = cmd.run_cli(cmd_create_database)
    out_create_table = cmd.run_cli(cmd_create_table)
    out_insert = cmd.run_cli(cmd_insert)

    # Kill all CockroachDB nodes (one at a time)
    service_ips = shakedown.get_service_ips(SERVICE_NAME)
    for service_ip in service_ips:
        # Kill the CockroachDB node on this host
        shakedown.kill_process_on_host(service_ip, "cockroach start")
        # Wait for a replacement CockroachDB node to run
        tasks.check_running(SERVICE_NAME, DEFAULT_TASK_COUNT, 5 * 60)
        # Wait for the CockroachDB cluster to become healthy
        shakedown.wait_for(lambda: cockroach_nodes_healthy(),
                           noisy=True,
                           timeout_seconds=5 * 60)

    # Run cmd_select
    out_select = cmd.run_cli(cmd_select)

    # Confirm Output
    assert '2 rows' in out_select
def test_all_partition():
    hosts = shakedown.get_service_ips(config.SERVICE_NAME)
    for host in hosts:
        shakedown.partition_agent(host)
    for host in hosts:
        shakedown.reconnect_agent(host)
    config.check_running()
Example #10
def test_cleanup_then_kill_all_task_in_node(install_framework):
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        run_cleanup,
        lambda: [kill_task_with_pattern('CassandraDaemon', h) for h in hosts])

    check_health()
Example #11
def test_config_updates_then_all_executors_killed():
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    hosts = shakedown.get_service_ips(config.SERVICE_NAME)
    config.bump_world_cpus()
    [sdk_tasks.kill_task_with_pattern('helloworld.executor.Main', h) for h in hosts]
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()
Example #12
def test_config_update_then_kill_all_task_in_node():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        bump_cpu_count_config,
        lambda: [kill_task_with_pattern('CassandraDaemon', h) for h in hosts])

    check_health()
Example #13
def test_repair_then_all_executors_killed():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        run_repair, lambda:
        [kill_task_with_pattern('cassandra.executor.Main', h) for h in hosts])

    check_health()
Example #14
def test_config_update_then_all_executors_killed():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        bump_cpu_count_config, lambda:
        [kill_task_with_pattern('cassandra.executor.Main', h) for h in hosts])

    check_health()
Example #15
def test_shutdown_host_test():
    scheduler_ip = shakedown.get_service_ips('marathon', PACKAGE_NAME).pop()
    sdk_utils.out('marathon ip = {}'.format(scheduler_ip))

    node_ip = None
    for pod_id in range(0, DEFAULT_TASK_COUNT):
        pod_host = get_pod_host(pod_id)
        if pod_host != scheduler_ip:
            node_ip = pod_host
            break

    assert node_ip is not None, 'Could not find a node to shut down'

    old_agent = get_pod_agent(pod_id)
    sdk_utils.out('pod id = {}, node_ip = {}, agent = {}'.format(
        pod_id, node_ip, old_agent))

    task_ids = tasks.get_task_ids(PACKAGE_NAME, 'node-{}'.format(pod_id))

    # instead of partitioning or reconnecting, we shut down the host permanently
    status, stdout = shakedown.run_command_on_agent(node_ip,
                                                    'sudo shutdown -h +1')
    sdk_utils.out('shutdown agent {}: [{}] {}'.format(node_ip, status, stdout))

    assert status is True
    time.sleep(100)

    cmd.run_cli('cassandra pods replace node-{}'.format(pod_id))
    tasks.check_tasks_updated(PACKAGE_NAME, 'node', task_ids)

    # double check that all tasks are running
    tasks.check_running(PACKAGE_NAME, DEFAULT_TASK_COUNT)
    new_agent = get_pod_agent(pod_id)

    assert old_agent != new_agent
Example #16
def test_config_updates_then_all_executors_killed():
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    hosts = shakedown.get_service_ips(config.SERVICE_NAME)
    config.bump_world_cpus()
    [sdk_cmd.kill_task_with_pattern('helloworld.executor.Main', h) for h in hosts]
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()
def test_cleanup_then_kill_all_task_in_node(install_framework):
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        run_cleanup,
        lambda: [kill_task_with_pattern('CassandraDaemon', h) for h in hosts]
    )

    check_health()
Example #18
def test_config_update_then_kill_all_task_in_node():
    #  kill both world tasks
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    hosts = shakedown.get_service_ips(config.SERVICE_NAME)
    config.bump_world_cpus()
    [sdk_cmd.kill_task_with_pattern('world', h) for h in hosts]
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()
Example #19
def test_config_update_eventually_succeeds_after_all_brokers_fail():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        increment_broker_port_config,
        lambda: [kill_task_with_pattern('kafka.Kafka', h) for h in hosts]
    )

    check_health()
Example #20
def test_config_update_then_kill_all_task_in_node():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        bump_cpu_count_config,
        lambda: [kill_task_with_pattern('CassandraDaemon', h) for h in hosts]
    )

    check_health()
def test_repair_then_kill_all_task_in_node():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        run_repair,
        lambda: [kill_task_with_pattern('CassandraDaemon', h) for h in hosts]
    )

    check_health()
Example #23
def test_config_update_then_kill_all_task_in_node():
    #  kill both world tasks
    world_ids = tasks.get_task_ids(PACKAGE_NAME, 'world')
    bump_world_cpus()
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    [tasks.kill_task_with_pattern('world', h) for h in hosts]
    tasks.check_tasks_updated(PACKAGE_NAME, 'world', world_ids)
    check_running()
def test_cleanup_then_all_executors_killed():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)

    run_planned_operation(
        run_cleanup, lambda:
        [kill_task_with_pattern('cassandra.executor.Main', h) for h in hosts],
        lambda: recover_failed_agents(hosts))

    check_health()
def test_cleanup_then_kill_all_task_in_node():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)

    run_planned_operation(
        run_cleanup,
        lambda: [kill_task_with_pattern('CassandraDaemon', h) for h in hosts],
        lambda: recover_failed_agents(hosts))

    check_health()
Example #26
def test_all_partition():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)

    for host in hosts:
        shakedown.partition_agent(host)
    for host in hosts:
        shakedown.reconnect_agent(host)

    check_health()
Example #27
def test_service_becomes_healthy_after_all_agents_are_partitioned():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)

    for host in hosts:
        spin(shakedown.partition_agent, lambda x: (True, ''), host)
    for host in hosts:
        shakedown.reconnect_agent(host)

    check_health()
def test_repair_then_kill_task_in_node():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    host = get_node_host()

    run_planned_operation(
        run_repair, lambda: kill_task_with_pattern('CassandraDaemon', host),
        lambda: recover_failed_agents(hosts))

    check_health()
Example #31
def get_scheduler_host(service_name):
    # Marathon mangles foldered paths as follows: "/path/to/svc" => "svc.to.path"
    task_name_elems = service_name.lstrip('/').split('/')
    task_name_elems.reverse()
    app_name = '.'.join(task_name_elems)
    ips = shakedown.get_service_ips('marathon', app_name)
    if len(ips) == 0:
        raise Exception('No IPs found for marathon task "{}". Available tasks are: {}'.format(
            app_name, [task['name'] for task in shakedown.get_service_tasks('marathon')]))
    return ips.pop()
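For illustration (an assumption, not from the source), the folder-path mangling handled above behaves as follows:

# get_scheduler_host('/folder/path/hello-world') looks up the Marathon app
# named 'hello-world.path.folder' and returns one of its IPs.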
def test_cleanup_then_all_executors_killed(install_framework):
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        run_cleanup(),
        lambda: [
            kill_task_with_pattern('cassandra.executor.Main', h) for h in hosts
        ]
    )

    check_health()
Example #33
def test_repair_then_all_executors_killed():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        run_repair,
        lambda: [
            kill_task_with_pattern('cassandra.executor.Main', h) for h in hosts
        ]
    )

    check_health()
def test_config_update_then_all_executors_killed():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        bump_cpu_count_config,
        lambda: [
            kill_task_with_pattern('cassandra.executor.Main', h) for h in hosts
        ]
    )

    check_health()
Example #36
def test_kill_all_journalnodes():
    journal_ids = tasks.get_task_ids(PACKAGE_NAME, 'journal')
    data_ids = tasks.get_task_ids(PACKAGE_NAME, 'data')

    for host in shakedown.get_service_ips(PACKAGE_NAME):
        tasks.kill_task_with_pattern('journalnode', host)

    check_healthy()
    # name nodes fail and restart, so don't check those
    tasks.check_tasks_updated(PACKAGE_NAME, 'journal', journal_ids)
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'data', data_ids)
def test_repair_then_all_partition():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)

    def partition():
        for host in hosts:
            shakedown.partition_agent(host)
        for host in hosts:
            shakedown.reconnect_agent(host)

    run_planned_operation(run_repair, partition)

    check_health()
def test_config_update_then_all_partition():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)

    def partition():
        for host in hosts:
            shakedown.partition_agent(host)
        for host in hosts:
            shakedown.reconnect_agent(host)

    run_planned_operation(bump_cpu_count_config, partition)

    check_health()
Example #40
def test_kill_all_datanodes():
    journal_ids = tasks.get_task_ids(PACKAGE_NAME, 'journal')
    name_ids = tasks.get_task_ids(PACKAGE_NAME, 'name')
    data_ids = tasks.get_task_ids(PACKAGE_NAME, 'data')

    for host in shakedown.get_service_ips(PACKAGE_NAME):
        tasks.kill_task_with_pattern('datanode', host)

    check_healthy()
    tasks.check_tasks_updated(PACKAGE_NAME, 'data', data_ids)
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'journal', journal_ids)
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'name', name_ids)
def test_cleanup_then_all_partition(install_framework):
    hosts = shakedown.get_service_ips(PACKAGE_NAME)

    def partition():
        for host in hosts:
            shakedown.partition_agent(host)
        for host in hosts:
            shakedown.reconnect_agent(host)

    run_planned_operation(run_cleanup, partition)

    check_health()
Example #44
def test_kill_all_journalnodes():
    journal_ids = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'journal')
    data_ids = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'data')

    for host in shakedown.get_service_ips(FOLDERED_SERVICE_NAME):
        sdk_tasks.kill_task_with_pattern('journalnode', host)

    expect_recovery()
    # name nodes fail and restart, so don't check those
    sdk_tasks.check_tasks_updated(FOLDERED_SERVICE_NAME, 'journal',
                                  journal_ids)
    sdk_tasks.check_tasks_not_updated(FOLDERED_SERVICE_NAME, 'data', data_ids)
Example #45
def test_config_update_eventually_succeeds_after_all_agents_are_partitioned():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)

    def partition():
        for host in hosts:
            spin(shakedown.partition_agent, lambda x: (True, ''), host)
        for host in hosts:
            shakedown.reconnect_agent(host)

    run_planned_operation(increment_broker_port_config, partition)

    check_health()
Example #47
def get_tasks_avoiding_scheduler(service_name, task_name_pattern):
    '''Returns a list of tasks which are not located on the Scheduler's machine.

    Also avoid killing the system that the scheduler is on. This is just to speed up testing.
    In practice, the scheduler would eventually get relaunched on a different node by Marathon and
    we'd be able to proceed with repairing the service from there. However, it takes 5-20 minutes
    for Mesos to decide that the agent is dead. This is also why we perform a manual 'ls' check to
    verify the host is down, rather than waiting for Mesos to tell us.
    '''
    scheduler_ip = shakedown.get_service_ips('marathon', service_name).pop()
    log.info('Scheduler IP: {}'.format(scheduler_ip))

    server_tasks = [
        task for task in get_summary()
        if task_name_pattern.match(task.name)]

    avoid_tasks = [task for task in server_tasks if task.host != scheduler_ip]
    log.info('Found tasks avoiding scheduler at {}: {}'.format(scheduler_ip, avoid_tasks))
    return avoid_tasks
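A hedged usage sketch for the helper above (the task-name pattern and kill target are assumptions based on the Cassandra examples in this listing): pick a task that is not co-located with the scheduler and kill the process on its host.

import re

# Hypothetical usage, not from the original source.
tasks_to_kill = get_tasks_avoiding_scheduler(
    config.SERVICE_NAME, re.compile('^node-[0-9]+-server$'))
assert tasks_to_kill, 'No tasks found away from the scheduler'
sdk_cmd.kill_task_with_pattern('CassandraDaemon', tasks_to_kill[0].host)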
Example #48
def test_shutdown_host_test():
    scheduler_ip = shakedown.get_service_ips('marathon', config.SERVICE_NAME).pop()
    log.info('marathon ip = {}'.format(scheduler_ip))

    node_ip = None
    pod_name = None
    for pod_id in range(0, config.DEFAULT_TASK_COUNT):
        pod_name = 'node-{}'.format(pod_id)
        pod_host = get_pod_host(pod_name)
        if pod_host != scheduler_ip:
            node_ip = pod_host
            break

    assert node_ip is not None, 'Could not find a node to shut down'

    old_agent = get_pod_agent(pod_name)
    log.info('pod name = {}, node_ip = {}, agent = {}'.format(pod_name, node_ip, old_agent))

    task_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, pod_name)

    # instead of partitioning or reconnecting, we shut down the host permanently
    status, stdout = shakedown.run_command_on_agent(node_ip, 'sudo shutdown -h +1')
    log.info('shutdown agent {}: [{}] {}'.format(node_ip, status, stdout))

    assert status is True

    log.info('sleeping 100s after shutting down agent')
    time.sleep(100)

    cmd.svc_cli(config.PACKAGE_NAME, config.SERVICE_NAME, 'pod replace {}'.format(pod_name))
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, pod_name, task_ids)

    # double check that all tasks are running
    sdk_tasks.check_running(config.SERVICE_NAME, config.DEFAULT_TASK_COUNT)
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    new_agent = get_pod_agent(pod_name)
    assert old_agent != new_agent
Example #49
def test_kill_scheduler():
    sdk_cmd.kill_task_with_pattern('hdfs.scheduler.Main', shakedown.get_service_ips('marathon').pop())
    config.check_healthy(service_name=sdk_utils.get_foldered_name(config.SERVICE_NAME))
Example #50
def test_service_becomes_healthy_after_all_brokers_fail():
    for host in shakedown.get_service_ips(PACKAGE_NAME):
        kill_task_with_pattern('kafka.Kafka', host)

    check_health()
Example #51
def get_broker_host():
    return shakedown.get_service_ips(PACKAGE_NAME).pop()
Example #52
def test_all_executors_killed():
    for host in shakedown.get_service_ips(config.SERVICE_NAME):
        sdk_cmd.kill_task_with_pattern('helloworld.executor.Main', host)
    config.check_running()
Example #53
def get_marathon_host():
    return shakedown.get_service_ips('marathon', PACKAGE_NAME).pop()
Example #54
def test_kill_scheduler():
    sdk_tasks.kill_task_with_pattern('hdfs.scheduler.Main', shakedown.get_service_ips('marathon').pop())
    config.check_healthy(service_name=FOLDERED_SERVICE_NAME)
def test_kill_all_task_in_node():
    for host in shakedown.get_service_ips(PACKAGE_NAME):
        kill_task_with_pattern('CassandraDaemon', host)

    check_health()
Example #56
def ip_of_mom():
    service_ips = shakedown.get_service_ips('marathon', 'marathon-user')
    for mom_ip in service_ips:
        return mom_ip
def fn():
    try:
        return shakedown.get_service_ips(PACKAGE_NAME)
    except IndexError:
        return set()
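This helper reads like a predicate for a wait/retry loop; a hedged usage sketch (an assumption) using shakedown.wait_for as it appears elsewhere in this listing:

# Hypothetical usage, not from the original source: wait until the service
# reports at least one IP.
shakedown.wait_for(lambda: len(fn()) > 0, timeout_seconds=5 * 60)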
def test_all_executors_killed():
    for host in shakedown.get_service_ips(PACKAGE_NAME):
        kill_task_with_pattern('cassandra.executor.Main', host)

    check_health()
def get_scheduler_host():
    return shakedown.get_service_ips('marathon').pop()