def get_tasks_avoiding_scheduler(service_name, task_name_pattern):
    '''Returns a list of tasks which are not located on the Scheduler's machine.

    Also avoid killing the system that the scheduler is on. This is just to speed up testing.
    In practice, the scheduler would eventually get relaunched on a different node by Marathon and
    we'd be able to proceed with repairing the service from there. However, it takes 5-20 minutes
    for Mesos to decide that the agent is dead. This is also why we perform a manual 'ls' check to
    verify the host is down, rather than waiting for Mesos to tell us.
    '''
    skip_tasks = {sdk_package_registry.PACKAGE_REGISTRY_SERVICE_NAME}
    server_tasks = [
        task for task in get_summary()
        if task.name not in skip_tasks and task_name_pattern.match(task.name)
    ]

    scheduler_ip = shakedown.get_service_ips('marathon', service_name).pop()
    log.info('Scheduler IP: {}'.format(scheduler_ip))

    # Always avoid package registry (if present)
    registry_ips = shakedown.get_service_ips(
        'marathon', sdk_package_registry.PACKAGE_REGISTRY_SERVICE_NAME)
    log.info('Package Registry [{}] IP(s): {}'.format(
        sdk_package_registry.PACKAGE_REGISTRY_SERVICE_NAME, registry_ips))
    skip_ips = {scheduler_ip} | set(registry_ips)
    avoid_tasks = [task for task in server_tasks if task.host not in skip_ips]
    log.info('Found tasks avoiding scheduler and {} at {}: {}'.format(
        sdk_package_registry.PACKAGE_REGISTRY_SERVICE_NAME, skip_ips,
        avoid_tasks))
    return avoid_tasks
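The docstring above mentions a manual 'ls' check to confirm a host is really down instead of waiting for Mesos; a minimal sketch of such a check, assuming shakedown.run_command_on_agent as used in the shutdown tests later in this listing (the helper name is hypothetical):

def host_appears_down(host_ip):
    # Hypothetical helper, not from the original source: run a trivial command
    # on the agent; a failed status suggests the host is unreachable/down.
    status, _ = shakedown.run_command_on_agent(host_ip, 'ls')
    return status is not True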
Example #2
def test_cleanup_then_all_executors_killed(install_framework):
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        run_cleanup(), lambda:
        [kill_task_with_pattern('cassandra.executor.Main', h) for h in hosts])

    check_health()
def test_kill_all_task_in_node():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    for host in hosts:
        kill_task_with_pattern('CassandraDaemon', host)

    recover_failed_agents(hosts)
    check_health()
def test_all_executors_killed():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    for host in hosts:
        kill_task_with_pattern('cassandra.executor.Main', host)

    recover_failed_agents(hosts)
    check_health()
Example #5
def get_pod_to_replace():
    '''Also avoid killing the system that the scheduler is on. This is just to speed up testing.
    In practice, the scheduler would eventually get relaunched on a different node by Marathon and
    we'd be able to proceed with repairing the service from there. However, it takes 5-20 minutes
    for Mesos to decide that the agent is dead. This is also why we perform a manual 'ls' check to
    verify the host is down, rather than waiting for Mesos to tell us.
    '''
    scheduler_ip = shakedown.get_service_ips('marathon',
                                             config.SERVICE_NAME).pop()
    log.info('Scheduler IP: {}'.format(scheduler_ip))

    pods = {}
    for pod_id in range(0, config.DEFAULT_TASK_COUNT):
        pod_name = 'node-{}'.format(pod_id)
        pods[pod_name] = {
            'name': pod_name,
            'host': get_pod_host(pod_name),
            'agent': get_pod_agent(pod_name)
        }
    log.info('Pods:\n{}'.format(pprint.pformat(pods)))

    replace_pod = None
    for key, value in pods.items():
        if value['host'] != scheduler_ip:
            replace_pod = value
            log.info('Found pod avoiding scheduler at {}: {}'.format(
                scheduler_ip, value))
            break
    return replace_pod
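A hedged usage sketch for the helper above (the wrapper name is an assumption; cmd.svc_cli and sdk_tasks are used as in Example #48 below): replace the selected pod and wait for its tasks to be relaunched.

def replace_chosen_pod():
    # Hypothetical wrapper, not part of the original source.
    pod = get_pod_to_replace()
    assert pod is not None, 'Could not find a pod away from the scheduler'
    task_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, pod['name'])
    cmd.svc_cli(config.PACKAGE_NAME, config.SERVICE_NAME,
                'pod replace {}'.format(pod['name']))
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, pod['name'], task_ids)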
def test_config_update_eventually_succeeds_after_all_brokers_fail():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        increment_broker_port_config,
        lambda: [kill_task_with_pattern('kafka.Kafka', h) for h in hosts])

    check_health()
Example #7
def test_repair_then_kill_all_task_in_node():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        run_repair,
        lambda: [kill_task_with_pattern('CassandraDaemon', h) for h in hosts])

    check_health()
def test_data_survives_crash():
    # Generate SQL Commands
    cmd_drop_database = cockroach_cmd('DROP DATABASE IF EXISTS bank;')
    cmd_create_database = cockroach_cmd('CREATE DATABASE bank;')
    cmd_create_table = cockroach_cmd(
        'CREATE TABLE accounts (id INT PRIMARY KEY, balance INT);', 'bank')
    cmd_insert = cockroach_cmd(
        'INSERT INTO accounts (id, balance) VALUES (1, 1000), (2, 250);',
        'bank')
    cmd_select = cockroach_cmd('SELECT id, balance FROM accounts;', 'bank')

    # Run SQL Commands (except cmd_select)
    cmd.run_cli(cmd_drop_database)
    out_create_database = cmd.run_cli(cmd_create_database)
    out_create_table = cmd.run_cli(cmd_create_table)
    out_insert = cmd.run_cli(cmd_insert)

    # Kill all CockroachDB nodes (one at a time)
    service_ips = shakedown.get_service_ips(SERVICE_NAME)
    for service_ip in service_ips:
        # Kill the CockroachDB node on this host
        shakedown.kill_process_on_host(service_ip, "cockroach start")
        # Wait for a replacement CockroachDB node to run
        tasks.check_running(SERVICE_NAME, DEFAULT_TASK_COUNT, 5 * 60)
        # Wait for the CockroachDB cluster to become healthy
        shakedown.wait_for(lambda: cockroach_nodes_healthy(),
                           noisy=True,
                           timeout_seconds=5 * 60)

    # Run cmd_select
    out_select = cmd.run_cli(cmd_select)

    # Confirm Output
    assert '2 rows' in out_select
def test_all_partition():
    hosts = shakedown.get_service_ips(config.SERVICE_NAME)
    for host in hosts:
        shakedown.partition_agent(host)
    for host in hosts:
        shakedown.reconnect_agent(host)
    config.check_running()
Example #10
def test_cleanup_then_kill_all_task_in_node(install_framework):
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        run_cleanup,
        lambda: [kill_task_with_pattern('CassandraDaemon', h) for h in hosts])

    check_health()
Example #11
def test_config_updates_then_all_executors_killed():
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    hosts = shakedown.get_service_ips(config.SERVICE_NAME)
    config.bump_world_cpus()
    [sdk_tasks.kill_task_with_pattern('helloworld.executor.Main', h) for h in hosts]
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()
Example #12
def test_config_update_then_kill_all_task_in_node():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        bump_cpu_count_config,
        lambda: [kill_task_with_pattern('CassandraDaemon', h) for h in hosts])

    check_health()
Example #13
def test_repair_then_all_executors_killed():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        run_repair, lambda:
        [kill_task_with_pattern('cassandra.executor.Main', h) for h in hosts])

    check_health()
Example #14
def test_config_update_then_all_executors_killed():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        bump_cpu_count_config, lambda:
        [kill_task_with_pattern('cassandra.executor.Main', h) for h in hosts])

    check_health()
Example #15
def test_shutdown_host_test():
    scheduler_ip = shakedown.get_service_ips('marathon', PACKAGE_NAME).pop()
    sdk_utils.out('marathon ip = {}'.format(scheduler_ip))

    node_ip = None
    for pod_id in range(0, DEFAULT_TASK_COUNT):
        pod_host = get_pod_host(pod_id)
        if pod_host != scheduler_ip:
            node_ip = pod_host
            break

    assert node_ip is not None, 'Could not find a node to shut down'

    old_agent = get_pod_agent(pod_id)
    sdk_utils.out('pod id = {}, node_ip = {}, agent = {}'.format(
        pod_id, node_ip, old_agent))

    task_ids = tasks.get_task_ids(PACKAGE_NAME, 'node-{}'.format(pod_id))

    # instead of partitioning or reconnecting, we shut down the host permanently
    status, stdout = shakedown.run_command_on_agent(node_ip,
                                                    'sudo shutdown -h +1')
    sdk_utils.out('shutdown agent {}: [{}] {}'.format(node_ip, status, stdout))

    assert status is True
    time.sleep(100)

    cmd.run_cli('cassandra pods replace node-{}'.format(pod_id))
    tasks.check_tasks_updated(PACKAGE_NAME, 'node', task_ids)

    # double check that all tasks are running
    tasks.check_running(PACKAGE_NAME, DEFAULT_TASK_COUNT)
    new_agent = get_pod_agent(pod_id)

    assert old_agent != new_agent
Example #16
def test_config_updates_then_all_executors_killed():
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    hosts = shakedown.get_service_ips(config.SERVICE_NAME)
    config.bump_world_cpus()
    [sdk_cmd.kill_task_with_pattern('helloworld.executor.Main', h) for h in hosts]
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()
def test_cleanup_then_kill_all_task_in_node(install_framework):
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        run_cleanup,
        lambda: [kill_task_with_pattern('CassandraDaemon', h) for h in hosts]
    )

    check_health()
Example #18
def test_config_update_then_kill_all_task_in_node():
    #  kill both world tasks
    world_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'world')
    hosts = shakedown.get_service_ips(config.SERVICE_NAME)
    config.bump_world_cpus()
    [sdk_cmd.kill_task_with_pattern('world', h) for h in hosts]
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, 'world', world_ids)
    config.check_running()
Example #19
def test_config_update_eventually_succeeds_after_all_brokers_fail():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        increment_broker_port_config,
        lambda: [kill_task_with_pattern('kafka.Kafka', h) for h in hosts]
    )

    check_health()
Example #20
def test_config_update_then_kill_all_task_in_node():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        bump_cpu_count_config,
        lambda: [kill_task_with_pattern('CassandraDaemon', h) for h in hosts]
    )

    check_health()
def test_repair_then_kill_all_task_in_node():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        run_repair,
        lambda: [kill_task_with_pattern('CassandraDaemon', h) for h in hosts]
    )

    check_health()
Example #23
def test_config_update_then_kill_all_task_in_node():
    #  kill both world tasks
    world_ids = tasks.get_task_ids(PACKAGE_NAME, 'world')
    bump_world_cpus()
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    [tasks.kill_task_with_pattern('world', h) for h in hosts]
    tasks.check_tasks_updated(PACKAGE_NAME, 'world', world_ids)
    check_running()
def test_cleanup_then_all_executors_killed():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)

    run_planned_operation(
        run_cleanup, lambda:
        [kill_task_with_pattern('cassandra.executor.Main', h) for h in hosts],
        lambda: recover_failed_agents(hosts))

    check_health()
def test_cleanup_then_kill_all_task_in_node():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)

    run_planned_operation(
        run_cleanup,
        lambda: [kill_task_with_pattern('CassandraDaemon', h) for h in hosts],
        lambda: recover_failed_agents(hosts))

    check_health()
Example #26
def test_all_partition():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)

    for host in hosts:
        shakedown.partition_agent(host)
    for host in hosts:
        shakedown.reconnect_agent(host)

    check_health()
Example #27
def test_service_becomes_healthy_after_all_agents_are_partitioned():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)

    for host in hosts:
        spin(shakedown.partition_agent, lambda x: (True, ''), host)
    for host in hosts:
        shakedown.reconnect_agent(host)

    check_health()
def test_repair_then_kill_task_in_node():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    host = get_node_host()

    run_planned_operation(
        run_repair, lambda: kill_task_with_pattern('CassandraDaemon', host),
        lambda: recover_failed_agents(hosts))

    check_health()
Example #31
def get_scheduler_host(service_name):
    # Marathon mangles foldered paths as follows: "/path/to/svc" => "svc.to.path"
    task_name_elems = service_name.lstrip('/').split('/')
    task_name_elems.reverse()
    app_name = '.'.join(task_name_elems)
    ips = shakedown.get_service_ips('marathon', app_name)
    if len(ips) == 0:
        raise Exception('No IPs found for marathon task "{}". Available tasks are: {}'.format(
            app_name, [task['name'] for task in shakedown.get_service_tasks('marathon')]))
    return ips.pop()
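For illustration (an assumption, not from the source), the folder-path mangling handled above behaves as follows:

# get_scheduler_host('/folder/path/hello-world') looks up the Marathon app
# named 'hello-world.path.folder' and returns one of its IPs.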
def test_cleanup_then_all_executors_killed(install_framework):
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        run_cleanup(),
        lambda: [
            kill_task_with_pattern('cassandra.executor.Main', h) for h in hosts
        ]
    )

    check_health()
Example #33
def test_repair_then_all_executors_killed():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        run_repair,
        lambda: [
            kill_task_with_pattern('cassandra.executor.Main', h) for h in hosts
        ]
    )

    check_health()
def test_config_update_then_all_executors_killed():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        bump_cpu_count_config,
        lambda: [
            kill_task_with_pattern('cassandra.executor.Main', h) for h in hosts
        ]
    )

    check_health()
Example #36
def test_kill_all_journalnodes():
    journal_ids = tasks.get_task_ids(PACKAGE_NAME, 'journal')
    data_ids = tasks.get_task_ids(PACKAGE_NAME, 'data')

    for host in shakedown.get_service_ips(PACKAGE_NAME):
        tasks.kill_task_with_pattern('journalnode', host)

    check_healthy()
    # name nodes fail and restart, so don't check those
    tasks.check_tasks_updated(PACKAGE_NAME, 'journal', journal_ids)
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'data', data_ids)
def test_repair_then_all_partition():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)

    def partition():
        for host in hosts:
            shakedown.partition_agent(host)
        for host in hosts:
            shakedown.reconnect_agent(host)

    run_planned_operation(run_repair, partition)

    check_health()
def test_config_update_then_all_partition():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)

    def partition():
        for host in hosts:
            shakedown.partition_agent(host)
        for host in hosts:
            shakedown.reconnect_agent(host)

    run_planned_operation(bump_cpu_count_config, partition)

    check_health()
Example #40
def test_kill_all_datanodes():
    journal_ids = tasks.get_task_ids(PACKAGE_NAME, 'journal')
    name_ids = tasks.get_task_ids(PACKAGE_NAME, 'name')
    data_ids = tasks.get_task_ids(PACKAGE_NAME, 'data')

    for host in shakedown.get_service_ips(PACKAGE_NAME):
        tasks.kill_task_with_pattern('datanode', host)

    check_healthy()
    tasks.check_tasks_updated(PACKAGE_NAME, 'data', data_ids)
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'journal', journal_ids)
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'name', name_ids)
def test_cleanup_then_all_partition(install_framework):
    hosts = shakedown.get_service_ips(PACKAGE_NAME)

    def partition():
        for host in hosts:
            shakedown.partition_agent(host)
        for host in hosts:
            shakedown.reconnect_agent(host)

    run_planned_operation(run_cleanup, partition)

    check_health()
Example #44
def test_kill_all_journalnodes():
    journal_ids = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'journal')
    data_ids = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, 'data')

    for host in shakedown.get_service_ips(FOLDERED_SERVICE_NAME):
        sdk_tasks.kill_task_with_pattern('journalnode', host)

    expect_recovery()
    # name nodes fail and restart, so don't check those
    sdk_tasks.check_tasks_updated(FOLDERED_SERVICE_NAME, 'journal',
                                  journal_ids)
    sdk_tasks.check_tasks_not_updated(FOLDERED_SERVICE_NAME, 'data', data_ids)
Example #45
def test_config_update_eventually_succeeds_after_all_agents_are_partitioned():
    hosts = shakedown.get_service_ips(PACKAGE_NAME)

    def partition():
        for host in hosts:
            spin(shakedown.partition_agent, lambda x: (True, ''), host)
        for host in hosts:
            shakedown.reconnect_agent(host)

    run_planned_operation(increment_broker_port_config, partition)

    check_health()
Example #47
def get_tasks_avoiding_scheduler(service_name, task_name_pattern):
    '''Returns a list of tasks which are not located on the Scheduler's machine.

    Also avoid killing the system that the scheduler is on. This is just to speed up testing.
    In practice, the scheduler would eventually get relaunched on a different node by Marathon and
    we'd be able to proceed with repairing the service from there. However, it takes 5-20 minutes
    for Mesos to decide that the agent is dead. This is also why we perform a manual 'ls' check to
    verify the host is down, rather than waiting for Mesos to tell us.
    '''
    scheduler_ip = shakedown.get_service_ips('marathon', service_name).pop()
    log.info('Scheduler IP: {}'.format(scheduler_ip))

    server_tasks = [
        task for task in get_summary()
        if task_name_pattern.match(task.name)]

    avoid_tasks = [task for task in server_tasks if task.host != scheduler_ip]
    log.info('Found tasks avoiding scheduler at {}: {}'.format(scheduler_ip, avoid_tasks))
    return avoid_tasks
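A hedged usage sketch for the helper above (the task-name pattern and kill target are assumptions based on the Cassandra examples in this listing): pick a task that is not co-located with the scheduler and kill the process on its host.

import re

# Hypothetical usage, not from the original source.
tasks_to_kill = get_tasks_avoiding_scheduler(
    config.SERVICE_NAME, re.compile('^node-[0-9]+-server$'))
assert tasks_to_kill, 'No tasks found away from the scheduler'
sdk_cmd.kill_task_with_pattern('CassandraDaemon', tasks_to_kill[0].host)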
Example #48
def test_shutdown_host_test():
    scheduler_ip = shakedown.get_service_ips('marathon', config.SERVICE_NAME).pop()
    log.info('marathon ip = {}'.format(scheduler_ip))

    node_ip = None
    pod_name = None
    for pod_id in range(0, config.DEFAULT_TASK_COUNT):
        pod_name = 'node-{}'.format(pod_id)
        pod_host = get_pod_host(pod_name)
        if pod_host != scheduler_ip:
            node_ip = pod_host
            break

    assert node_ip is not None, 'Could not find a node to shut down'

    old_agent = get_pod_agent(pod_name)
    log.info('pod name = {}, node_ip = {}, agent = {}'.format(pod_name, node_ip, old_agent))

    task_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, pod_name)

    # instead of partitioning or reconnecting, we shut down the host permanently
    status, stdout = shakedown.run_command_on_agent(node_ip, 'sudo shutdown -h +1')
    log.info('shutdown agent {}: [{}] {}'.format(node_ip, status, stdout))

    assert status is True

    log.info('sleeping 100s after shutting down agent')
    time.sleep(100)

    cmd.svc_cli(config.PACKAGE_NAME, config.SERVICE_NAME, 'pod replace {}'.format(pod_name))
    sdk_tasks.check_tasks_updated(config.SERVICE_NAME, pod_name, task_ids)

    # double check that all tasks are running
    sdk_tasks.check_running(config.SERVICE_NAME, config.DEFAULT_TASK_COUNT)
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    new_agent = get_pod_agent(pod_name)
    assert old_agent != new_agent
Example #49
def test_kill_scheduler():
    sdk_cmd.kill_task_with_pattern('hdfs.scheduler.Main', shakedown.get_service_ips('marathon').pop())
    config.check_healthy(service_name=sdk_utils.get_foldered_name(config.SERVICE_NAME))
Example #50
def test_service_becomes_healthy_after_all_brokers_fail():
    for host in shakedown.get_service_ips(PACKAGE_NAME):
        kill_task_with_pattern('kafka.Kafka', host)

    check_health()
Example #51
def get_broker_host():
    return shakedown.get_service_ips(PACKAGE_NAME).pop()
Example #52
def test_all_executors_killed():
    for host in shakedown.get_service_ips(config.SERVICE_NAME):
        sdk_cmd.kill_task_with_pattern('helloworld.executor.Main', host)
    config.check_running()
Example #53
def get_marathon_host():
    return shakedown.get_service_ips('marathon', PACKAGE_NAME).pop()
Example #54
def test_kill_scheduler():
    sdk_tasks.kill_task_with_pattern('hdfs.scheduler.Main', shakedown.get_service_ips('marathon').pop())
    config.check_healthy(service_name=FOLDERED_SERVICE_NAME)
def test_kill_all_task_in_node():
    for host in shakedown.get_service_ips(PACKAGE_NAME):
        kill_task_with_pattern('CassandraDaemon', host)

    check_health()
Example #56
def ip_of_mom():
    service_ips = shakedown.get_service_ips('marathon', 'marathon-user')
    for mom_ip in service_ips:
        return mom_ip
def fn():
    try:
        return shakedown.get_service_ips(PACKAGE_NAME)
    except IndexError:
        return set()
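This helper reads like a predicate for a wait/retry loop; a hedged usage sketch (an assumption) using shakedown.wait_for as it appears elsewhere in this listing:

# Hypothetical usage, not from the original source: wait until the service
# reports at least one IP.
shakedown.wait_for(lambda: len(fn()) > 0, timeout_seconds=5 * 60)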
def test_all_executors_killed():
    for host in shakedown.get_service_ips(PACKAGE_NAME):
        kill_task_with_pattern('cassandra.executor.Main', host)

    check_health()
def get_scheduler_host():
    return shakedown.get_service_ips('marathon').pop()