def test_partition_master_outgoing():
    """Partition the leading master (outgoing only), reconnect, check health.

    The partition is requested with ``incoming=False, outgoing=True`` and is
    held for 20 seconds before the master is reconnected.
    """
    leader_ip = shakedown.master_leader_ip()

    shakedown.partition_master(leader_ip, incoming=False, outgoing=True)
    # Keep the partition in place for a while before healing it.
    time.sleep(20)
    shakedown.reconnect_master(leader_ip)

    check_health()
def test_partition_master_both_ways():
    """Partition the leading master with default options, reconnect, check health.

    The partition is held for 20 seconds before the master is reconnected.
    """
    leader_ip = shakedown.master_leader_ip()

    # No direction flags are passed, so partition_master's defaults apply.
    shakedown.partition_master(leader_ip)
    time.sleep(20)
    shakedown.reconnect_master(leader_ip)

    check_health()
Пример #3
0
def test_partition(install_framework):
    """Partition one node's agent, reconnect it right away, then check health.

    ``install_framework`` is not used in the body; presumably it is a pytest
    fixture requested for its setup/teardown -- TODO confirm.
    """
    node_host = get_node_host()

    shakedown.partition_agent(node_host)
    shakedown.reconnect_agent(node_host)

    check_health()
def test_kill_all_task_in_node():
    """Kill 'CassandraDaemon' on every service host, then verify recovery.

    After the kills, ``recover_failed_agents`` is given the same hosts and
    ``check_health`` is run.
    """
    service_hosts = shakedown.get_service_ips(PACKAGE_NAME)

    for target in service_hosts:
        kill_task_with_pattern('CassandraDaemon', target)

    recover_failed_agents(service_hosts)
    check_health()
def test_config_update_then_master_killed():
    """Pair ``bump_cpu_count_config(-0.1)`` with killing 'mesos-master'.

    Both callables are handed to ``run_planned_operation``; afterwards the
    test verifies the leader changed and checks health.
    """
    leader_ip = shakedown.master_leader_ip()

    def lower_cpu_count():
        return bump_cpu_count_config(-0.1)

    def kill_leading_master():
        return kill_task_with_pattern('mesos-master', leader_ip)

    run_planned_operation(lower_cpu_count, kill_leading_master)
    verify_leader_changed(leader_ip)
    check_health()
Пример #6
0
def test_cleanup_then_kill_all_task_in_node(install_framework):
    """Pair ``run_cleanup`` with killing 'CassandraDaemon' on every host.

    ``install_framework`` is not used in the body; presumably it is a pytest
    fixture requested for its setup/teardown -- TODO confirm.
    """
    service_hosts = shakedown.get_service_ips(PACKAGE_NAME)

    def kill_daemon_everywhere():
        return [
            kill_task_with_pattern('CassandraDaemon', target)
            for target in service_hosts
        ]

    run_planned_operation(run_cleanup, kill_daemon_everywhere)

    check_health()
Пример #7
0
def test_config_update_then_scheduler_died(install_framework):
    """Pair ``bump_cpu_count_config`` with killing the scheduler process.

    ``install_framework`` is not used in the body; presumably it is a pytest
    fixture requested for its setup/teardown -- TODO confirm.
    """
    sched_host = get_scheduler_host()

    def kill_scheduler():
        return kill_task_with_pattern('cassandra.scheduler.Main', sched_host)

    run_planned_operation(bump_cpu_count_config, kill_scheduler)

    check_health()
Пример #8
0
def test_cleanup_then_scheduler_died(install_framework):
    """Pair ``run_cleanup`` with killing the scheduler process, check health.

    ``install_framework`` is not used in the body; presumably it is a pytest
    fixture requested for its setup/teardown -- TODO confirm.
    """
    sched_host = get_scheduler_host()

    def kill_scheduler():
        return kill_task_with_pattern('cassandra.scheduler.Main', sched_host)

    run_planned_operation(run_cleanup, kill_scheduler)

    check_health()
def test_all_executors_killed():
    """Kill 'cassandra.executor.Main' on every service host, verify recovery.

    After the kills, ``recover_failed_agents`` is given the same hosts and
    ``check_health`` is run.
    """
    service_hosts = shakedown.get_service_ips(PACKAGE_NAME)

    for target in service_hosts:
        kill_task_with_pattern('cassandra.executor.Main', target)

    recover_failed_agents(service_hosts)
    check_health()
def test_repair_then_scheduler_died():
    """Pair ``run_repair`` with killing the scheduler process, check health."""
    sched_host = get_scheduler_host()

    def kill_scheduler():
        return kill_task_with_pattern('cassandra.scheduler.Main', sched_host)

    run_planned_operation(run_repair, kill_scheduler)

    check_health()
Пример #11
0
def test_config_update_then_kill_task_in_node(install_framework):
    """Pair ``bump_cpu_count_config`` with killing 'CassandraDaemon' on a node.

    ``install_framework`` is not used in the body; presumably it is a pytest
    fixture requested for its setup/teardown -- TODO confirm.
    """
    node_host = get_node_host()

    def kill_daemon():
        return kill_task_with_pattern('CassandraDaemon', node_host)

    run_planned_operation(bump_cpu_count_config, kill_daemon)

    check_health()
def test_nodes_decrease_by_one_should_fail():
    """Lower NODES by one, expect the plan to reach ERROR, then revert.

    The '/cassandra' Marathon app is updated with its NODES env value
    decremented; the plan is then expected to report ERROR with three steps
    in its "Deploy" phase.  Afterwards NODES is restored and the plan is
    expected to report COMPLETE with three "Deploy" steps.

    The revert runs in a ``finally`` clause so that a failed expectation in
    the first half does not leave the decremented node count deployed for
    whatever runs next.
    """
    # Presumably waits for the current plan to be COMPLETE before the config
    # is touched; the returned plan itself is not needed.
    infinity_commons.get_and_verify_plan(
        lambda p: p['status'] == infinity_commons.PlanState.COMPLETE.value)

    mc = dcos.marathon.create_client()
    app = mc.get_app('/cassandra')
    app = infinity_commons.strip_meta(app)

    oe = app['env']
    env_node_count = int(oe['NODES']) - 1
    oe['NODES'] = str(env_node_count)
    app['env'] = oe
    print("Updated node count: {}".format(app['env']['NODES']))

    try:
        print(mc.update_app(app_id='/cassandra', payload=app, force=True))
        check_health()
        plan = infinity_commons.get_and_verify_plan(
            lambda p: (
                p['status'] == infinity_commons.PlanState.ERROR.value and
                len(infinity_commons.filter_phase(p, "Deploy")['steps']) == 3))
        print(plan)
        assert plan['status'] == infinity_commons.PlanState.ERROR.value
    finally:
        # Revert, even when one of the expectations above failed.
        oe = app['env']
        env_node_count = int(oe['NODES']) + 1
        oe['NODES'] = str(env_node_count)
        app['env'] = oe
        print("Reverted node count: {}".format(app['env']['NODES']))
        print(mc.update_app(app_id='/cassandra', payload=app, force=True))
        check_health()
        plan = infinity_commons.get_and_verify_plan(
            lambda p: (
                p['status'] == infinity_commons.PlanState.COMPLETE.value and
                len(infinity_commons.filter_phase(p, "Deploy")['steps']) == 3))
        print(plan)
        assert plan['status'] == infinity_commons.PlanState.COMPLETE.value
def test_change_disk_should_fail():
    """Lower CASSANDRA_DISK_MB by one, expect plan ERROR, then revert.

    The '/cassandra' Marathon app is updated with its CASSANDRA_DISK_MB env
    value decremented; the plan is then expected to report ERROR.  Afterwards
    the value is restored and the plan is expected to report COMPLETE.

    The revert runs in a ``finally`` clause so that a failed expectation in
    the first half does not leave the modified disk size deployed for
    whatever runs next.
    """
    # Presumably waits for the current plan to be COMPLETE before the config
    # is touched; the returned plan itself is not needed.
    infinity_commons.get_and_verify_plan(
        lambda p: p['status'] == infinity_commons.PlanState.COMPLETE.value)

    mc = dcos.marathon.create_client()
    app = mc.get_app('/cassandra')
    app = infinity_commons.strip_meta(app)

    oe = app['env']
    disk = int(oe['CASSANDRA_DISK_MB']) - 1
    oe['CASSANDRA_DISK_MB'] = str(disk)
    app['env'] = oe
    print("Updated CASSANDRA_DISK_MB: {}".format(
        app['env']['CASSANDRA_DISK_MB']))

    try:
        print(mc.update_app(app_id='/cassandra', payload=app, force=True))
        check_health()
        plan = infinity_commons.get_and_verify_plan(
            lambda p: p['status'] == infinity_commons.PlanState.ERROR.value)
        print(plan)
        assert plan['status'] == infinity_commons.PlanState.ERROR.value
    finally:
        # Revert, even when one of the expectations above failed.
        oe = app['env']
        disk = int(oe['CASSANDRA_DISK_MB']) + 1
        oe['CASSANDRA_DISK_MB'] = str(disk)
        app['env'] = oe
        print("Reverted CASSANDRA_DISK_MB: {}".format(
            app['env']['CASSANDRA_DISK_MB']))
        print(mc.update_app(app_id='/cassandra', payload=app, force=True))
        check_health()
        plan = infinity_commons.get_and_verify_plan(
            lambda p: p['status'] == infinity_commons.PlanState.COMPLETE.value)
        print(plan)
        assert plan['status'] == infinity_commons.PlanState.COMPLETE.value
Пример #14
0
def test_config_update_then_executor_killed(install_framework):
    """Pair ``bump_cpu_count_config`` with killing the executor on a node.

    ``install_framework`` is not used in the body; presumably it is a pytest
    fixture requested for its setup/teardown -- TODO confirm.
    """
    node_host = get_node_host()

    def kill_executor():
        return kill_task_with_pattern('cassandra.executor.Main', node_host)

    run_planned_operation(bump_cpu_count_config, kill_executor)

    check_health()
Пример #15
0
def test_cleanup_then_executor_killed(install_framework):
    """Pair ``run_cleanup`` with killing the executor on a node, check health.

    ``install_framework`` is not used in the body; presumably it is a pytest
    fixture requested for its setup/teardown -- TODO confirm.
    """
    node_host = get_node_host()

    def kill_executor():
        return kill_task_with_pattern('cassandra.executor.Main', node_host)

    run_planned_operation(run_cleanup, kill_executor)

    check_health()
Пример #16
0
def install_framework():
    """Generator that installs the package around a single ``yield``.

    Before yielding it uninstalls any existing instance, installs
    ``PACKAGE_NAME`` and checks health; after the yield it uninstalls again.
    Presumably registered as a pytest yield-fixture by a decorator that is
    not visible here -- TODO confirm.
    """
    # Setup: start from a clean slate, then install and verify.
    uninstall()
    shakedown.install_package_and_wait(PACKAGE_NAME)
    check_health()

    yield

    # Teardown.
    uninstall()
def test_partition():
    """Partition one node's agent, reconnect it right away, then check health."""
    node_host = get_node_host()

    shakedown.partition_agent(node_host)
    shakedown.reconnect_agent(node_host)

    check_health()
Пример #18
0
def test_cleanup_then_all_executors_killed(install_framework):
    """Pair ``run_cleanup`` with killing the executor on every service host.

    ``run_cleanup`` is passed as a callable (not called here), matching how
    the other tests in this file hand their operation to
    ``run_planned_operation``; calling it eagerly would run the cleanup
    before ``run_planned_operation`` starts and pass its return value
    instead of the function.

    ``install_framework`` is not used in the body; presumably it is a pytest
    fixture requested for its setup/teardown -- TODO confirm.
    """
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        run_cleanup,
        lambda: [kill_task_with_pattern('cassandra.executor.Main', h)
                 for h in hosts])

    check_health()
def test_repair_then_master_killed():
    """Pair ``run_repair`` with killing 'mesos-master', then check health."""

    def kill_master():
        # No host is passed, so kill_task_with_pattern's default applies.
        return kill_task_with_pattern('mesos-master')

    run_planned_operation(run_repair, kill_master)

    check_health()
def test_repair_then_zk_killed():
    """Pair ``run_repair`` with killing 'zookeeper', then check health."""

    def kill_zookeeper():
        # No host is passed, so kill_task_with_pattern's default applies.
        return kill_task_with_pattern('zookeeper')

    run_planned_operation(run_repair, kill_zookeeper)

    check_health()
def test_partition(install_framework):
    """Partition one node's agent, reconnect it right away, then check health.

    ``install_framework`` is not used in the body; presumably it is a pytest
    fixture requested for its setup/teardown -- TODO confirm.
    """
    agent_host = get_node_host()

    shakedown.partition_agent(agent_host)
    shakedown.reconnect_agent(agent_host)

    check_health()
Пример #22
0
def install_framework():
    """Generator that installs the package around a single ``yield``.

    Before yielding it installs ``PACKAGE_NAME`` and checks health; after the
    yield it uninstalls.  Presumably registered as a pytest yield-fixture by
    a decorator that is not visible here -- TODO confirm.
    """
    # Setup.
    shakedown.install_package_and_wait(PACKAGE_NAME)
    check_health()

    yield

    # Teardown.
    uninstall()
Пример #23
0
def test_nodes_increase_by_one():
    """Raise NODES by one, expect the plan to complete, then reinstall.

    The '/cassandra' Marathon app is updated with its NODES env value
    incremented.  The plan is expected to report COMPLETE with four steps in
    its "Deploy" phase, the step at index ``env_node_count - 1`` also being
    COMPLETE.  Finally the package is uninstalled and installed again.
    """
    complete = infinity_commons.PlanState.COMPLETE.value

    # Presumably waits for the current plan to be COMPLETE before the config
    # is touched; the returned plan itself is not needed.
    infinity_commons.get_and_verify_plan(lambda p: p['status'] == complete)

    mc = dcos.marathon.create_client()
    app = mc.get_app('/cassandra')
    app = infinity_commons.strip_meta(app)
    oe = app['env']
    env_node_count = int(oe['NODES']) + 1
    oe['NODES'] = str(env_node_count)
    app['env'] = oe
    print("Updated node count: {}".format(app['env']['NODES']))
    print(mc.update_app(app_id='/cassandra', payload=app, force=True))
    check_health()

    def scaled_up(p):
        """Return True once the plan and its newest Deploy step are COMPLETE."""
        if p['status'] != complete:
            return False
        # Look the Deploy phase up once per poll instead of twice.
        steps = infinity_commons.filter_phase(p, "Deploy")['steps']
        # NOTE(review): the literal 4 presumably assumes a 3-node starting
        # cluster -- confirm before changing the default node count.
        return (len(steps) == 4 and
                steps[env_node_count - 1]['status'] == complete)

    plan = infinity_commons.get_and_verify_plan(scaled_up)
    print(plan)
    assert plan['status'] == complete
    # reinstall after increase:
    uninstall()
    install()
    check_health()
Пример #24
0
def test_change_disk_should_fail():
    """Lower CASSANDRA_DISK_MB by one, expect plan ERROR, then revert.

    The '/cassandra' Marathon app is updated with its CASSANDRA_DISK_MB env
    value decremented; the plan is then expected to report ERROR.  Afterwards
    the value is restored and the plan is expected to report COMPLETE.

    The revert runs in a ``finally`` clause so that a failed expectation in
    the first half does not leave the modified disk size deployed for
    whatever runs next.
    """
    # Presumably waits for the current plan to be COMPLETE before the config
    # is touched; the returned plan itself is not needed.
    infinity_commons.get_and_verify_plan(
        lambda p: p['status'] == infinity_commons.PlanState.COMPLETE.value)

    mc = dcos.marathon.create_client()
    app = mc.get_app('/cassandra')
    app = infinity_commons.strip_meta(app)

    oe = app['env']
    disk = int(oe['CASSANDRA_DISK_MB']) - 1
    oe['CASSANDRA_DISK_MB'] = str(disk)
    app['env'] = oe
    print("Updated CASSANDRA_DISK_MB: {}".format(
        app['env']['CASSANDRA_DISK_MB']))

    try:
        print(mc.update_app(app_id='/cassandra', payload=app, force=True))
        check_health()
        plan = infinity_commons.get_and_verify_plan(
            lambda p: p['status'] == infinity_commons.PlanState.ERROR.value)
        print(plan)
        assert plan['status'] == infinity_commons.PlanState.ERROR.value
    finally:
        # Revert, even when one of the expectations above failed.
        oe = app['env']
        disk = int(oe['CASSANDRA_DISK_MB']) + 1
        oe['CASSANDRA_DISK_MB'] = str(disk)
        app['env'] = oe
        print("Reverted CASSANDRA_DISK_MB: {}".format(
            app['env']['CASSANDRA_DISK_MB']))
        print(mc.update_app(app_id='/cassandra', payload=app, force=True))
        check_health()
        plan = infinity_commons.get_and_verify_plan(
            lambda p: p['status'] == infinity_commons.PlanState.COMPLETE.value)
        print(plan)
        assert plan['status'] == infinity_commons.PlanState.COMPLETE.value
def test_upgrade_downgrade():
    """Install the master version, switch to the test version, and back.

    Package versions are read via ``get_pkg_version`` before and after the
    test repo is removed.  Each version change destroys the service, installs
    the other version and runs ``check_post_version_change_health``.
    """
    repo_name, repo_url = get_test_repo_info()

    test_version = get_pkg_version()
    print('Found test version: {}'.format(test_version))

    # With the test repo removed, the version lookup is repeated and the
    # result is treated as the master version.
    remove_repo(repo_name, test_version)
    master_version = get_pkg_version()
    print('Found master version: {}'.format(master_version))

    print('Installing master version')
    install(package_version=master_version)
    check_health()

    complete = infinity_commons.PlanState.COMPLETE.value
    plan = infinity_commons.get_and_verify_plan(
        lambda p: p['status'] == complete)
    assert plan['status'] == complete

    # TODO: write some data

    print('Upgrading to test version')
    destroy_service()
    add_repo(repo_name, repo_url, master_version)
    install(package_version=test_version)
    check_post_version_change_health()

    print('Downgrading to master version')
    destroy_service()
    install(package_version=master_version)
    check_post_version_change_health()
def test_cleanup_then_kill_task_in_node(install_framework):
    """Pair ``run_cleanup`` with killing 'CassandraDaemon' on a node.

    ``install_framework`` is not used in the body; presumably it is a pytest
    fixture requested for its setup/teardown -- TODO confirm.
    """
    node_host = get_node_host()

    def kill_daemon():
        return kill_task_with_pattern('CassandraDaemon', node_host)

    run_planned_operation(run_cleanup, kill_daemon)

    check_health()
def test_repair_then_zk_killed():
    """Pair ``run_repair`` with killing 'zookeeper' on the leading master.

    A third callable that verifies the leader changed is also handed to
    ``run_planned_operation``; health is checked afterwards.
    """
    leader_ip = shakedown.master_leader_ip()

    def kill_zookeeper():
        return kill_task_with_pattern('zookeeper', leader_ip)

    def confirm_new_leader():
        return verify_leader_changed(leader_ip)

    run_planned_operation(run_repair, kill_zookeeper, confirm_new_leader)

    check_health()
def test_cleanup_then_scheduler_died(install_framework):
    """Pair ``run_cleanup`` with killing the scheduler process, check health.

    ``install_framework`` is not used in the body; presumably it is a pytest
    fixture requested for its setup/teardown -- TODO confirm.
    """
    scheduler = get_scheduler_host()

    def kill_scheduler_main():
        return kill_task_with_pattern('cassandra.scheduler.Main', scheduler)

    run_planned_operation(run_cleanup, kill_scheduler_main)

    check_health()
def test_cleanup_then_master_killed():
    """Pair ``run_cleanup`` with killing 'mesos-master' on the leader.

    Afterwards the test verifies the leader changed and checks health.
    """
    leader_ip = shakedown.master_leader_ip()

    def kill_leading_master():
        return kill_task_with_pattern('mesos-master', leader_ip)

    run_planned_operation(run_cleanup, kill_leading_master)

    verify_leader_changed(leader_ip)
    check_health()
def test_config_update_then_all_executors_killed():
    """Pair ``bump_cpu_count_config`` with killing every host's executor.

    A third callable that runs ``recover_failed_agents`` on the same hosts is
    also handed to ``run_planned_operation``; health is checked afterwards.
    """
    service_hosts = shakedown.get_service_ips(PACKAGE_NAME)

    def kill_executors():
        return [
            kill_task_with_pattern('cassandra.executor.Main', target)
            for target in service_hosts
        ]

    def recover_agents():
        return recover_failed_agents(service_hosts)

    run_planned_operation(
        bump_cpu_count_config, kill_executors, recover_agents)
    check_health()
def test_cleanup_then_executor_killed(install_framework):
    """Pair ``run_cleanup`` with killing the executor on a node, check health.

    ``install_framework`` is not used in the body; presumably it is a pytest
    fixture requested for its setup/teardown -- TODO confirm.
    """
    target = get_node_host()

    def kill_executor_main():
        return kill_task_with_pattern('cassandra.executor.Main', target)

    run_planned_operation(run_cleanup, kill_executor_main)

    check_health()
def test_cleanup_then_kill_all_task_in_node(install_framework):
    """Pair ``run_cleanup`` with killing 'CassandraDaemon' on every host.

    ``install_framework`` is not used in the body; presumably it is a pytest
    fixture requested for its setup/teardown -- TODO confirm.
    """
    all_hosts = shakedown.get_service_ips(PACKAGE_NAME)

    def kill_every_daemon():
        return [
            kill_task_with_pattern('CassandraDaemon', ip) for ip in all_hosts
        ]

    run_planned_operation(run_cleanup, kill_every_daemon)

    check_health()
def test_config_update_then_zk_killed():
    """Pair ``bump_cpu_count_config`` with killing 'zookeeper' on the leader.

    A third callable that verifies the leader changed is also handed to
    ``run_planned_operation``; health is checked afterwards.
    """
    leader_ip = shakedown.master_leader_ip()

    def kill_zookeeper():
        return kill_task_with_pattern('zookeeper', leader_ip)

    def confirm_new_leader():
        return verify_leader_changed(leader_ip)

    run_planned_operation(
        bump_cpu_count_config, kill_zookeeper, confirm_new_leader)

    check_health()
def test_config_update_then_executor_killed():
    """Pair ``bump_cpu_count_config(-0.1)`` with killing a node's executor.

    A third callable that runs ``recover_failed_agents`` on that one host is
    also handed to ``run_planned_operation``; health is checked afterwards.
    """
    node_host = get_node_host()

    def lower_cpu_count():
        return bump_cpu_count_config(-0.1)

    def kill_executor():
        return kill_task_with_pattern('cassandra.executor.Main', node_host)

    def recover_agent():
        return recover_failed_agents([node_host])

    run_planned_operation(lower_cpu_count, kill_executor, recover_agent)
    check_health()
def test_repair_then_scheduler_died():
    """Pair ``run_repair`` with killing the scheduler process, check health."""
    scheduler = get_scheduler_host()

    def kill_scheduler_main():
        return kill_task_with_pattern('cassandra.scheduler.Main', scheduler)

    run_planned_operation(run_repair, kill_scheduler_main)

    check_health()
def test_config_update_then_executor_killed():
    """Pair ``bump_cpu_count_config`` with killing the executor on a node."""
    target = get_node_host()

    def kill_executor_main():
        return kill_task_with_pattern('cassandra.executor.Main', target)

    run_planned_operation(bump_cpu_count_config, kill_executor_main)

    check_health()
Пример #37
0
def test_repair_then_executor_killed():
    """Pair ``run_repair`` with killing the executor on a node, check health."""
    node_host = get_node_host()

    def kill_executor():
        return kill_task_with_pattern('cassandra.executor.Main', node_host)

    run_planned_operation(run_repair, kill_executor)

    check_health()
Пример #38
0
def test_config_update_then_kill_all_task_in_node():
    """Pair ``bump_cpu_count_config`` with killing 'CassandraDaemon' everywhere.

    The kill callable targets every IP returned by
    ``shakedown.get_service_ips(PACKAGE_NAME)``.
    """
    service_hosts = shakedown.get_service_ips(PACKAGE_NAME)

    def kill_daemon_everywhere():
        return [
            kill_task_with_pattern('CassandraDaemon', target)
            for target in service_hosts
        ]

    run_planned_operation(bump_cpu_count_config, kill_daemon_everywhere)

    check_health()
def test_config_update_then_scheduler_died():
    """Pair ``bump_cpu_count_config`` with killing the scheduler process."""
    sched_host = get_scheduler_host()

    def kill_scheduler():
        return kill_task_with_pattern('cassandra.scheduler.Main', sched_host)

    run_planned_operation(bump_cpu_count_config, kill_scheduler)

    check_health()
def test_config_update_then_kill_task_in_node():
    """Pair ``bump_cpu_count_config`` with killing 'CassandraDaemon' on a node."""
    target = get_node_host()

    def kill_daemon():
        return kill_task_with_pattern('CassandraDaemon', target)

    run_planned_operation(bump_cpu_count_config, kill_daemon)

    check_health()
Пример #41
0
def test_partition():
    """Partition one node's agent, reconnect it right away, then check health.

    ``_block_on_adminrouter`` is called first, before the agent is touched.
    """
    node_host = get_node_host()

    _block_on_adminrouter()

    shakedown.partition_agent(node_host)
    shakedown.reconnect_agent(node_host)

    check_health()
Пример #42
0
def test_repair_then_kill_all_task_in_node():
    """Pair ``run_repair`` with killing 'CassandraDaemon' on every host."""
    service_hosts = shakedown.get_service_ips(PACKAGE_NAME)

    def kill_daemon_everywhere():
        return [
            kill_task_with_pattern('CassandraDaemon', target)
            for target in service_hosts
        ]

    run_planned_operation(run_repair, kill_daemon_everywhere)

    check_health()
Пример #43
0
def test_repair_then_kill_task_in_node():
    """Pair ``run_repair`` with killing 'CassandraDaemon' on a node."""
    node_host = get_node_host()

    def kill_daemon():
        return kill_task_with_pattern('CassandraDaemon', node_host)

    run_planned_operation(run_repair, kill_daemon)

    check_health()
def test_all_partition():
    """Partition every service host's agent, reconnect them all, check health.

    All hosts are partitioned before any of them is reconnected.
    """
    service_hosts = shakedown.get_service_ips(PACKAGE_NAME)

    for target in service_hosts:
        shakedown.partition_agent(target)

    for target in service_hosts:
        shakedown.reconnect_agent(target)

    check_health()
Пример #45
0
def test_all_partition():
    """Partition every service host's agent, reconnect them all, check health.

    The first pass partitions each host; only then does a second pass
    reconnect each host.
    """
    agent_ips = shakedown.get_service_ips(PACKAGE_NAME)

    for ip in agent_ips:
        shakedown.partition_agent(ip)

    for ip in agent_ips:
        shakedown.reconnect_agent(ip)

    check_health()
def test_repair_then_kill_task_in_node():
    """Pair ``run_repair`` with killing 'CassandraDaemon' on one node.

    A third callable that runs ``recover_failed_agents`` on all service hosts
    is also handed to ``run_planned_operation``; health is checked afterwards.
    """
    service_hosts = shakedown.get_service_ips(PACKAGE_NAME)
    node_host = get_node_host()

    def kill_daemon():
        return kill_task_with_pattern('CassandraDaemon', node_host)

    def recover_agents():
        return recover_failed_agents(service_hosts)

    run_planned_operation(run_repair, kill_daemon, recover_agents)

    check_health()
def test_cleanup_then_executor_killed():
    """Pair ``run_cleanup`` with killing the executor on one node.

    A third callable that runs ``recover_failed_agents`` on that one host is
    also handed to ``run_planned_operation``; health is checked afterwards.
    """
    node_host = get_node_host()

    def kill_executor():
        return kill_task_with_pattern('cassandra.executor.Main', node_host)

    def recover_agent():
        return recover_failed_agents([node_host])

    run_planned_operation(run_cleanup, kill_executor, recover_agent)

    check_health()
def test_cleanup_then_all_executors_killed(install_framework):
    """Pair ``run_cleanup`` with killing the executor on every service host.

    ``run_cleanup`` is passed as a callable (not called here), matching how
    the other tests in this file hand their operation to
    ``run_planned_operation``; calling it eagerly would run the cleanup
    before ``run_planned_operation`` starts and pass its return value
    instead of the function.

    ``install_framework`` is not used in the body; presumably it is a pytest
    fixture requested for its setup/teardown -- TODO confirm.
    """
    hosts = shakedown.get_service_ips(PACKAGE_NAME)
    run_planned_operation(
        run_cleanup,
        lambda: [
            kill_task_with_pattern('cassandra.executor.Main', h) for h in hosts
        ]
    )

    check_health()
def test_repair_then_partition():
    """Pair ``run_repair`` with a partition/reconnect of one node's agent."""
    node_host = get_node_host()

    def partition_then_reconnect():
        shakedown.partition_agent(node_host)
        shakedown.reconnect_agent(node_host)

    run_planned_operation(run_repair, partition_then_reconnect)

    check_health()
def test_repair_then_all_executors_killed():
    """Pair ``run_repair`` with killing the executor on every service host."""
    service_hosts = shakedown.get_service_ips(PACKAGE_NAME)

    def kill_executors():
        return [
            kill_task_with_pattern('cassandra.executor.Main', target)
            for target in service_hosts
        ]

    run_planned_operation(run_repair, kill_executors)

    check_health()
def test_config_update_then_all_executors_killed():
    """Pair ``bump_cpu_count_config`` with killing every host's executor."""
    service_hosts = shakedown.get_service_ips(PACKAGE_NAME)

    def kill_executors():
        return [
            kill_task_with_pattern('cassandra.executor.Main', target)
            for target in service_hosts
        ]

    run_planned_operation(bump_cpu_count_config, kill_executors)

    check_health()
def test_config_update_then_partition():
    """Pair ``bump_cpu_count_config`` with a partition/reconnect of one agent."""
    node_host = get_node_host()

    def partition_then_reconnect():
        shakedown.partition_agent(node_host)
        shakedown.reconnect_agent(node_host)

    run_planned_operation(bump_cpu_count_config, partition_then_reconnect)

    check_health()
def test_cleanup_then_partition(install_framework):
    """Pair ``run_cleanup`` with a partition/reconnect of one node's agent.

    ``install_framework`` is not used in the body; presumably it is a pytest
    fixture requested for its setup/teardown -- TODO confirm.
    """
    node_host = get_node_host()

    def partition_then_reconnect():
        shakedown.partition_agent(node_host)
        shakedown.reconnect_agent(node_host)

    run_planned_operation(run_cleanup, partition_then_reconnect)

    check_health()
def test_cleanup_then_all_partition(install_framework):
    """Pair ``run_cleanup`` with partitioning and reconnecting every agent.

    ``install_framework`` is not used in the body; presumably it is a pytest
    fixture requested for its setup/teardown -- TODO confirm.
    """
    service_hosts = shakedown.get_service_ips(PACKAGE_NAME)

    def partition_all_then_reconnect():
        # Every host is partitioned before any host is reconnected.
        for target in service_hosts:
            shakedown.partition_agent(target)
        for target in service_hosts:
            shakedown.reconnect_agent(target)

    run_planned_operation(run_cleanup, partition_all_then_reconnect)

    check_health()
def test_repair_then_all_partition():
    """Pair ``run_repair`` with partitioning and reconnecting every agent."""
    service_hosts = shakedown.get_service_ips(PACKAGE_NAME)

    def partition_all_then_reconnect():
        # Every host is partitioned before any host is reconnected.
        for target in service_hosts:
            shakedown.partition_agent(target)
        for target in service_hosts:
            shakedown.reconnect_agent(target)

    run_planned_operation(run_repair, partition_all_then_reconnect)

    check_health()
def test_config_update_then_all_partition():
    """Pair ``bump_cpu_count_config`` with partitioning every agent.

    The callable partitions every service host and then reconnects them all.
    """
    service_hosts = shakedown.get_service_ips(PACKAGE_NAME)

    def partition_all_then_reconnect():
        # Every host is partitioned before any host is reconnected.
        for target in service_hosts:
            shakedown.partition_agent(target)
        for target in service_hosts:
            shakedown.reconnect_agent(target)

    run_planned_operation(bump_cpu_count_config, partition_all_then_reconnect)

    check_health()