def test_config_update_eventually_succeeds_after_all_brokers_fail():
    """A broker-port config update must still converge when every broker is killed mid-rollout."""
    broker_ips = shakedown.get_service_ips(PACKAGE_NAME)

    def kill_every_broker():
        return [kill_task_with_pattern('kafka.Kafka', ip) for ip in broker_ips]

    run_planned_operation(increment_broker_port_config, kill_every_broker)

    check_health()
def test_service_becomes_healthy_after_agent_is_partitioned():
    """Partition the broker's agent, reconnect it, and expect a healthy service."""
    broker_host = get_broker_host()

    # spin() retries the partition until shakedown reports it took effect.
    spin(shakedown.partition_agent, lambda _: (True, ''), broker_host)
    shakedown.reconnect_agent(broker_host)

    check_health()
# Example #3
def test_upgrade_downgrade():
    """Install the master release, upgrade to the test build, then downgrade back.

    Health is verified after each version change; data is written before the
    first upgrade so replication has something to carry across versions.
    """
    # Ensure both Universe and the test repo exist.
    if len(shakedown.get_package_repos()['repositories']) != 2:
        print('No test repo found.  Skipping test_upgrade_downgrade')
        return

    test_version = get_pkg_version()
    print('Found test version: {}'.format(test_version))

    # Adding the master repo makes the master build resolvable as a package version.
    add_repo(MASTER_CUSTOM_NAME, MASTER_CUSTOM_URL, prev_version=test_version)

    master_version = get_pkg_version()
    print('Found master version: {}'.format(master_version))
    print('Installing master version')
    install({'package_version': master_version})
    check_health()
    write_some_data("data-0-node.hdfs.mesos", TEST_FILE_NAME)
    # gives chance for write to succeed and replication to occur
    time.sleep(5)

    print('Upgrading to test version')
    destroy_and_install(test_version)
    check_health_after_version_change()

    print('Downgrading to master version')
    destroy_and_install(master_version)
    check_health_after_version_change()

    # clean up
    remove_repo(prev_version=master_version)
def test_config_update_eventually_succeeds_after_scheduler_fails():
    """Killing the scheduler mid-update must not prevent the update from finishing."""
    scheduler_host = get_scheduler_host()

    def kill_scheduler():
        return kill_task_with_pattern('kafka.scheduler.Main', scheduler_host)

    run_planned_operation(increment_broker_port_config, kill_scheduler)

    check_health()
# Example #5
def test_config_update_eventually_succeeds_after_zk_fails():
    """A broker-port config update must survive a ZooKeeper outage."""
    def kill_zookeeper():
        return kill_task_with_pattern('zookeeper')

    run_planned_operation(increment_broker_port_config, kill_zookeeper)

    check_health()
# Example #6
def test_upgrade_downgrade():
    # Ensure both Universe and the test repo exist. @mgummelt
    if len(shakedown.get_package_repos()['repositories']) != 2:
        print('No kafka test repo found.  Skipping test_upgrade_downgrade')
        return

    test_repo_name, test_repo_url = get_test_repo_info()
    test_version = get_pkg_version()
    print('Found test version: {}'.format(test_version))
    remove_repo(test_repo_name, test_version)
    master_version = get_pkg_version()
    print('Found master version: {}'.format(master_version))

    print('Installing master version')
    install(package_version=master_version)
    check_health()

    plan = get_plan(lambda p: p['status'] == 'COMPLETE')
    assert plan['status'] == 'COMPLETE'

    topics_are_available()
    write_messages()

    print('Upgrading to test version')
    destroy_service()
    add_repo(test_repo_name, test_repo_url, master_version)
    install(package_version=test_version)
    check_post_version_change_health()

    print('Downgrading to master version')
    destroy_service()
    install(package_version=master_version)
    check_post_version_change_health()
# Example #7
def test_deploy():
    """Verify the service fails to deploy with incomplete config, then succeeds once fixed.

    Fix vs. original: on a task-query exception the loop used a bare
    `continue`, skipping the `time.sleep(1)` at the bottom and busy-spinning
    against the API; the sleep now happens before retrying. The unused
    exception variable is also dropped.
    """
    # taskcfg.yml will initially fail to deploy because several options are missing in the default
    # marathon.json.mustache. verify that tasks are failing for 30s before continuing.
    print('Checking that tasks are failing to launch for at least 30s')
    end_time = time.time() + 30
    # we can get brief blips of TASK_RUNNING but they shouldnt last more than 2-3s:
    consecutive_task_running = 0
    while time.time() < end_time:
        try:
            tasks = shakedown.get_service_tasks(PACKAGE_NAME)
        except Exception:
            # Transient query failure: back off before retrying instead of busy-spinning.
            time.sleep(1)
            continue
        states = [t['state'] for t in tasks]
        print('Task states: {}'.format(states))
        if TASK_RUNNING_STATE in states:
            consecutive_task_running += 1
            assert consecutive_task_running <= 3
        else:
            consecutive_task_running = 0
        time.sleep(1)

    # add the needed envvars in marathon and confirm that the deployment succeeds:
    config = get_marathon_config()
    env = config['env']
    del env['SLEEP_DURATION']
    env['TASKCFG_ALL_OUTPUT_FILENAME'] = 'output'
    env['TASKCFG_ALL_SLEEP_DURATION'] = '1000'
    request(dcos.http.put,
            marathon_api_url('apps/' + PACKAGE_NAME),
            json=config)

    check_health()
def test_service_becomes_healthy_after_agent_is_partitioned():
    """After one agent is partitioned and then reconnected, the service must recover."""
    target = get_broker_host()

    spin(shakedown.partition_agent, lambda x: (True, ''), target)
    shakedown.reconnect_agent(target)

    check_health()
def test_config_update_eventually_succeeds_after_all_brokers_fail():
    """Config update completes even when all brokers are killed during the rollout."""
    service_ips = shakedown.get_service_ips(PACKAGE_NAME)
    kill_brokers = lambda: [
        kill_task_with_pattern('kafka.Kafka', ip) for ip in service_ips]

    run_planned_operation(increment_broker_port_config, kill_brokers)

    check_health()
# Example #10
def test_config_update_eventually_succeeds_after_scheduler_fails():
    """The update plan must finish even if the scheduler process dies mid-update."""
    target_host = get_scheduler_host()
    kill_scheduler = lambda: kill_task_with_pattern(
        'kafka.scheduler.Main', target_host)

    run_planned_operation(increment_broker_port_config, kill_scheduler)

    check_health()
def test_service_becomes_healthy_after_all_agents_are_partitioned():
    """Partition every agent, then reconnect them all; the service must recover."""
    agent_ips = shakedown.get_service_ips(PACKAGE_NAME)

    # Partition all agents first, then restore connectivity to each.
    for ip in agent_ips:
        spin(shakedown.partition_agent, lambda x: (True, ''), ip)
    for ip in agent_ips:
        shakedown.reconnect_agent(ip)

    check_health()
# Example #12
def test_service_becomes_healthy_after_all_agents_are_partitioned():
    """Service health must return after a full partition/reconnect cycle of all agents."""
    all_agents = shakedown.get_service_ips(PACKAGE_NAME)

    for agent in all_agents:
        spin(shakedown.partition_agent, lambda _: (True, ''), agent)
    for agent in all_agents:
        shakedown.reconnect_agent(agent)

    check_health()
# Example #13
def test_can_adjust_config_from_dynamic_to_dynamic_port():
    """Bumping BROKER_CPUS triggers a rolling config update; the service must stay healthy.

    Fix vs. original: the `connections` local was fetched but never used (a
    wasted API round trip), and the camelCase local is renamed per PEP 8.
    """
    check_health()

    config = get_kafka_config()
    broker_cpus = int(config['env']['BROKER_CPUS'])
    config['env']['BROKER_CPUS'] = str(broker_cpus + 0.1)
    update_kafka_config(config)

    check_health()
# Example #14
def test_can_adjust_config_from_dynamic_to_dynamic_port():
    """Bumping BROKER_CPUS via the Marathon API keeps the service healthy.

    Fix vs. original: the unused `connections` fetch (dead API call) and the
    unused `r` response binding are removed; camelCase local renamed per PEP 8.
    """
    check_health()

    config = get_kafka_config()
    broker_cpus = int(config['env']['BROKER_CPUS'])
    config['env']['BROKER_CPUS'] = str(broker_cpus + 0.1)
    request(dcos.http.put, marathon_api_url('apps/kafka'), json=config)

    check_health()
def test_can_adjust_config_from_dynamic_to_dynamic_port():
    """A BROKER_CPUS bump rolls the config and the service must end healthy.

    Fix vs. original: removes the unused `connections` local (dead API call)
    and renames the camelCase local per PEP 8.
    """
    check_health()

    config = get_kafka_config()
    broker_cpus = int(config['env']['BROKER_CPUS'])
    config['env']['BROKER_CPUS'] = str(broker_cpus + 0.1)
    update_kafka_config(config)

    check_health()
def test_config_update_eventually_succeeds_after_agent_is_partitioned():
    """A config update must complete despite a partition of the broker's agent."""
    broker_host = get_broker_host()

    def partition_and_reconnect():
        spin(shakedown.partition_agent, lambda x: (True, ''), broker_host)
        shakedown.reconnect_agent(broker_host)

    run_planned_operation(increment_broker_port_config, partition_and_reconnect)

    check_health()
# Example #17
def test_config_update_eventually_succeeds_after_agent_is_partitioned():
    """Port-config rollout finishes even while the broker's agent is partitioned."""
    agent = get_broker_host()

    def cycle_partition():
        spin(shakedown.partition_agent, lambda _: (True, ''), agent)
        shakedown.reconnect_agent(agent)

    run_planned_operation(increment_broker_port_config, cycle_partition)

    check_health()
# Example #18
def test_config_update_eventually_succeeds_after_all_agents_are_partitioned():
    """The update converges even when every agent is partitioned during the rollout."""
    agent_ips = shakedown.get_service_ips(PACKAGE_NAME)

    def partition_all():
        for ip in agent_ips:
            spin(shakedown.partition_agent, lambda x: (True, ''), ip)
        for ip in agent_ips:
            shakedown.reconnect_agent(ip)

    run_planned_operation(increment_broker_port_config, partition_all)

    check_health()
def test_config_update_eventually_succeeds_after_all_agents_are_partitioned():
    """Cluster-wide agent partition during a config update must not stall the plan."""
    every_agent = shakedown.get_service_ips(PACKAGE_NAME)

    def partition_then_restore():
        # Cut connectivity to all agents, then bring each one back.
        for node in every_agent:
            spin(shakedown.partition_agent, lambda _: (True, ''), node)
        for node in every_agent:
            shakedown.reconnect_agent(node)

    run_planned_operation(increment_broker_port_config, partition_then_restore)

    check_health()
# Example #20
def test_can_adjust_config_from_dynamic_to_dynamic_port():
    """Changing KAFKA_VER_NAME must preserve the dynamically assigned broker ports.

    Fix vs. original: the unused `r` response binding is removed.
    """
    check_health()

    connections = get_connection_info()['address']
    config = get_kafka_config()
    config['env']['KAFKA_VER_NAME'] = 'kafka-nonce-ver'
    request(dcos.http.put, marathon_api_url('apps/kafka'), json=config)

    check_health()

    # The set of broker ports before and after the update must be identical.
    result = get_connection_info()
    assert (set([a.split(':')[-1] for a in result['address']
                 ]) == set([a.split(':')[-1] for a in connections]))
# Example #21
def test_bump_hello_cpus():
    """Raising HELLO_CPUS must relaunch the hello tasks and leave the service healthy."""
    check_health()
    old_hello_ids = get_task_ids('hello')
    print('hello ids: ' + str(old_hello_ids))

    config = get_marathon_config()
    hello_cpus = float(config['env']['HELLO_CPUS'])
    config['env']['HELLO_CPUS'] = str(hello_cpus + 0.1)
    request(dcos.http.put,
            marathon_api_url('apps/' + PACKAGE_NAME),
            json=config)

    # A CPU change forces a task relaunch, so the task ids must change.
    tasks_updated('hello', old_hello_ids)
    check_health()
# Example #22
def test_can_adjust_config_from_static_to_dynamic_port():
    """Setting BROKER_PORT to 0 should move every broker off the static 9092 port."""
    check_health()

    config = get_kafka_config()
    config['env']['BROKER_PORT'] = '0'
    update_kafka_config(config)

    check_health()

    endpoints = get_connection_info()['address']
    assert len(endpoints) == 3

    # Port 0 means "dynamic": no broker may remain on the old static port.
    for endpoint in endpoints:
        port = int(endpoint.split(':')[-1])
        assert 9092 != port
# Example #23
def setup_module(module):
    """pytest module fixture: wipe any prior install, then install the package.

    In strict security mode the strict.json options file (located next to
    this test file) is passed; otherwise the package defaults are used.
    """
    uninstall()

    if strict_mode == 'strict':
        shakedown.install_package_and_wait(
            package_name=PACKAGE_NAME,
            # Resolve strict.json relative to this test file's own directory.
            options_file=os.path.dirname(
                os.path.abspath(inspect.getfile(inspect.currentframe()))) +
            "/strict.json")
    else:
        shakedown.install_package_and_wait(package_name=PACKAGE_NAME,
                                           options_file=None)

    check_health()
def test_can_adjust_config_from_static_to_dynamic_port():
    """BROKER_PORT=0 requests dynamic ports; brokers must vacate port 9092."""
    check_health()

    kafka_config = get_kafka_config()
    kafka_config['env']['BROKER_PORT'] = '0'
    update_kafka_config(kafka_config)

    check_health()

    addresses = get_connection_info()['address']
    assert len(addresses) == 3

    # Every advertised endpoint must now use a dynamically assigned port.
    assert all(int(addr.split(':')[-1]) != 9092 for addr in addresses)
# Example #25
def test_can_adjust_config_from_static_to_dynamic_port():
    """Setting BROKER_PORT=0 via the Marathon API must move brokers off port 9092.

    Fix vs. original: the unused `r` response binding is removed.
    """
    check_health()

    config = get_kafka_config()
    config['env']['BROKER_PORT'] = '0'
    request(dcos.http.put, marathon_api_url('apps/kafka'), json=config)

    check_health()

    result = get_connection_info()
    assert len(result['address']) == 3

    # No broker may still be bound to the old static port.
    for hostport in result['address']:
        assert 9092 != int(hostport.split(':')[-1])
# Example #26
def test_bump_hello_nodes():
    """Incrementing HELLO_COUNT adds a node without relaunching existing hello tasks."""
    check_health()

    old_ids = get_task_ids('hello')
    print('hello ids: ' + str(old_ids))

    config = get_marathon_config()
    new_count = int(config['env']['HELLO_COUNT']) + 1
    config['env']['HELLO_COUNT'] = str(new_count)
    request(dcos.http.put,
            marathon_api_url('apps/' + PACKAGE_NAME),
            json=config)

    check_health()
    # Scale-out must not touch tasks that were already running.
    tasks_not_updated('hello', old_ids)
# Example #27
def test_upgrade():
    """Install the master build first, then upgrade in place to the candidate build."""
    candidate_version = get_pkg_version()
    print('Found test version: {}'.format(candidate_version))
    add_repo(candidate_version)

    baseline_version = get_pkg_version()
    print('Found master version: {}'.format(baseline_version))

    print('Installing master version')
    install(baseline_version)
    check_health()

    print('Upgrading to test version')
    destroy_service()
    install(candidate_version)
    check_health()

    # Remove the repo added above so later tests see a clean repo list.
    remove_repo(baseline_version)
# Example #28
def test_bump_data_nodes():
    """Adding a data node leaves existing data tasks running and ends healthy.

    Fix vs. original: the unused `r` response binding is removed and the
    camelCase local is renamed per PEP 8.
    """
    check_health()

    data_ids = get_task_ids('data')
    print('data ids: ' + str(data_ids))

    config = get_marathon_config()
    node_count = int(config['env']['DATA_COUNT']) + 1
    config['env']['DATA_COUNT'] = str(node_count)
    request(
        dcos.http.put,
        marathon_api_url('apps/' + PACKAGE_NAME),
        json=config)

    # One extra data node means one extra task in the healthy count.
    check_health(DEFAULT_HDFS_TASK_COUNT + 1)
    tasks_not_updated('data', data_ids)
# Example #29
def test_integrity_on_data_node_failure():
    """Data written before two DataNode failures must still be readable.

    Writes a file via data-0, kills the DataNode process on data-0 and
    data-1, then reads the file back through data-2 — exercising HDFS
    replication across data nodes.
    """
    shakedown.wait_for(
        lambda: write_data_to_hdfs("data-0-node.hdfs.mesos", TEST_FILE_1_NAME),
        HDFS_CMD_TIMEOUT_SEC)

    # gives chance for write to succeed and replication to occur
    time.sleep(5)

    kill_task_with_pattern("DataNode", 'data-0-node.hdfs.mesos')
    kill_task_with_pattern("DataNode", 'data-1-node.hdfs.mesos')
    time.sleep(1)  # give DataNode a chance to die

    # Read back through the surviving data node.
    shakedown.wait_for(
        lambda: read_data_from_hdfs("data-2-node.hdfs.mesos", TEST_FILE_1_NAME
                                    ), HDFS_CMD_TIMEOUT_SEC)

    check_health()
# Example #30
def test_integrity_on_name_node_failure():
    """
    The first name node (name-0-node) is the active name node by default when HDFS gets installed.
    This test checks that it is possible to write and read data after the first name node fails.
    """
    kill_task_with_pattern("NameNode", 'name-0-node.hdfs.mesos')
    time.sleep(1)  # give NameNode a chance to die

    # Write must succeed once the standby name node takes over.
    shakedown.wait_for(
        lambda: write_data_to_hdfs("data-0-node.hdfs.mesos", TEST_FILE_2_NAME),
        HDFS_CMD_TIMEOUT_SEC)

    # Read the same file back through a different data node.
    shakedown.wait_for(
        lambda: read_data_from_hdfs("data-2-node.hdfs.mesos", TEST_FILE_2_NAME
                                    ), HDFS_CMD_TIMEOUT_SEC)

    check_health()
# Example #31
def test_upgrade():
    """Upgrade-only path: install the master build, write messages, upgrade to the test build."""
    test_repo_name, test_repo_url = get_test_repo_info()
    test_version = get_pkg_version()
    print('Found test version: {}'.format(test_version))
    # Removing the test repo leaves the master build as the resolvable version.
    remove_repo(test_repo_name, test_version)
    master_version = get_pkg_version()
    print('Found master version: {}'.format(master_version))

    print('Installing master version')
    install({'package_version': master_version})
    check_health()
    write_messages()

    print('Upgrading to test version')
    destroy_service()
    # Re-add the test repo so the test build is installable again.
    add_repo(test_repo_name, test_repo_url, master_version)
    install({'package_version': test_version})
    check_post_upgrade_health()
# Example #32
def test_bump_journal_cpus():
    """Raising JOURNAL_CPUS must relaunch journal tasks and end healthy.

    Fix vs. original: the unused `r` response binding is removed.
    """
    check_health()
    journal_ids = get_task_ids('journal')
    print('journal ids: ' + str(journal_ids))

    config = get_marathon_config()
    print('marathon config: ')
    print(config)
    cpus = float(config['env']['JOURNAL_CPUS'])
    config['env']['JOURNAL_CPUS'] = str(cpus + 0.1)
    request(
        dcos.http.put,
        marathon_api_url('apps/' + PACKAGE_NAME),
        json=config)

    # The CPU change forces a relaunch: journal task ids must change.
    tasks_updated('journal', journal_ids)

    check_health()
# Example #33
def test_bump_metadata_cpus():
    """Raising METADATA_CPU relaunches meta-data tasks but must not touch data tasks.

    Fix vs. original: the unused `r` response binding is removed.
    """
    check_health()
    meta_data_ids = get_task_ids('meta-data')
    print('meta-data ids: ' + str(meta_data_ids))

    data_ids = get_task_ids('data')
    print('data ids: ' + str(data_ids))

    config = get_marathon_config()
    cpus = float(config['env']['METADATA_CPU'])
    config['env']['METADATA_CPU'] = str(cpus + 0.1)
    request(dcos.http.put,
            marathon_api_url('apps/' + PACKAGE_NAME),
            json=config)

    # Only the meta-data tasks are affected by the CPU change.
    tasks_updated('meta-data', meta_data_ids)
    tasks_not_updated('data', data_ids)

    check_health()
# Example #34
def test_bump_world_nodes():
    """Adding a world node must leave existing hello and world tasks untouched.

    Fix vs. original: the unused `r` response binding is removed and the
    camelCase local is renamed per PEP 8.
    """
    check_health()

    hello_ids = get_task_ids('hello')
    print('hello ids: ' + str(hello_ids))

    world_ids = get_task_ids('world')
    print('world ids: ' + str(world_ids))

    config = get_marathon_config()
    world_node_count = int(config['env']['WORLD_COUNT']) + 1
    config['env']['WORLD_COUNT'] = str(world_node_count)
    request(dcos.http.put,
            marathon_api_url('apps/' + PACKAGE_NAME),
            json=config)

    # One extra world node means one extra task in the healthy count.
    check_health(DEFAULT_TASK_COUNT + 1)
    tasks_not_updated('hello', hello_ids)
    tasks_not_updated('world', world_ids)
# Example #35
def test_pods_replace():
    """`pods replace world-0` must relaunch the pod's task with a new task id."""
    world_ids = get_task_ids('world-0')

    # get current agent id:
    stdout = run_dcos_cli_cmd('hello-world pods info world-0')
    old_agent = json.loads(stdout)[0]['info']['slaveId']['value']

    # The CLI returns a JSON object naming the pod and the tasks it replaced.
    jsonobj = json.loads(run_dcos_cli_cmd('hello-world pods replace world-0'))
    assert len(jsonobj) == 2
    assert jsonobj['pod'] == 'world-0'
    assert len(jsonobj['tasks']) == 1
    assert jsonobj['tasks'][0] == 'world-0-server'

    tasks_updated('world-0', world_ids)
    check_health()

    # check agent moved:
    # NOTE(review): old_agent/new_agent are fetched but never compared — an
    # assertion (old_agent != new_agent) looks intended; confirm whether
    # `pods replace` actually guarantees placement on a different agent.
    stdout = run_dcos_cli_cmd('hello-world pods info world-0')
    new_agent = json.loads(stdout)[0]['info']['slaveId']['value']
# Example #36
def test_marathon_rack_not_found():
    """An unsatisfiable placement constraint must leave the deploy plan stuck in progress.

    Fix vs. original: the bare `except:` wrapped the `assert False` too, so
    the AssertionError raised when check_health() unexpectedly succeeded was
    swallowed and the test could never fail there. The expected failure is
    now captured in a flag and asserted outside the try block.
    """
    # install without waiting, since the install should never succeed and a timeout would result in an
    # assertion failure
    install(additional_options={'service': {'placement_constraint': 'rack_id:LIKE:rack-foo-.*'}},
            wait=False)

    health_check_failed = False
    try:
        check_health()
    except Exception:
        # expected to fail, just wanting to wait
        health_check_failed = True
    assert health_check_failed, "Should have failed healthcheck"

    plan = get_plan()

    # check that first node is still (unsuccessfully) looking for a match:
    # reconciliation complete
    assert plan['status'] == 'IN_PROGRESS'
    # phase is pending
    assert plan['phases'][1]['status'] == 'PENDING'
    # step is pending
    assert plan['phases'][1]['steps'][0]['status'] == 'PENDING'
    uninstall()
# Example #37
def test_pods_restart():
    """`pods restart hello-0` relaunches the pod's task in place on the same agent."""
    hello_ids = get_task_ids('hello-0')

    # get current agent id:
    stdout = run_dcos_cli_cmd('hello-world pods info hello-0')
    old_agent = json.loads(stdout)[0]['info']['slaveId']['value']

    # The CLI returns a JSON object naming the pod and the tasks it restarted.
    stdout = run_dcos_cli_cmd('hello-world pods restart hello-0')
    jsonobj = json.loads(stdout)
    assert len(jsonobj) == 2
    assert jsonobj['pod'] == 'hello-0'
    assert len(jsonobj['tasks']) == 1
    assert jsonobj['tasks'][0] == 'hello-0-server'

    # NOTE(review): ids were gathered for 'hello-0' but the update check uses
    # the broader 'hello' prefix — presumably tasks_updated matches by prefix;
    # confirm against its definition.
    tasks_updated('hello', hello_ids)
    check_health()

    # check agent didn't move:
    stdout = run_dcos_cli_cmd('hello-world pods info hello-0')
    new_agent = json.loads(stdout)[0]['info']['slaveId']['value']
    assert old_agent == new_agent
# Example #38
def test_upgrade_downgrade():
    """Full round trip: install master, upgrade to the test build, then downgrade back."""
    # Ensure both Universe and the test repo exist.
    # In particular, the Framework Test Suite only runs packages from Universe;
    # it doesn't add a test repo like the PR jobs.
    if len(shakedown.get_package_repos()['repositories']) != 2:
        print('No test repo found.  Skipping test_upgrade_downgrade')
        return

    test_repo_name, test_repo_url = get_test_repo_info()
    test_version = get_pkg_version()
    print('Found test version: {}'.format(test_version))
    # Removing the test repo leaves the master build as the resolvable version.
    remove_repo(test_repo_name, test_version)
    master_version = get_pkg_version()
    print('Found master version: {}'.format(master_version))

    print('Installing master version')
    install(master_version)
    check_health()

    print('Upgrading to test version')
    destroy_service()
    # Re-add the test repo so the test build is installable again.
    add_repo(test_repo_name, test_repo_url, prev_version=master_version)
    install(test_version)
    check_health()

    print('Downgrading to master version')
    destroy_service()
    install(master_version)
    check_health()
def test_static_port_comes_online(static_port_config):
    """With the static-port fixture applied, the service must report healthy."""
    check_health()
# Example #40
def test_service_becomes_healthy_after_master_fails():
    """Killing the Mesos master must not leave the service unhealthy."""
    kill_task_with_pattern('mesos-master')
    check_health()
# Example #41
def test_service_becomes_healthy_after_zk_fails():
    """The service must recover after ZooKeeper is killed."""
    kill_task_with_pattern('zookeeper')
    check_health()
# Example #42
def test_service_becomes_healthy_after_scheduler_fails():
    """The service must recover after its scheduler process is killed."""
    scheduler_host = get_scheduler_host()
    kill_task_with_pattern('kafka.scheduler.Main', scheduler_host)
    check_health()
# Example #43
def test_service_becomes_healthy_after_all_brokers_fail():
    """Killing every broker at once must be survivable."""
    broker_ips = shakedown.get_service_ips(PACKAGE_NAME)
    for ip in broker_ips:
        kill_task_with_pattern('kafka.Kafka', ip)
    check_health()
# Example #44
def setup_module(module):
    """pytest module fixture: reinstall the package with dynamic-port options."""
    # Start from a clean slate before installing.
    uninstall()
    install(DYNAMIC_PORT_OPTIONS_DICT)
    check_health()
def test_dynamic_port_comes_online(dynamic_port_config):
    """With the dynamic-port fixture applied, the service must report healthy."""
    check_health()
# Example #46
def test_service_becomes_healthy_after_broker_fails():
    """A single broker kill must not leave the service unhealthy."""
    kill_task_with_pattern('kafka.Kafka', get_broker_host())
    check_health()