def test_config_update_then_master_killed():
    """Combine a CPU-count config bump with killing the leading Mesos master.

    The leader IP is captured first.  Two callables -- the config bump
    (with argument -0.1) and the kill of the 'mesos-master' task on that
    leader -- are handed to ``run_planned_operation``.  Afterwards
    ``verify_leader_changed`` is called with the original leader IP and
    the health check is run.
    """
    original_leader = shakedown.master_leader_ip()

    def apply_config_change():
        return bump_cpu_count_config(-0.1)

    def kill_leading_master():
        return kill_task_with_pattern('mesos-master', original_leader)

    run_planned_operation(apply_config_change, kill_leading_master)

    verify_leader_changed(original_leader)
    check_health()
def test_partition_master_outgoing():
    """Partition the leading master's outgoing traffic, then reconnect it.

    ``shakedown.partition_master`` is called with ``incoming=False`` and
    ``outgoing=True`` for the current leader; after a 20 second wait the
    master is reconnected and the health check is run.
    """
    master_leader_ip = shakedown.master_leader_ip()
    shakedown.partition_master(master_leader_ip, incoming=False, outgoing=True)
    try:
        time.sleep(20)
    finally:
        # Always undo the partition, even if the wait is interrupted
        # (e.g. test timeout or Ctrl-C); otherwise the master stays
        # partitioned for whatever runs next.
        shakedown.reconnect_master(master_leader_ip)

    check_health()
def test_partition_master_both_ways():
    """Partition the leading master with default settings, then reconnect it.

    ``shakedown.partition_master`` is called with only the leader IP (its
    defaults decide which directions are blocked); after a 20 second wait
    the master is reconnected and the health check is run.
    """
    master_leader_ip = shakedown.master_leader_ip()
    shakedown.partition_master(master_leader_ip)
    try:
        time.sleep(20)
    finally:
        # Always undo the partition, even if the wait is interrupted
        # (e.g. test timeout or Ctrl-C); otherwise the master stays
        # partitioned for whatever runs next.
        shakedown.reconnect_master(master_leader_ip)

    check_health()
def test_config_update_then_zk_killed():
    """Combine a CPU-count config bump with killing ZooKeeper on the leader.

    Three callables are handed to ``run_planned_operation``: the config
    bump itself, the kill of the 'zookeeper' task on the current leader,
    and a ``verify_leader_changed`` call for that leader.  The health
    check runs afterwards.
    """
    original_leader = shakedown.master_leader_ip()

    def kill_zookeeper():
        return kill_task_with_pattern('zookeeper', original_leader)

    def confirm_new_leader():
        return verify_leader_changed(original_leader)

    run_planned_operation(bump_cpu_count_config, kill_zookeeper, confirm_new_leader)

    check_health()
def test_cleanup_then_master_killed():
    """Combine a cleanup run with killing the leading Mesos master.

    ``run_cleanup`` and a callable that kills the 'mesos-master' task on
    the current leader are handed to ``run_planned_operation``.  Afterwards
    ``verify_leader_changed`` is called with the original leader IP and
    the health check is run.
    """
    original_leader = shakedown.master_leader_ip()

    def kill_leading_master():
        return kill_task_with_pattern('mesos-master', original_leader)

    run_planned_operation(run_cleanup, kill_leading_master)

    verify_leader_changed(original_leader)
    check_health()
def test_repair_then_zk_killed():
    """Combine a repair run with killing ZooKeeper on the leading master.

    Three callables are handed to ``run_planned_operation``: ``run_repair``,
    the kill of the 'zookeeper' task on the current leader, and a
    ``verify_leader_changed`` call for that leader.  The health check
    runs afterwards.
    """
    original_leader = shakedown.master_leader_ip()

    def kill_zookeeper():
        return kill_task_with_pattern('zookeeper', original_leader)

    def confirm_new_leader():
        return verify_leader_changed(original_leader)

    run_planned_operation(run_repair, kill_zookeeper, confirm_new_leader)

    check_health()
示例#7
0
文件: command.py 项目: dcos/shakedown
def run_command_on_leader(
        command,
        username=None,
        key_path=None,
        noisy=True
):
    """Run ``command`` on the current Mesos leader.

    The leader is looked up via ``shakedown.master_leader_ip()`` on every
    call, which matters on multi-master clusters.  All arguments are
    forwarded positionally to ``run_command`` and its result is returned
    unchanged.
    """
    leader_host = shakedown.master_leader_ip()
    result = run_command(leader_host, command, username, key_path, noisy)
    return result
示例#8
0
def get_marathon_leader_not_on_master_leader_node():
    """Return a Marathon leader IP, forcing a re-election if it equals the Mesos leader.

    Both leader IPs are printed.  When they differ, the Marathon leader is
    returned as is.  When they match, the 'v2/leader' Marathon path is
    deleted, the 'marathon' service endpoint is awaited (5 minute timeout)
    and the value returned by ``assert_marathon_leadership_changed`` is
    printed and returned.
    """
    current_marathon = shakedown.marathon_leader_ip()
    mesos_leader = shakedown.master_leader_ip()
    print('marathon leader: {}'.format(current_marathon))
    print('mesos leader: {}'.format(mesos_leader))

    # Nothing to do when the two leaders already differ.
    if current_marathon != mesos_leader:
        return current_marathon

    delete_marathon_path('v2/leader')
    endpoint_timeout = timedelta(minutes=5).total_seconds()
    shakedown.wait_for_service_endpoint('marathon', endpoint_timeout)
    switched_leader = assert_marathon_leadership_changed(current_marathon)
    print('switched leader to: {}'.format(switched_leader))
    return switched_leader
示例#9
0
def get_marathon_leader_not_on_master_leader_node():
    """Return a Marathon leader IP, forcing a re-election if it equals the Mesos leader.

    Both leader IPs are printed.  When they differ, the Marathon leader is
    returned as is.  When they match, the 'v2/leader' Marathon path is
    deleted, the 'marathon' service endpoint is awaited (5 minute timeout)
    and the value returned by ``assert_marathon_leadership_changed`` is
    printed and returned.
    """
    leader_of_marathon = shakedown.marathon_leader_ip()
    leader_of_mesos = shakedown.master_leader_ip()
    print('marathon leader: {}'.format(leader_of_marathon))
    print('mesos leader: {}'.format(leader_of_mesos))

    # Leaders already differ: hand back the current Marathon leader.
    if leader_of_marathon != leader_of_mesos:
        return leader_of_marathon

    delete_marathon_path('v2/leader')
    wait_seconds = timedelta(minutes=5).total_seconds()
    shakedown.wait_for_service_endpoint('marathon', wait_seconds)
    leader_of_marathon = assert_marathon_leadership_changed(leader_of_marathon)
    print('switched leader to: {}'.format(leader_of_marathon))
    return leader_of_marathon
示例#10
0
def test_zk_killed():
    """Kill ZooKeeper on the leading master and check leadership and health.

    The test sleeps 60 seconds before starting, after the kill, and again
    before the final health check.  ``verify_leader_changed`` is called
    with the leader IP captured before the kill.
    """
    pause_seconds = 60
    time.sleep(pause_seconds)

    this_test_name = sys._getframe().f_code.co_name
    log.info("Starting {}".format(this_test_name))

    original_leader = shakedown.master_leader_ip()
    leader_message = "master leader ip- " + original_leader
    log.info(leader_message)

    kill_task_with_pattern('zookeeper', original_leader)
    time.sleep(pause_seconds)

    # The _block_on_adminrouter(...) wait is disabled in this variant.
    verify_leader_changed(original_leader)

    time.sleep(pause_seconds)
    check_health()
示例#11
0
def get_marathon_leader_not_on_master_leader_node():
    """Return a Marathon leader IP, forcing a re-election if it equals the Mesos leader.

    Both leader IPs are printed.  When they differ, the Marathon leader is
    returned as is.  When they match, the 'v2/leader' Marathon path is
    deleted, the 'marathon' service endpoint is awaited (5 minute timeout),
    and the freshly queried Marathon leader is asserted to differ from the
    previous one before being printed and returned.
    """
    previous_leader = shakedown.marathon_leader_ip()
    mesos_leader = shakedown.master_leader_ip()
    print('marathon: {}'.format(previous_leader))
    print('leader: {}'.format(mesos_leader))

    # Nothing to do when the two leaders already differ.
    if previous_leader != mesos_leader:
        return previous_leader

    delete_marathon_path('v2/leader')
    endpoint_timeout = timedelta(minutes=5).total_seconds()
    shakedown.wait_for_service_endpoint('marathon', endpoint_timeout)

    elected_leader = shakedown.marathon_leader_ip()
    assert elected_leader != previous_leader, "A new Marathon leader has not been elected"
    print('switched leader to: {}'.format(elected_leader))
    return elected_leader
示例#12
0
def get_marathon_leader_not_on_master_leader_node():
    """Return a Marathon leader IP, forcing a re-election if it equals the Mesos leader.

    Both leader IPs are printed.  When they differ, the Marathon leader is
    returned as is.  When they match, the 'v2/leader' Marathon path is
    deleted, the 'marathon' service endpoint is awaited (5 minute timeout),
    and the freshly queried Marathon leader is asserted to differ from the
    previous one before being printed and returned.
    """
    leader_before = shakedown.marathon_leader_ip()
    leader_of_mesos = shakedown.master_leader_ip()
    print('marathon: {}'.format(leader_before))
    print('leader: {}'.format(leader_of_mesos))

    # Leaders already differ: hand back the current Marathon leader.
    if leader_before != leader_of_mesos:
        return leader_before

    delete_marathon_path('v2/leader')
    wait_seconds = timedelta(minutes=5).total_seconds()
    shakedown.wait_for_service_endpoint('marathon', wait_seconds)

    leader_after = shakedown.marathon_leader_ip()
    assert leader_after != leader_before, "A new Marathon leader has not been elected"
    print('switched leader to: {}'.format(leader_after))
    return leader_after
示例#13
0
def test_zk_killed_recovery():
    """Kill ZooKeeper on the leading master and check health afterwards.

    The test sleeps 60 seconds before starting, kills the 'zookeeper' task
    on the current leader, calls ``_block_on_adminrouter`` with that leader
    IP, sleeps another 60 seconds, runs the health check and finally
    sleeps 120 seconds before returning.
    """
    pause_seconds = 60
    time.sleep(pause_seconds)

    this_test_name = sys._getframe().f_code.co_name
    log.info("Starting {}".format(this_test_name))

    original_leader = shakedown.master_leader_ip()
    leader_message = "master leader ip- " + original_leader
    log.info(leader_message)

    kill_task_with_pattern('zookeeper', original_leader)
    _block_on_adminrouter(original_leader)

    time.sleep(pause_seconds)
    log.info("Taking a health check")
    check_health()

    # Trailing wait after the health check, announced on stdout.
    print("Sleeping for 120 sec")
    time.sleep(120)
示例#14
0
def test_metronome_shutdown_with_no_extra_tasks():
    """Regression test for METRONOME-100.

    When Metronome was restarted it incorrectly started another task for a
    job run whose task was already running.
    """
    metronome_client = metronome.create_client()
    job_id = "metronome-shutdown-{}".format(uuid.uuid4().hex)
    job_definition = job_no_schedule(job_id)

    with job(job_definition):
        # Start a job run before Metronome is restarted.
        started_run = metronome_client.run_job(job_id)
        run_id = started_run["id"]
        common.wait_for_job_started(job_id, run_id)
        common.assert_job_run(metronome_client, job_id)

        # Restart the Metronome process.
        # This won't work in a multi-master setup if the Mesos leader is not
        # the same node as the Metronome leader.  It can be improved once
        # there is a good way to get the Metronome leader from the system
        # (e.g. an info endpoint).
        leader_host = shakedown.master_leader_ip()
        restart_command = 'sudo systemctl restart dcos-metronome'
        shakedown.run_command_on_agent(leader_host, restart_command)
        common.wait_for_metronome()

        # No extra job runs may have been started by the restart.
        common.assert_wait_for_no_additional_tasks(
            tasks_count=1,
            client=metronome_client,
            job_id=job_id)
示例#15
0
def run_command_on_leader(command, username=None, key_path=None, noisy=True):
    """Run ``command`` on the current Mesos leader.

    The leader is looked up via ``shakedown.master_leader_ip()`` on every
    call, which matters on multi-master clusters.  All arguments are
    forwarded positionally to ``run_command`` and its result is returned
    unchanged.
    """
    leader_host = shakedown.master_leader_ip()
    outcome = run_command(leader_host, command, username, key_path, noisy)
    return outcome
def test_zk_killed_recovery():
    """Kill ZooKeeper on the leading master, wait on adminrouter, check health.

    The 'zookeeper' task is killed on the current leader,
    ``_block_on_adminrouter`` is called with that leader IP, and the
    health check is run.
    """
    original_leader = shakedown.master_leader_ip()

    kill_task_with_pattern('zookeeper', original_leader)
    _block_on_adminrouter(original_leader)

    check_health()
def test_zk_killed():
    """Kill ZooKeeper on the leading master, then check leadership and health.

    The 'zookeeper' task is killed on the current leader,
    ``verify_leader_changed`` is called with that leader IP, and the
    health check is run.
    """
    original_leader = shakedown.master_leader_ip()

    kill_task_with_pattern('zookeeper', original_leader)
    verify_leader_changed(original_leader)

    check_health()
示例#18
0
 def fn():
     """Return the current master leader IP, or ``old_leader_ip`` on auth failure.

     ``old_leader_ip`` comes from the enclosing scope.  A
     ``DCOSAuthenticationException`` raised by the lookup is logged as an
     error and not propagated.
     """
     try:
         current_leader = shakedown.master_leader_ip()
     except DCOSAuthenticationException:
         log.error("Got exception while fetching leader")
         return old_leader_ip
     return current_leader
def test_master_killed_block_on_admin_router():
    """Kill the leading Mesos master, then check leadership and health.

    The 'mesos-master' task is killed on the current leader,
    ``verify_leader_changed`` is called with that leader IP, and the
    health check is run.  Despite the test's name, this body makes no
    explicit adminrouter wait call.
    """
    original_leader = shakedown.master_leader_ip()

    kill_task_with_pattern('mesos-master', original_leader)
    verify_leader_changed(original_leader)

    check_health()