Example #1
def events_to_file():
    leader_ip = shakedown.marathon_leader_ip()
    print("entering events_to_file fixture")
    shakedown.run_command(leader_ip, 'rm events.txt')

    # In strict mode marathon runs in SSL mode on port 8443 and requires authentication
    if shakedown.ee_version() == 'strict':
        shakedown.run_command(
            leader_ip,
            '(curl --compressed -H "Cache-Control: no-cache" -H "Accept: text/event-stream" '
            + '-H "Authorization: token={}" '.format(
                shakedown.dcos_acs_token()) +
            '-o events.txt -k https://marathon.mesos:8443/v2/events; echo $? > events.exitcode) &'
        )

    # Otherwise marathon runs on HTTP mode on port 8080
    else:
        shakedown.run_command(
            leader_ip,
            '(curl --compressed -H "Cache-Control: no-cache" -H "Accept: text/event-stream" '
            '-o events.txt http://marathon.mesos:8080/v2/events; echo $? > events.exitcode) &'
        )

    yield
    shakedown.kill_process_on_host(leader_ip, '[c]url')
    shakedown.run_command(leader_ip, 'rm events.txt')
    shakedown.run_command(leader_ip, 'rm events.exitcode')
    print("exiting events_to_file fixture")
Example #2
def check_deployment_message():
    status, stdout = shakedown.run_command(leader_ip, 'cat events.exitcode')
    assert str(stdout).strip() == '', \
        "SSE stream disconnected (curl exit code is {})".format(stdout.strip())
    status, stdout = shakedown.run_command(leader_ip, 'cat events.txt')
    assert 'event_stream_attached' in stdout, "event_stream_attached event has not been found"
    assert 'deployment_info' in stdout, "deployment_info event has not been found"
    assert 'deployment_step_success' in stdout, "deployment_step_success event has not been found"
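Because events.txt is appended asynchronously by the background curl from Example #1, the surrounding tests usually retry a check like this rather than call it once. A sketch of that wiring, borrowing the retry parameters and the common.ignore_exception hook that appear in Example #6 (their use here is an assumption):

import retrying

@retrying.retry(wait_fixed=1000, stop_max_attempt_number=30,
                retry_on_exception=common.ignore_exception)
def check_deployment_message():
    ...  # body as above; a failed assert triggers another attempt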
Example #4
def dcos_masters_public_ips():
    """
    retrieves public ips of all masters

    :return: public ips of all masters
    """
    @retrying.retry(
        wait_fixed=1000,
        stop_max_attempt_number=240,  # waiting 20 minutes for exhibitor start-up
        retry_on_exception=ignore_provided_exception(DCOSException))
    def all_master_ips():
        return get_all_master_ips()

    master_public_ips = [shakedown.run_command(private_ip, '/opt/mesosphere/bin/detect_ip_public')[1]
                         for private_ip in all_master_ips()]

    return master_public_ips
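The retry above uses ignore_provided_exception(DCOSException), a factory that is referenced but not defined in this excerpt. A plausible minimal implementation, assuming the retrying library's convention that the retry_on_exception predicate returns True to keep retrying:

def ignore_provided_exception(exc_type):
    # Keep retrying while the raised exception is an instance of exc_type;
    # anything else propagates immediately.
    return lambda exception: isinstance(exception, exc_type)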
Example #6
def test_marathon_backup_and_check_apps(marathon_service_name):

    backup_file1 = 'backup1.tar'
    backup_file2 = 'backup2.tar'
    backup_dir = '/tmp'

    for master_ip in shakedown.get_all_master_ips():
        _ = shakedown.run_command(master_ip, "rm {}/{}".format(backup_dir, backup_file1))
        _ = shakedown.run_command(master_ip, "rm {}/{}".format(backup_dir, backup_file2))

    backup_url1 = 'file://{}/{}'.format(backup_dir, backup_file1)
    backup_url2 = 'file://{}/{}'.format(backup_dir, backup_file2)

    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    app_def = apps.sleep_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, "The number of running tasks is {}, but 1 was expected".format(app["tasksRunning"])

    # Abdicate the leader with backup
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}'.format(backup_url1)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def marathon_leadership_changed():
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        assert original_leader != current_leader, "A new Marathon leader has not been elected"

    # wait until leader changed
    marathon_leadership_changed()

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_app_existence(expected_instances):
        try:
            app = client.get_app(app_id)
        except Exception as e:
            if expected_instances != 0:
                raise e
        else:
            if expected_instances == 0:
                assert False, "The application resurrected"
            else:
                assert app['tasksRunning'] == expected_instances, \
                    "The number of running tasks is {}, but {} was expected".format(
                        app["tasksRunning"], expected_instances)

    # check if app definition is still there and one instance is still running after new leader was elected
    check_app_existence(1)

    # then remove
    client.remove_app(app_id)
    shakedown.deployment_wait()

    check_app_existence(0)

    # Do a second backup. Before MARATHON-7525, taking a backup after an app had been
    # deleted left Marathon unable to restart, because the second backup failed constantly.

    # Abdicate the leader with backup
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}'.format(backup_url2)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    # wait until leader changed
    # a leadership change means Marathon was able to start again after the second backup
    marathon_leadership_changed()

    # check if app definition is still not there and no instance is running after new leader was elected
    check_app_existence(0)
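Both retried helpers above pass retry_on_exception=common.ignore_exception, which this excerpt does not define. Given how it is used (assertions are retried until they pass or attempts run out), a minimal sketch could be:

def ignore_exception(exc):
    # Treat every exception, including a failed assert, as retryable,
    # so @retrying.retry keeps polling until stop_max_attempt_number.
    return True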
Example #7
def check_kill_message():
    status, stdout = shakedown.run_command(master_ip, 'cat events.txt')
    assert 'KILLED' in stdout, "KILLED event has not been found"
Example #8
def check_deployment_message():
    status, stdout = shakedown.run_command(master_ip, 'cat events.txt')
    assert 'event_stream_attached' in stdout, "event_stream_attached event has not been found"
    assert 'deployment_info' in stdout, "deployment_info event has not been found"
    assert 'deployment_step_success' in stdout, "deployment_step_success event has not been found"
Example #9
def check_update_message():
    status, stdout = shakedown.run_command(leader_ip, 'cat events.txt')
    assert 'pod_updated_event' in stdout, "pod_updated_event event has not been produced"
Example #10
def gc_frameworks():
    '''Reclaims private agent disk space consumed by Mesos but not yet garbage collected'''
    for host in shakedown.get_private_agents():
        shakedown.run_command(
            host, "sudo rm -rf /var/lib/mesos/slave/slaves/*/frameworks/*")
Example #15
File: common.py  Project: dcos/metronome
def run_command_on_metronome_leader(command, username=None, key_path=None, noisy=True):
    """ Run a command on the Metronome leader
    """

    return shakedown.run_command(metronome_leader_ip(), command, username, key_path, noisy)
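The wrapper forwards directly to shakedown.run_command, so it returns the same (status, output) tuple; for example (the command here is arbitrary):

ok, output = run_command_on_metronome_leader('uptime')
assert ok, "command failed on the Metronome leader: {}".format(output)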