Exemplo n.º 1
0
def test_marathon_delete_leader(marathon_service_name):
    """Abdicate the current Marathon leader and check that leadership changed.

    The final check is delegated to ``marathon_leadership_changed``, which is
    defined outside this block and receives the leader seen before abdication.
    """
    previous_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(previous_leader))

    # Issue the DELETE against the leader endpoint.
    common.delete_marathon_path('v2/leader')

    # Allow the service endpoint up to five minutes to respond again.
    endpoint_timeout = timedelta(minutes=5).total_seconds()
    shakedown.wait_for_service_endpoint(marathon_service_name, endpoint_timeout)

    marathon_leadership_changed(previous_leader)
Exemplo n.º 2
0
def test_marathon_delete_leader_and_check_apps(marathon_service_name):
    """Check app state across two consecutive Marathon leader abdications.

    Steps:
      1. Start an app and confirm one task is running.
      2. Abdicate the leader, wait for a different leader, and confirm the app
         still reports one running task.
      3. Remove the app, abdicate again, wait for a different leader, and
         confirm no task is reported as running.
    """

    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    # start an app
    app_def = common.app(id=uuid.uuid4().hex)
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1

    # abdicate leader after app was started successfully
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name,
                                        timedelta(minutes=5).total_seconds())

    # wait_fixed spaces the attempts out; without it all 30 attempts run
    # back-to-back and can be exhausted before the election has finished.
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def marathon_leadership_changed():
        """Assert that the reported leader differs from ``original_leader``."""
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        assert original_leader != current_leader

    # wait until leader changed
    marathon_leadership_changed()

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_app_existence(expected_instances):
        """Assert that the app reports ``expected_instances`` running tasks."""
        app = client.get_app(app_id)
        assert app['tasksRunning'] == expected_instances

    # check if app definition is still there and one instance is still running after new leader was elected
    check_app_existence(1)

    client.remove_app(app_id)
    shakedown.deployment_wait()

    # NOTE(review): assumes get_app still returns a record right after the
    # removal -- confirm against the client's behaviour for deleted apps.
    app = client.get_app(app_id)
    assert app['tasksRunning'] == 0

    # Re-read the leader before abdicating again. Comparing against the leader
    # from the start of the test would pass vacuously (it already changed once)
    # or fail spuriously if that first leader happens to be re-elected.
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    # abdicate leader after app was removed
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name,
                                        timedelta(minutes=5).total_seconds())

    # wait until leader changed
    marathon_leadership_changed()

    # check if app definition is still not there and no instance is running after new leader was elected
    check_app_existence(0)
def test_marathon_delete_leader_and_check_apps(marathon_service_name):
    """Check app state across two consecutive Marathon leader abdications.

    An app is started and must still report one running task after the first
    abdication; it is then removed and must report no running task after the
    second abdication.
    """

    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    # start an app
    app_def = common.app(id=uuid.uuid4().hex)
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1

    # abdicate leader after app was started successfully
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    # wait_fixed spaces the attempts out; without it all 30 attempts run
    # back-to-back and can be exhausted before the election has finished.
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30)
    def marathon_leadership_changed():
        """Assert that the reported leader differs from ``original_leader``."""
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        assert original_leader != current_leader

    # wait until leader changed
    marathon_leadership_changed()

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30)
    def check_app_existence(expected_instances):
        """Assert that the app reports ``expected_instances`` running tasks."""
        app = client.get_app(app_id)
        assert app['tasksRunning'] == expected_instances

    # check if app definition is still there and one instance is still running after new leader was elected
    check_app_existence(1)

    client.remove_app(app_id)
    shakedown.deployment_wait()

    # NOTE(review): assumes get_app still returns a record right after the
    # removal -- confirm against the client's behaviour for deleted apps.
    app = client.get_app(app_id)
    assert app['tasksRunning'] == 0

    # Re-read the leader before abdicating again. Comparing against the leader
    # from the start of the test would pass vacuously (it already changed once)
    # or fail spuriously if that first leader happens to be re-elected.
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    # abdicate leader after app was removed
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    # wait until leader changed
    marathon_leadership_changed()

    # check if app definition is still not there and no instance is running after new leader was elected
    check_app_existence(0)
Exemplo n.º 4
0
def test_marathon_delete_leader(marathon_service_name):
    """Abdicate the Marathon leader and wait until a different leader is reported."""
    leader_before = shakedown.marathon_leader_ip()
    print('leader: {}'.format(leader_before))
    common.delete_marathon_path('v2/leader')

    # Allow the service endpoint up to five minutes to respond again.
    endpoint_timeout = timedelta(minutes=5).total_seconds()
    shakedown.wait_for_service_endpoint(marathon_service_name, endpoint_timeout)

    @retrying.retry(
        wait_fixed=1000,
        stop_max_attempt_number=30,
        retry_on_exception=common.ignore_exception)
    def leader_has_changed():
        """Assert that the reported leader is no longer ``leader_before``."""
        leader_now = shakedown.marathon_leader_ip()
        print('leader: {}'.format(leader_now))
        assert leader_before != leader_now

    leader_has_changed()
Exemplo n.º 5
0
def test_marathon_delete_leader(marathon_service_name):
    """Delete ``v2/leader`` and verify that a different Marathon leader takes over."""
    initial_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(initial_leader))
    common.delete_marathon_path('v2/leader')

    # Five-minute budget for the service endpoint to respond again.
    max_wait_seconds = timedelta(minutes=5).total_seconds()
    shakedown.wait_for_service_endpoint(marathon_service_name, max_wait_seconds)

    @retrying.retry(
        wait_fixed=1000,
        stop_max_attempt_number=30,
        retry_on_exception=common.ignore_exception)
    def verify_new_leader():
        """Assert that the reported leader differs from ``initial_leader``."""
        observed_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(observed_leader))
        assert initial_leader != observed_leader

    verify_new_leader()
Exemplo n.º 6
0
def test_marathon_delete_leader(marathon_service_name):
    """Abdicate the Marathon leader and verify that a different leader is reported."""

    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    common.delete_marathon_path('v2/leader')

    common.wait_for_marathon_up()

    # wait_fixed spaces the attempts out; without it all 30 attempts run
    # back-to-back and can be exhausted before the election has finished.
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30)
    def marathon_leadership_changed():
        """Assert that the reported leader differs from ``original_leader``."""
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        assert original_leader != current_leader

    marathon_leadership_changed()
Exemplo n.º 7
0
def test_marathon_delete_leader(marathon_service_name):
    """Abdicate the Marathon leader and verify that a different leader is reported."""

    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    common.delete_marathon_path('v2/leader')

    common.wait_for_marathon_up()

    # wait_fixed spaces the attempts out; without it all 30 attempts run
    # back-to-back and can be exhausted before the election has finished.
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30)
    def marathon_leadership_changed():
        """Assert that the reported leader differs from ``original_leader``."""
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        assert original_leader != current_leader

    marathon_leadership_changed()
Exemplo n.º 8
0
def test_marathon_backup_and_restore_leader(marathon_service_name):
    """Abdicate the leader with backup and restore, then verify the app and the backup file.

    Backup and restore testing is done with only one master, since the new master has to be
    able to read the backup file that was created by the previous master, and the easiest
    way to test that is when there is exactly 1 master.
    """
    archive_name = 'backup.tar'
    archive_dir = '/tmp'
    backup_url = 'file://{}/{}'.format(archive_dir, archive_name)

    # Deploy a simple test app. It is expected to be there after leader reelection.
    app_def = apps.sleep_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    deployed = client.get_app(app_id)
    assert deployed['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(deployed["tasksRunning"])
    # Remember the task so it can be compared with the one present after the restore.
    task_id = deployed['tasks'][0]['id']

    # Abdicate the leader with backup and restore (both point at the same archive).
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}&restore={}'.format(backup_url, backup_url)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    # Wait for new leader (but same master server) to be up and ready.
    endpoint_timeout = timedelta(minutes=5).total_seconds()
    shakedown.wait_for_service_endpoint(marathon_service_name, endpoint_timeout)

    restored = client.get_app(app_id)
    assert restored['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(restored["tasksRunning"])
    assert task_id == restored['tasks'][0]['id'], "Task has a different ID after restore"

    # Check that the backup file exists and is valid: list the archive and count its entries.
    cmd = 'tar -tf {}/{} | wc -l'.format(archive_dir, archive_name)
    status, data = shakedown.run_command_on_master(cmd)
    assert status, 'Failed to validate backup file {}'.format(backup_url)
    assert int(data.rstrip()) > 0, "Backup file is empty"
Exemplo n.º 9
0
def test_marathon_backup_and_restore_leader(marathon_service_name):
    """Abdicate the leader with backup and restore, then verify the app and the backup file.

    A sleep app is deployed first; after the abdication it must still report one
    running task with the same task id, and the backup archive must be non-empty.
    """
    archive_name = 'backup.tar'
    archive_dir = '/tmp'
    backup_url = 'file://{}/{}'.format(archive_dir, archive_name)

    # Deploy a simple test app. It is expected to be there after leader reelection.
    client = marathon.create_client()
    sleep_app = {
        "id": "/sleep",
        "instances": 1,
        "cpus": 0.01,
        "mem": 32,
        "cmd": "sleep 100000"
    }
    app_id = sleep_app['id']
    client.add_app(sleep_app)
    shakedown.deployment_wait()

    deployed = client.get_app(app_id)
    assert deployed['tasksRunning'] == 1
    # Remember the task so it can be compared with the one present after the restore.
    task_id = deployed['tasks'][0]['id']

    # Abdicate the leader with backup and restore (both point at the same archive).
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}&restore={}'.format(backup_url, backup_url)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    # Wait for new leader (but same master server) to be up and ready.
    endpoint_timeout = timedelta(minutes=5).total_seconds()
    shakedown.wait_for_service_endpoint(marathon_service_name, endpoint_timeout)

    restored = client.get_app(app_id)
    assert restored['tasksRunning'] == 1
    assert task_id == restored['tasks'][0]['id'], "Task has a different Id after restore"

    # Check that the backup file exists and is valid: list the archive and count its entries.
    cmd = 'tar -tf {}/{} | wc -l'.format(archive_dir, archive_name)
    succeeded, output = shakedown.run_command_on_master(cmd)
    assert succeeded, 'Failed to validate backup file {}'.format(backup_url)
    assert int(output.rstrip()) > 0, "Backup file is empty"
Exemplo n.º 10
0
def test_marathon_backup_and_restore_leader(marathon_service_name):
    """Abdicate the leader with backup and restore, then verify the app and the backup file.

    A sleep app is deployed first; after the abdication it must still report one
    running task with the same task id, and the backup archive must be non-empty.
    """
    tar_name = 'backup.tar'
    tar_dir = '/tmp'
    backup_url = 'file://{}/{}'.format(tar_dir, tar_name)

    # Deploy a simple test app. It is expected to be there after leader reelection.
    client = marathon.create_client()
    sleep_app = {
        "id": "/sleep",
        "instances": 1,
        "cpus": 0.01,
        "mem": 32,
        "cmd": "sleep 100000"
    }
    app_id = sleep_app['id']
    client.add_app(sleep_app)
    shakedown.deployment_wait()

    app_before = client.get_app(app_id)
    assert app_before['tasksRunning'] == 1
    # Remember the task so it can be compared with the one present after the restore.
    task_id = app_before['tasks'][0]['id']

    # Abdicate the leader with backup and restore (both point at the same archive).
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}&restore={}'.format(backup_url, backup_url)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    # Wait for new leader (but same master server) to be up and ready.
    max_wait_seconds = timedelta(minutes=5).total_seconds()
    shakedown.wait_for_service_endpoint(marathon_service_name, max_wait_seconds)

    app_after = client.get_app(app_id)
    assert app_after['tasksRunning'] == 1
    assert task_id == app_after['tasks'][0]['id'], "Task has a different Id after restore"

    # Check that the backup file exists and is valid: list the archive and count its entries.
    cmd = 'tar -tf {}/{} | wc -l'.format(tar_dir, tar_name)
    status, data = shakedown.run_command_on_master(cmd)
    assert status, 'Failed to validate backup file {}'.format(backup_url)
    assert int(data.rstrip()) > 0, "Backup file is empty"
Exemplo n.º 11
0
def test_marathon_backup_and_restore_leader(marathon_service_name):
    """Abdicate the leader with backup and restore, then verify the app and the backup file.

    Backup and restore testing is done with only one master, since the new master has to be
    able to read the backup file that was created by the previous master, and the easiest
    way to test that is when there is exactly 1 master.
    """
    tar_name = 'backup.tar'
    tar_dir = '/tmp'
    backup_url = 'file://{}/{}'.format(tar_dir, tar_name)

    # Deploy a simple test app. It is expected to be there after leader reelection.
    app_def = apps.sleep_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    app_before = client.get_app(app_id)
    assert app_before['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app_before["tasksRunning"])
    # Remember the task so it can be compared with the one present after the restore.
    task_id = app_before['tasks'][0]['id']

    # Abdicate the leader with backup and restore (both point at the same archive).
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}&restore={}'.format(backup_url, backup_url)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    # Wait for new leader (but same master server) to be up and ready.
    max_wait_seconds = timedelta(minutes=5).total_seconds()
    shakedown.wait_for_service_endpoint(marathon_service_name, max_wait_seconds)

    app_after = client.get_app(app_id)
    assert app_after['tasksRunning'] == 1, \
        "The number of running tasks is {}, but 1 was expected".format(app_after["tasksRunning"])
    assert task_id == app_after['tasks'][0]['id'], "Task has a different ID after restore"

    # Check that the backup file exists and is valid: list the archive and count its entries.
    cmd = 'tar -tf {}/{} | wc -l'.format(tar_dir, tar_name)
    status, data = shakedown.run_command_on_master(cmd)
    assert status, 'Failed to validate backup file {}'.format(backup_url)
    assert int(data.rstrip()) > 0, "Backup file is empty"
Exemplo n.º 12
0
def test_marathon_delete_leader_and_check_apps(marathon_service_name):
    """Check app state across two consecutive Marathon leader abdications.

    Steps:
      1. Deploy a sleep app and confirm one task is running.
      2. Abdicate the leader, wait for a different leader, and confirm the app
         still reports one running task.
      3. Remove the app and confirm it can no longer be fetched.
      4. Abdicate again, wait for a different leader, and confirm the app has
         not reappeared.
    """
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    app_def = apps.sleep_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, "The number of running tasks is {}, but 1 was expected".format(app["tasksRunning"])

    # abdicate leader after app was started successfully
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def marathon_leadership_changed():
        """Assert that the leader differs from ``original_leader``; abdicate again if it does not."""
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        if original_leader == current_leader:
            # The same leader is still reported, so request abdication again
            # before the failing assert triggers the next retry.
            common.delete_marathon_path('v2/leader')
        assert original_leader != current_leader, "A new Marathon leader has not been elected"

    # wait until leader changed
    marathon_leadership_changed()

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_app_existence(expected_instances):
        """Assert that the app reports ``expected_instances`` running tasks."""
        app = client.get_app(app_id)
        # A single assert with a message: a preceding message-less duplicate
        # would fail first and hide the diagnostic text.
        assert app['tasksRunning'] == expected_instances, \
            "The number of running tasks is {}, but {} was expected".format(app["tasksRunning"], expected_instances)

    # check if app definition is still there and one instance is still running after new leader was elected
    check_app_existence(1)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def remove_app(app_id):
        """Remove the app, retrying when the request fails."""
        client.remove_app(app_id)

    def assert_app_is_gone():
        """Assert that fetching the app fails, i.e. its definition no longer exists."""
        try:
            client.get_app(app_id)
        except Exception:
            # A failed lookup is the expected outcome for a removed app.
            # ``Exception`` (not a bare except) keeps KeyboardInterrupt and
            # SystemExit from being swallowed.
            return
        else:
            assert False, "The application resurrected"

    remove_app(app_id)
    shakedown.deployment_wait()

    assert_app_is_gone()

    # Re-read the leader before abdicating again. Comparing against the leader
    # from the start of the test would pass vacuously (it already changed once)
    # or force an extra abdication if that first leader happens to be re-elected.
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    # abdicate leader after app was removed
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    # wait until leader changed
    marathon_leadership_changed()

    # check if app definition is still not there
    assert_app_is_gone()
Exemplo n.º 13
0
def test_marathon_backup_and_check_apps(marathon_service_name):
    """Abdicate the leader with a backup twice and verify app state after each election.

    The first backup is taken while a sleep app is running; the app must still
    report one running task afterwards. The app is then removed and a second
    backup is taken; Marathon must come back up and the app must stay absent.
    """

    backup_file1 = 'backup1.tar'
    backup_file2 = 'backup2.tar'
    backup_dir = '/tmp'

    # Remove backup archives possibly left over on any master; the command
    # results are deliberately not checked.
    for master_ip in shakedown.get_all_master_ips():
        shakedown.run_command(master_ip, "rm {}/{}".format(backup_dir, backup_file1))
        shakedown.run_command(master_ip, "rm {}/{}".format(backup_dir, backup_file2))

    backup_url1 = 'file://{}/{}'.format(backup_dir, backup_file1)
    backup_url2 = 'file://{}/{}'.format(backup_dir, backup_file2)

    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    app_def = apps.sleep_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, "The number of running tasks is {}, but 1 was expected".format(app["tasksRunning"])

    # Abdicate the leader with backup
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}'.format(backup_url1)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def marathon_leadership_changed():
        """Assert that the reported leader differs from ``original_leader``."""
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        assert original_leader != current_leader, "A new Marathon leader has not been elected"

    # wait until leader changed
    marathon_leadership_changed()

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_app_existence(expected_instances):
        """Assert the app's state.

        With ``expected_instances == 0`` the app must not be retrievable at
        all; otherwise it must report exactly that many running tasks.
        """
        try:
            app = client.get_app(app_id)
        except Exception:
            # A failed lookup is the expected outcome only when the app should be gone.
            if expected_instances != 0:
                raise
        else:
            assert expected_instances != 0, "The application resurrected"
            # The ``assert`` keyword matters here: without it this line is a
            # bare tuple expression and the task count is never verified.
            assert app['tasksRunning'] == expected_instances, \
                "The number of running tasks is {}, but {} was expected".format(
                    app["tasksRunning"], expected_instances)

    # check if app definition is still there and one instance is still running after new leader was elected
    check_app_existence(1)

    # then remove
    client.remove_app(app_id)
    shakedown.deployment_wait()

    check_app_existence(0)

    # Do a second backup. Before MARATHON-7525 we had the problem, that doing a backup after an app was deleted
    # leads to the state that marathon was not able to re-start, because the second backup failed constantly.

    # Abdicate the leader with backup
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}'.format(backup_url2)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    # wait until leader changed
    # if leader changed, this means that marathon was able to start again, which is great :-).
    marathon_leadership_changed()

    # check if app definition is still not there and no instance is running after new leader was elected
    check_app_existence(0)
Exemplo n.º 14
0
 def marathon_leadership_changed():
     """Assert that leadership moved; request another abdication first when it has not.

     Reads ``original_leader`` from an enclosing scope that is not part of this snippet.
     """
     leader_now = shakedown.marathon_leader_ip()
     print('leader: {}'.format(leader_now))
     still_same_leader = original_leader == leader_now
     if still_same_leader:
         # The same leader is still reported, so ask it to step down once more.
         common.delete_marathon_path('v2/leader')
     assert not still_same_leader, "A new Marathon leader has not been elected"
Exemplo n.º 15
0
def test_marathon_delete_leader_and_check_apps(marathon_service_name):
    """Check app state across two consecutive Marathon leader abdications.

    Steps:
      1. Deploy a sleep app and confirm one task is running.
      2. Abdicate the leader, wait for a different leader, and confirm the app
         still reports one running task.
      3. Remove the app and confirm it can no longer be fetched.
      4. Abdicate again, wait for a different leader, and confirm the app has
         not reappeared.
    """
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    app_def = apps.sleep_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, "The number of running tasks is {}, but 1 was expected".format(app["tasksRunning"])

    # abdicate leader after app was started successfully
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def marathon_leadership_changed():
        """Assert that the leader differs from ``original_leader``; abdicate again if it does not."""
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        if original_leader == current_leader:
            # The same leader is still reported, so request abdication again
            # before the failing assert triggers the next retry.
            common.delete_marathon_path('v2/leader')
        assert original_leader != current_leader, "A new Marathon leader has not been elected"

    # wait until leader changed
    marathon_leadership_changed()

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_app_existence(expected_instances):
        """Assert that the app reports ``expected_instances`` running tasks."""
        app = client.get_app(app_id)
        # A single assert with a message: a preceding message-less duplicate
        # would fail first and hide the diagnostic text.
        assert app['tasksRunning'] == expected_instances, \
            "The number of running tasks is {}, but {} was expected".format(app["tasksRunning"], expected_instances)

    # check if app definition is still there and one instance is still running after new leader was elected
    check_app_existence(1)

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def remove_app(app_id):
        """Remove the app, retrying when the request fails."""
        client.remove_app(app_id)

    def assert_app_is_gone():
        """Assert that fetching the app fails, i.e. its definition no longer exists."""
        try:
            client.get_app(app_id)
        except Exception:
            # A failed lookup is the expected outcome for a removed app.
            # ``Exception`` (not a bare except) keeps KeyboardInterrupt and
            # SystemExit from being swallowed.
            return
        else:
            assert False, "The application resurrected"

    remove_app(app_id)
    shakedown.deployment_wait()

    assert_app_is_gone()

    # Re-read the leader before abdicating again. Comparing against the leader
    # from the start of the test would pass vacuously (it already changed once)
    # or force an extra abdication if that first leader happens to be re-elected.
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    # abdicate leader after app was removed
    common.delete_marathon_path('v2/leader')

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    # wait until leader changed
    marathon_leadership_changed()

    # check if app definition is still not there
    assert_app_is_gone()
Exemplo n.º 16
0
def test_marathon_backup_and_check_apps(marathon_service_name):
    """Abdicate the leader with a backup twice and verify app state after each election.

    The first backup is taken while a sleep app is running; the app must still
    report one running task afterwards. The app is then removed and a second
    backup is taken; Marathon must come back up and the app must stay absent.
    """

    backup_file1 = 'backup1.tar'
    backup_file2 = 'backup2.tar'
    backup_dir = '/tmp'

    # Remove backup archives possibly left over on any master; the command
    # results are deliberately not checked.
    for master_ip in shakedown.get_all_master_ips():
        shakedown.run_command(master_ip, "rm {}/{}".format(backup_dir, backup_file1))
        shakedown.run_command(master_ip, "rm {}/{}".format(backup_dir, backup_file2))

    backup_url1 = 'file://{}/{}'.format(backup_dir, backup_file1)
    backup_url2 = 'file://{}/{}'.format(backup_dir, backup_file2)

    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    app_def = apps.sleep_app()
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1, "The number of running tasks is {}, but 1 was expected".format(app["tasksRunning"])

    # Abdicate the leader with backup
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}'.format(backup_url1)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def marathon_leadership_changed():
        """Assert that the reported leader differs from ``original_leader``."""
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        assert original_leader != current_leader, "A new Marathon leader has not been elected"

    # wait until leader changed
    marathon_leadership_changed()

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30, retry_on_exception=common.ignore_exception)
    def check_app_existence(expected_instances):
        """Assert the app's state.

        With ``expected_instances == 0`` the app must not be retrievable at
        all; otherwise it must report exactly that many running tasks.
        """
        try:
            app = client.get_app(app_id)
        except Exception:
            # A failed lookup is the expected outcome only when the app should be gone.
            if expected_instances != 0:
                raise
        else:
            assert expected_instances != 0, "The application resurrected"
            # The ``assert`` keyword matters here: without it this line is a
            # bare tuple expression and the task count is never verified.
            assert app['tasksRunning'] == expected_instances, \
                "The number of running tasks is {}, but {} was expected".format(
                    app["tasksRunning"], expected_instances)

    # check if app definition is still there and one instance is still running after new leader was elected
    check_app_existence(1)

    # then remove
    client.remove_app(app_id)
    shakedown.deployment_wait()

    check_app_existence(0)

    # Do a second backup. Before MARATHON-7525 we had the problem, that doing a backup after an app was deleted
    # leads to the state that marathon was not able to re-start, because the second backup failed constantly.

    # Abdicate the leader with backup
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}'.format(backup_url2)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    shakedown.wait_for_service_endpoint(marathon_service_name, timedelta(minutes=5).total_seconds())

    # wait until leader changed
    # if leader changed, this means that marathon was able to start again, which is great :-).
    marathon_leadership_changed()

    # check if app definition is still not there and no instance is running after new leader was elected
    check_app_existence(0)
Exemplo n.º 17
0
def test_marathon_backup_and_check_apps(marathon_service_name):
    """Abdicate the leader with a backup twice and verify app state after each election.

    The first backup is taken while an app is running; the app must still
    report one running task afterwards. The app is then removed and a second
    backup is taken; Marathon must come back up with no task reported running.
    """

    backup_file1 = 'backup1.tar'
    backup_file2 = 'backup2.tar'
    backup_dir = '/tmp'
    backup_url1 = 'file://{}/{}'.format(backup_dir, backup_file1)
    backup_url2 = 'file://{}/{}'.format(backup_dir, backup_file2)

    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))

    # start an app
    app_def = common.app(id=uuid.uuid4().hex)
    app_id = app_def['id']

    client = marathon.create_client()
    client.add_app(app_def)
    shakedown.deployment_wait()

    app = client.get_app(app_id)
    assert app['tasksRunning'] == 1

    # Abdicate the leader with backup
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}'.format(backup_url1)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    shakedown.wait_for_service_endpoint(marathon_service_name,
                                        timedelta(minutes=5).total_seconds())

    # wait_fixed spaces the attempts out; without it all 30 attempts run
    # back-to-back and can be exhausted before the election has finished.
    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def marathon_leadership_changed():
        """Assert that the reported leader differs from ``original_leader``."""
        current_leader = shakedown.marathon_leader_ip()
        print('leader: {}'.format(current_leader))
        assert original_leader != current_leader

    # wait until leader changed
    marathon_leadership_changed()

    @retrying.retry(wait_fixed=1000, stop_max_attempt_number=30,
                    retry_on_exception=common.ignore_exception)
    def check_app_existence(expected_instances):
        """Assert that the app reports ``expected_instances`` running tasks."""
        app = client.get_app(app_id)
        assert app['tasksRunning'] == expected_instances

    # check if app definition is still there and one instance is still running after new leader was elected
    check_app_existence(1)

    # then remove
    client.remove_app(app_id)
    shakedown.deployment_wait()

    # NOTE(review): assumes get_app still returns a record right after the
    # removal -- confirm against the client's behaviour for deleted apps.
    app = client.get_app(app_id)
    assert app['tasksRunning'] == 0

    # Do a second backup. Before MARATHON-7525 we had the problem, that doing a backup after an app was deleted
    # leads to the state that marathon was not able to re-start, because the second backup failed constantly.

    # Abdicate the leader with backup
    original_leader = shakedown.marathon_leader_ip()
    print('leader: {}'.format(original_leader))
    url = 'v2/leader?backup={}'.format(backup_url2)
    print('DELETE {}'.format(url))
    common.delete_marathon_path(url)

    shakedown.wait_for_service_endpoint(marathon_service_name,
                                        timedelta(minutes=5).total_seconds())

    # wait until leader changed
    # if leader changed, this means that marathon was able to start again, which is great :-).
    marathon_leadership_changed()

    # check if app definition is still not there and no instance is running after new leader was elected
    check_app_existence(0)
Exemplo n.º 18
0
 def marathon_leadership_changed():
     """Assert that leadership moved; request another abdication first when it has not.

     Reads ``original_leader`` from an enclosing scope that is not part of this snippet.
     """
     observed_leader = shakedown.marathon_leader_ip()
     print('leader: {}'.format(observed_leader))
     leader_unchanged = original_leader == observed_leader
     if leader_unchanged:
         # The same leader is still reported, so ask it to step down once more.
         common.delete_marathon_path('v2/leader')
     assert not leader_unchanged, "A new Marathon leader has not been elected"