示例#1
0
def uninstall(
        package_name,
        service_name,
        role=None,
        service_account=None,
        zk=None):
    '''Uninstalls the given package/service and then cleans up after it.

    The cleanup step depends on the cluster version: when DC/OS is older than 1.10 the janitor
    is run with the given role, service_account and zk; otherwise this waits for the service's
    Marathon app to be removed. Reserved resources are listed after each step, whether or not
    that step succeeded.

    Any exception raised by the uninstall or the cleanup step is logged together with its
    traceback and then re-raised to the caller.
    '''
    start = time.time()

    # discard() is a no-op when the name is absent, which allows tests to 'uninstall' up-front.
    _installed_service_names.discard(service_name)

    log.info('Uninstalling {}'.format(service_name))

    try:
        retried_uninstall_package_and_wait(package_name, service_name=service_name)
    except Exception:
        # log.exception() emits the message and the active traceback as a single error record.
        log.exception('Got exception when uninstalling {}'.format(service_name))
        raise
    finally:
        log.info('Reserved resources post uninstall:')
        sdk_utils.list_reserved_resources()

    cleanup_start = time.time()

    try:
        if sdk_utils.dcos_version_less_than('1.10'):
            log.info('Janitoring {}'.format(service_name))
            retried_run_janitor(service_name, role, service_account, zk)
        else:
            log.info('Waiting for Marathon app to be removed {}'.format(service_name))
            sdk_marathon.retried_wait_for_deployment_and_app_removal(
                sdk_marathon.get_app_id(service_name), timeout=TIMEOUT_SECONDS)
    except Exception:
        log.exception('Got exception when cleaning up {}'.format(service_name))
        raise
    finally:
        log.info('Reserved resources post cleanup:')
        sdk_utils.list_reserved_resources()

    finish = time.time()

    # Report how long the package uninstall and the cleanup each took, plus the total.
    log.info(
        'Uninstalled {} after pkg({}) + cleanup({}) = total({})'.format(
            service_name,
            shakedown.pretty_duration(cleanup_start - start),
            shakedown.pretty_duration(finish - cleanup_start),
            shakedown.pretty_duration(finish - start)))
示例#2
0
def uninstall(
        package_name,
        service_name,
        role=None,
        service_account=None,
        zk=None):
    '''Uninstalls package_name/service_name and performs the follow-up cleanup.

    Steps:
      1. Remove service_name from the module's set of installed service names, if present.
      2. Uninstall the package (with retries) and list the reserved resources.
      3. Clean up: on DC/OS older than 1.10 run the janitor with role, service_account and zk;
         otherwise wait for the service's Marathon app to be removed. Reserved resources are
         listed again afterwards.

    Exceptions from step 2 or 3 are logged with their traceback and re-raised; the reserved
    resource listing for that step still runs.
    '''
    start = time.time()

    # A missing entry is fine: tests may 'uninstall' up-front before installing anything.
    _installed_service_names.discard(service_name)

    log.info('Uninstalling {}'.format(service_name))

    try:
        retried_uninstall_package_and_wait(package_name, service_name=service_name)
    except Exception:
        # The exception object itself is not needed: log.exception() attaches the traceback.
        log.exception('Got exception when uninstalling {}'.format(service_name))
        raise
    finally:
        log.info('Reserved resources post uninstall:')
        sdk_utils.list_reserved_resources()

    cleanup_start = time.time()

    try:
        if sdk_utils.dcos_version_less_than('1.10'):
            log.info('Janitoring {}'.format(service_name))
            retried_run_janitor(service_name, role, service_account, zk)
        else:
            log.info('Waiting for Marathon app to be removed {}'.format(service_name))
            sdk_marathon.retried_wait_for_deployment_and_app_removal(
                sdk_marathon.get_app_id(service_name), timeout=TIMEOUT_SECONDS)
    except Exception:
        log.exception('Got exception when cleaning up {}'.format(service_name))
        raise
    finally:
        log.info('Reserved resources post cleanup:')
        sdk_utils.list_reserved_resources()

    finish = time.time()

    log.info(
        'Uninstalled {} after pkg({}) + cleanup({}) = total({})'.format(
            service_name,
            shakedown.pretty_duration(cleanup_start - start),
            shakedown.pretty_duration(finish - cleanup_start),
            shakedown.pretty_duration(finish - start)))
示例#3
0
def uninstall(package_name, service_name):
    '''Removes the given service from the cluster and checks that it left nothing behind.

    Once the uninstall has finished, the service's resources and framework are verified to
    have been cleaned up. Agents that are expected to hold orphaned resources (e.g. because
    they were shut down) should be passed to ignore_dead_agent() before calling this.
    '''
    global _installed_service_names

    began_at = time.time()
    log.info('Uninstalling {}'.format(service_name))

    try:
        _retried_uninstall_package_and_wait(package_name, service_name=service_name)
    except Exception:
        log.exception('Got exception when uninstalling {}'.format(service_name))
        raise

    cleanup_began_at = time.time()

    try:
        if not sdk_utils.dcos_version_less_than('1.10'):
            # 1.10+: the uninstall scheduler has to finish and then be removed by Cosmos
            log.info('Waiting for Marathon app to be removed {}'.format(service_name))
            app_id = sdk_marathon.get_app_id(service_name)
            sdk_marathon.retried_wait_for_deployment_and_app_removal(
                app_id, timeout=TIMEOUT_SECONDS)
        else:
            # 1.9 and older: the janitor is what unreserves the resources
            log.info('Janitoring {}'.format(service_name))
            _retried_run_janitor(service_name)
    except Exception:
        log.exception('Got exception when cleaning up {}'.format(service_name))
        raise

    ended_at = time.time()

    pkg_elapsed = shakedown.pretty_duration(cleanup_began_at - began_at)
    cleanup_elapsed = shakedown.pretty_duration(ended_at - cleanup_began_at)
    total_elapsed = shakedown.pretty_duration(ended_at - began_at)
    summary = 'Uninstalled {} after pkg({}) + cleanup({}) = total({})'.format(
        service_name, pkg_elapsed, cleanup_elapsed, total_elapsed)
    log.info(summary)

    # Sanity check: anything left over (resources or framework) raises here, since that would
    # point at an uninstall bug.
    _verify_completed_uninstall(service_name)

    # Last step: forget the service in the installed list (used by sdk_diag). A missing entry
    # is expected when a test preemptively uninstalls at its start.
    _installed_service_names.discard(service_name)
示例#4
0
def install(
        package_name,
        service_name,
        expected_running_tasks,
        additional_options=None,
        package_version=None,
        timeout_seconds=TIMEOUT_SECONDS,
        wait_for_deployment=True):
    '''Installs package_name as service_name and optionally waits for its deployment plan.

    Args:
        package_name: Name of the package to install.
        service_name: Name the service is installed under.
        expected_running_tasks: Task count handed to the install helper.
        additional_options: Dict of package options passed to get_package_options().
            Defaults to a new empty dict on every call.
        package_version: Package version handed to the install helper (None by default).
        timeout_seconds: Timeout used for both the install and the deployment plan wait.
        wait_for_deployment: When true, wait for the deployment plan to complete after the
            install. Should be disabled ONLY when the scheduler is known to get stuck in an
            incomplete state.
    '''
    start = time.time()
    if additional_options is None:
        # Build a fresh dict per call instead of sharing one mutable default across calls.
        additional_options = {}
    merged_options = get_package_options(additional_options)

    log.info('Installing {}/{} with options={} version={}'.format(
        package_name, service_name, merged_options, package_version))

    # 1. Install package, wait for tasks, wait for marathon deployment
    retried_shakedown_install(
        package_name,
        service_name,
        package_version,
        merged_options,
        timeout_seconds,
        expected_running_tasks)

    # 2. Wait for the scheduler to be idle (as implied by deploy plan completion and suppressed bit)
    # This should be skipped ONLY when it's known that the scheduler will be stuck in an incomplete state.
    if wait_for_deployment:
        # this can take a while, default is 15 minutes. for example with HDFS, we can hit the expected
        # total task count via ONCE tasks, without actually completing deployment
        log.info("Waiting for {}/{} to finish deployment plan...".format(
            package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)

    log.info('Installed {}/{} after {}'.format(
        package_name, service_name, shakedown.pretty_duration(time.time() - start)))
示例#5
0
def install(
        package_name,
        service_name,
        expected_running_tasks,
        additional_options=None,
        package_version=None,
        timeout_seconds=TIMEOUT_SECONDS,
        wait_for_deployment=True,
        insert_strict_options=True,
        install_cli=True):
    '''Installs package_name as service_name and records it as installed.

    Args:
        package_name: Name of the package to install.
        service_name: Name the service is installed under; must not already exist in Marathon.
        expected_running_tasks: Task count handed to the install helper.
        additional_options: Dict of package options. Defaults to a new empty dict per call.
        package_version: Package version handed to the install helper (None by default).
        timeout_seconds: Timeout used for both the install and the deployment plan wait.
        wait_for_deployment: When true, wait for the deployment plan to complete afterwards.
        insert_strict_options: When true and the cluster is in strict mode, the service
            account/secret options are merged with additional_options.
        install_cli: Handed through to the install helper.

    Raises:
        dcos.errors.DCOSException: If a Marathon app for service_name already exists.
    '''
    start = time.time()

    if additional_options is None:
        # Build a fresh dict per call: the options may be handed to the install helper as-is,
        # so a shared mutable default could leak state between calls.
        additional_options = {}

    # If the package is already installed at this point, fail immediately.
    if sdk_marathon.app_exists(service_name):
        raise dcos.errors.DCOSException('Service is already installed: {}'.format(service_name))

    if insert_strict_options and sdk_utils.is_strict_mode():
        # strict mode requires correct principal and secret to perform install.
        # see also: sdk_security.py
        options = merge_dictionaries({
            'service': {
                'service_account': 'service-acct',
                'principal': 'service-acct',
                'service_account_secret': 'secret',
                'secret_name': 'secret'
            }
        }, additional_options)
    else:
        options = additional_options

    # 1. Install package, wait for tasks, wait for marathon deployment
    _retried_install_impl(
        package_name,
        service_name,
        expected_running_tasks,
        options,
        package_version,
        timeout_seconds,
        install_cli)

    # 2. Wait for the scheduler to be idle (as implied by deploy plan completion and suppressed bit)
    # This should be skipped ONLY when it's known that the scheduler will be stuck in an incomplete
    # state, or if the thing being installed doesn't have a deployment plan (e.g. standalone app)
    if wait_for_deployment:
        # this can take a while, default is 15 minutes. for example with HDFS, we can hit the expected
        # total task count via FINISHED tasks, without actually completing deployment
        log.info('Waiting for package={} service={} to finish deployment plan...'.format(
            package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)

    log.info('Installed package={} service={} after {}'.format(
        package_name, service_name, shakedown.pretty_duration(time.time() - start)))

    # Remember the service so that a later uninstall can drop it from the installed set.
    _installed_service_names.add(service_name)
示例#6
0
def install(
        package_name,
        service_name,
        expected_running_tasks,
        additional_options=None,
        package_version=None,
        timeout_seconds=TIMEOUT_SECONDS,
        wait_for_deployment=True,
        insert_strict_options=True):
    '''Installs package_name as service_name and records it as installed.

    Args:
        package_name: Name of the package to install.
        service_name: Name the service is installed under; must not already exist in Marathon.
        expected_running_tasks: Task count handed to the install helper.
        additional_options: Dict of package options. Defaults to a new empty dict per call.
        package_version: Package version handed to the install helper (None by default).
        timeout_seconds: Timeout used for both the install and the deployment plan wait.
        wait_for_deployment: When true, wait for the deployment plan to complete afterwards.
        insert_strict_options: When true and the cluster is in strict mode, the service
            account/secret options are merged with additional_options.

    Raises:
        dcos.errors.DCOSException: If a Marathon app for service_name already exists.
    '''
    start = time.time()

    if additional_options is None:
        # Avoid a shared mutable default: in the non-strict path this very dict is what gets
        # handed to the install helper.
        additional_options = {}

    # If the package is already installed at this point, fail immediately.
    if sdk_marathon.app_exists(service_name):
        raise dcos.errors.DCOSException('Service is already installed: {}'.format(service_name))

    if insert_strict_options and sdk_utils.is_strict_mode():
        # strict mode requires correct principal and secret to perform install.
        # see also: sdk_security.py
        options = merge_dictionaries({
            'service': {
                'service_account': 'service-acct',
                'principal': 'service-acct',
                'service_account_secret': 'secret',
                'secret_name': 'secret'
            }
        }, additional_options)
    else:
        options = additional_options

    # 1. Install package, wait for tasks, wait for marathon deployment
    _retried_install_impl(
        package_name,
        service_name,
        expected_running_tasks,
        options,
        package_version,
        timeout_seconds)

    # 2. Wait for the scheduler to be idle (as implied by deploy plan completion and suppressed bit)
    # This should be skipped ONLY when it's known that the scheduler will be stuck in an incomplete
    # state, or if the thing being installed doesn't have a deployment plan (e.g. standalone app)
    if wait_for_deployment:
        # this can take a while, default is 15 minutes. for example with HDFS, we can hit the expected
        # total task count via FINISHED tasks, without actually completing deployment
        log.info('Waiting for package={} service={} to finish deployment plan...'.format(
            package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)

    log.info('Installed package={} service={} after {}'.format(
        package_name, service_name, shakedown.pretty_duration(time.time() - start)))

    # Remember the service so that a later uninstall can drop it from the installed set.
    _installed_service_names.add(service_name)
示例#7
0
def install(
        package_name,
        expected_running_tasks,
        service_name=None,
        additional_options=None,
        package_version=None,
        timeout_seconds=TIMEOUT_SECONDS,
        wait_for_deployment=True):
    '''Installs a package and optionally waits for the scheduler to settle.

    Args:
        package_name: Name of the package to install.
        expected_running_tasks: Task count handed to the install helper.
        service_name: Name to install the service under; falls back to package_name when empty.
        additional_options: Dict of package options passed to get_package_options().
            Defaults to a new empty dict on every call.
        package_version: Package version handed to the install helper (None by default).
        timeout_seconds: Timeout used for both the install and the deployment plan wait.
        wait_for_deployment: When true, wait for the deployment plan to complete and, on
            DC/OS 1.9 or later, for the service to report itself as suppressed.
    '''
    if not service_name:
        service_name = package_name
    if additional_options is None:
        # Build a fresh dict per call instead of sharing one mutable default across calls.
        additional_options = {}
    start = time.time()
    merged_options = get_package_options(additional_options)

    log.info('Installing {}/{} with options={} version={}'.format(
        package_name, service_name, merged_options, package_version))

    # 1. Install package, wait for tasks, wait for marathon deployment
    retried_shakedown_install(
        package_name,
        package_version,
        service_name,
        merged_options,
        timeout_seconds,
        expected_running_tasks)

    # 2. Wait for the scheduler to be idle (as implied by deploy plan completion and suppressed bit)
    # This should be skipped ONLY when it's known that the scheduler will be stuck in an incomplete state.
    if wait_for_deployment:
        # this can take a while, default is 15 minutes. for example with HDFS, we can hit the expected
        # total task count via FINISHED tasks, without actually completing deployment
        log.info("Waiting for {}/{} to finish deployment plan...".format(
            package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)

        # given the above wait for plan completion, here we just wait up to 5 minutes
        if shakedown.dcos_version_less_than("1.9"):
            log.info("Skipping `is_suppressed` check for %s/%s as this is only suppored starting in version 1.9",
                     package_name, service_name)
        else:
            log.info("Waiting for %s/%s to be suppressed...", package_name, service_name)
            shakedown.wait_for(
                lambda: sdk_api.is_suppressed(service_name),
                noisy=True,
                timeout_seconds=5 * 60)

    log.info('Installed {}/{} after {}'.format(
        package_name, service_name, shakedown.pretty_duration(time.time() - start)))
示例#8
0
def install(
        package_name,
        service_name,
        expected_running_tasks,
        additional_options=None,
        package_version=None,
        timeout_seconds=TIMEOUT_SECONDS,
        wait_for_deployment=True):
    '''Installs package_name as service_name and optionally waits for the scheduler to settle.

    Args:
        package_name: Name of the package to install.
        service_name: Name the service is installed under.
        expected_running_tasks: Task count handed to the install helper.
        additional_options: Dict of package options passed to get_package_options().
            Defaults to a new empty dict on every call.
        package_version: Package version handed to the install helper (None by default).
        timeout_seconds: Timeout used for both the install and the deployment plan wait.
        wait_for_deployment: When true, wait for the deployment plan to complete and, on
            DC/OS 1.9 or later, for the service to report itself as suppressed.
    '''
    start = time.time()
    if additional_options is None:
        # Build a fresh dict per call instead of sharing one mutable default across calls.
        additional_options = {}
    merged_options = get_package_options(additional_options)

    log.info('Installing {}/{} with options={} version={}'.format(
        package_name, service_name, merged_options, package_version))

    # 1. Install package, wait for tasks, wait for marathon deployment
    retried_shakedown_install(
        package_name,
        service_name,
        package_version,
        merged_options,
        timeout_seconds,
        expected_running_tasks)

    # 2. Wait for the scheduler to be idle (as implied by deploy plan completion and suppressed bit)
    # This should be skipped ONLY when it's known that the scheduler will be stuck in an incomplete state.
    if wait_for_deployment:
        # this can take a while, default is 15 minutes. for example with HDFS, we can hit the expected
        # total task count via FINISHED tasks, without actually completing deployment
        log.info("Waiting for {}/{} to finish deployment plan...".format(
            package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)

        # given the above wait for plan completion, here we just wait up to 5 minutes
        if shakedown.dcos_version_less_than("1.9"):
            log.info("Skipping `is_suppressed` check for %s/%s as this is only suppored starting in version 1.9",
                     package_name, service_name)
        else:
            log.info("Waiting for %s/%s to be suppressed...", package_name, service_name)
            shakedown.wait_for(
                lambda: sdk_api.is_suppressed(service_name),
                noisy=True,
                timeout_seconds=5 * 60)

    log.info('Installed {}/{} after {}'.format(
        package_name, service_name, shakedown.pretty_duration(time.time() - start)))
示例#9
0
def install(package_name,
            running_task_count,
            service_name=None,
            additional_options=None,
            package_version=None,
            check_suppression=True,
            timeout_seconds=15 * 60):
    '''Installs a package via shakedown and optionally waits for it to be suppressed.

    Args:
        package_name: Name of the package to install.
        running_task_count: Passed to shakedown as the expected number of running tasks.
        service_name: Name to install the service under; falls back to package_name when empty.
        additional_options: Dict of package options passed to get_package_options().
            Defaults to a new empty dict on every call.
        package_version: Package version passed to shakedown (None by default).
        check_suppression: When true, wait up to 5 minutes for the service to report itself
            as suppressed.
        timeout_seconds: Timeout passed to the shakedown install.
    '''
    if not service_name:
        service_name = package_name
    if additional_options is None:
        # Build a fresh dict per call instead of sharing one mutable default across calls.
        additional_options = {}
    start = time.time()
    merged_options = get_package_options(additional_options)

    sdk_utils.out('Installing {} with options={} version={}'.format(
        package_name, merged_options, package_version))

    # 1. Install package, wait for tasks, wait for marathon deployment
    shakedown.install_package(package_name,
                              package_version=package_version,
                              service_name=service_name,
                              options_json=merged_options,
                              wait_for_completion=True,
                              timeout_sec=timeout_seconds,
                              expected_running_tasks=running_task_count)

    # 2. Ensure the framework is suppressed.
    #
    # This is only configurable in order to support installs from
    # Universe during the upgrade_downgrade tests, because currently
    # the suppression endpoint isn't supported by all frameworks in
    # Universe.  It can be removed once all frameworks rely on
    # dcos-commons >= 0.13.
    if check_suppression:
        sdk_utils.out("Waiting for framework to be suppressed...")
        shakedown.wait_for(lambda: sdk_api.is_suppressed(service_name),
                           noisy=True,
                           timeout_seconds=5 * 60)

    sdk_utils.out('Install done after {}'.format(
        shakedown.pretty_duration(time.time() - start)))
示例#10
0
def uninstall(service_name,
              package_name=None,
              role=None,
              principal=None,
              zk=None):
    '''Uninstalls a service, using a cleanup path that depends on the DC/OS version.

    package_name falls back to service_name when it is not given.

    On DC/OS older than 1.10 the package is uninstalled and the janitor container is then run
    on the master; role, principal and zk default to values derived from service_name. A
    DCOSException or ValueError from the package uninstall is reported and the janitor still
    runs.

    On 1.10 and later the package is uninstalled, then this waits for the app's Marathon
    deployments to finish and for the app to disappear from Marathon's app list. A
    DCOSException or ValueError is reported but not re-raised, and reserved resources are
    listed in every case.
    '''
    t_begin = time.time()

    package_name = service_name if package_name is None else package_name

    if not shakedown.dcos_version_less_than("1.10"):
        sdk_utils.out('Uninstalling {}'.format(service_name))
        try:
            shakedown.uninstall_package_and_wait(package_name,
                                                 service_name=service_name)
            # normalize to exactly one leading slash (service_name may already have one)
            marathon_app_id = '/' + service_name.lstrip('/')
            sdk_utils.out(
                'Waiting for no deployments for {}'.format(marathon_app_id))
            shakedown.deployment_wait(600, marathon_app_id)

            def marathon_dropped_service():
                # True once Marathon no longer lists an app with our id
                listed_ids = [
                    entry['id']
                    for entry in shakedown.marathon.create_client().get_apps()
                ]
                sdk_utils.out('Marathon apps: {}'.format(listed_ids))
                occurrences = listed_ids.count(marathon_app_id)
                if occurrences > 1:
                    sdk_utils.out('Found multiple apps with id {}'.format(
                        marathon_app_id))
                return occurrences == 0

            sdk_utils.out(
                'Waiting for no {} Marathon app'.format(marathon_app_id))
            shakedown.time_wait(marathon_dropped_service)
        except (dcos.errors.DCOSException, ValueError) as err:
            sdk_utils.out(
                'Got exception when uninstalling package: {}'.format(err))
        finally:
            sdk_utils.list_reserved_resources()
        return

    # DC/OS < 1.10: uninstall, then janitor
    sdk_utils.out('Uninstalling/janitoring {}'.format(service_name))
    try:
        shakedown.uninstall_package_and_wait(package_name,
                                             service_name=service_name)
    except (dcos.errors.DCOSException, ValueError) as err:
        sdk_utils.out(
            'Got exception when uninstalling package, '
            'continuing with janitor anyway: {}'.format(err))

    t_janitor = time.time()

    # strip the leading slash and turn the remaining slashes into double underscores
    flat_name = service_name.lstrip('/').replace('/', '__')
    role = (flat_name + '-role') if role is None else role
    principal = (service_name + '-principal') if principal is None else principal
    zk = ('dcos-service-' + flat_name) if zk is None else zk

    auth_token = shakedown.run_dcos_command(
        'config show core.dcos_acs_token')[0].strip()
    janitor_template = ('docker run mesosphere/janitor /janitor.py '
                        '-r {role} -p {principal} -z {zk} --auth_token={auth}')
    shakedown.run_command_on_master(
        janitor_template.format(
            role=role, principal=principal, zk=zk, auth=auth_token))

    t_end = time.time()

    pkg_elapsed = shakedown.pretty_duration(t_janitor - t_begin)
    janitor_elapsed = shakedown.pretty_duration(t_end - t_janitor)
    total_elapsed = shakedown.pretty_duration(t_end - t_begin)
    sdk_utils.out(
        'Uninstall done after pkg({}) + janitor({}) = total({})'.format(
            pkg_elapsed, janitor_elapsed, total_elapsed))
示例#11
0
def _uninstall(
        package_name,
        service_name,
        role=None,
        service_account=None,
        zk=None):
    '''Uninstalls a service, using a cleanup path that depends on the DC/OS version.

    On DC/OS older than 1.10 the package is uninstalled and the janitor container is then run
    on the master; role, service_account and zk default to values derived from service_name.
    On 1.10 and later the package is uninstalled, then this waits for the app's Marathon
    deployments to finish and for the app to disappear from Marathon's app list; reserved
    resources are listed afterwards in every case.

    In both paths a DCOSException or ValueError is logged and swallowed, unless its text
    contains 'marathon', in which case it is re-raised so that a retry can trigger.
    '''
    t_begin = time.time()

    if not shakedown.dcos_version_less_than("1.10"):
        log.info('Uninstalling {}'.format(service_name))
        try:
            shakedown.uninstall_package_and_wait(
                package_name, service_name=service_name)
            # normalize to exactly one leading slash (service_name may already have one)
            marathon_app_id = '/' + service_name.lstrip('/')
            log.info('Waiting for no deployments for {}'.format(marathon_app_id))
            shakedown.deployment_wait(TIMEOUT_SECONDS, marathon_app_id)

            def marathon_dropped_service():
                # True once Marathon no longer lists an app with our id
                listed_ids = [
                    entry['id']
                    for entry in shakedown.marathon.create_client().get_apps()
                ]
                log.info('Marathon apps: {}'.format(listed_ids))
                occurrences = listed_ids.count(marathon_app_id)
                if occurrences > 1:
                    log.info('Found multiple apps with id {}'.format(
                        marathon_app_id))
                return occurrences == 0

            log.info('Waiting for no {} Marathon app'.format(marathon_app_id))
            shakedown.time_wait(marathon_dropped_service, timeout_seconds=TIMEOUT_SECONDS)
        except (dcos.errors.DCOSException, ValueError) as err:
            log.info(
                'Got exception when uninstalling package: {}'.format(err))
            if 'marathon' in str(err):
                log.info('Detected a probable marathon flake. Raising so retry will trigger.')
                raise
        finally:
            sdk_utils.list_reserved_resources()
        return

    # DC/OS < 1.10: uninstall, then janitor
    log.info('Uninstalling/janitoring {}'.format(service_name))
    try:
        shakedown.uninstall_package_and_wait(
            package_name, service_name=service_name)
    except (dcos.errors.DCOSException, ValueError) as err:
        log.info(
            'Got exception when uninstalling package, '
            'continuing with janitor anyway: {}'.format(err))
        if 'marathon' in str(err):
            log.info('Detected a probable marathon flake. Raising so retry will trigger.')
            raise

    t_janitor = time.time()

    # strip the leading slash and turn the remaining slashes into double underscores
    flat_name = service_name.lstrip('/').replace('/', '__')
    role = (flat_name + '-role') if role is None else role
    service_account = (
        (service_name + '-principal') if service_account is None else service_account)
    zk = ('dcos-service-' + flat_name) if zk is None else zk

    auth_token = sdk_cmd.run_cli(
        'config show core.dcos_acs_token', print_output=False).strip()
    janitor_template = ('docker run mesosphere/janitor /janitor.py '
                        '-r {role} -p {service_account} -z {zk} --auth_token={auth}')
    shakedown.run_command_on_master(
        janitor_template.format(
            role=role, service_account=service_account, zk=zk, auth=auth_token))

    t_end = time.time()

    pkg_elapsed = shakedown.pretty_duration(t_janitor - t_begin)
    janitor_elapsed = shakedown.pretty_duration(t_end - t_janitor)
    total_elapsed = shakedown.pretty_duration(t_end - t_begin)
    log.info(
        'Uninstall done after pkg({}) + janitor({}) = total({})'.format(
            pkg_elapsed, janitor_elapsed, total_elapsed))
示例#12
0
def _uninstall(
        package_name,
        service_name,
        role=None,
        service_account=None,
        zk=None):
    '''Uninstalls a service, using a cleanup path that depends on the DC/OS version.

    service_name is first dropped from the module's set of installed service names (a missing
    entry is tolerated).

    On DC/OS older than 1.10 the package is uninstalled and the janitor container is then run
    on the master; role, service_account and zk default to values derived from service_name.
    On 1.10 and later the package is uninstalled, then this waits for the app's Marathon
    deployments to finish and for the app to disappear from Marathon's app list; reserved
    resources are listed afterwards in every case.

    In both paths a DCOSException or ValueError is logged and swallowed, unless its text
    contains 'marathon', in which case it is re-raised so that a retry can trigger.
    '''
    global _installed_service_names

    t_begin = time.time()

    # a missing entry is fine: tests are allowed to 'uninstall' up-front
    _installed_service_names.discard(service_name)

    if not sdk_utils.dcos_version_less_than('1.10'):
        log.info('Uninstalling {}'.format(service_name))
        try:
            shakedown.uninstall_package_and_wait(
                package_name, service_name=service_name)
            # normalize to exactly one leading slash (service_name may already have one)
            marathon_app_id = '/' + service_name.lstrip('/')
            log.info('Waiting for no deployments for {}'.format(marathon_app_id))
            shakedown.deployment_wait(TIMEOUT_SECONDS, marathon_app_id)

            # one client, reused by every poll of the predicate below
            client = shakedown.marathon.create_client()

            def marathon_dropped_service():
                # True once Marathon no longer lists an app with our id
                listed_ids = [entry['id'] for entry in client.get_apps()]
                log.info('Marathon apps: {}'.format(listed_ids))
                occurrences = listed_ids.count(marathon_app_id)
                if occurrences > 1:
                    log.warning('Found multiple apps with id {}'.format(
                        marathon_app_id))
                return occurrences == 0

            log.info('Waiting for no {} Marathon app'.format(marathon_app_id))
            shakedown.time_wait(marathon_dropped_service, timeout_seconds=TIMEOUT_SECONDS)
        except (dcos.errors.DCOSException, ValueError) as err:
            log.info(
                'Got exception when uninstalling package: {}'.format(err))
            if 'marathon' in str(err):
                log.info('Detected a probable marathon flake. Raising so retry will trigger.')
                raise
        finally:
            sdk_utils.list_reserved_resources()
        return

    # DC/OS < 1.10: uninstall, then janitor
    log.info('Uninstalling/janitoring {}'.format(service_name))
    try:
        shakedown.uninstall_package_and_wait(
            package_name, service_name=service_name)
    except (dcos.errors.DCOSException, ValueError) as err:
        log.info(
            'Got exception when uninstalling package, '
            'continuing with janitor anyway: {}'.format(err))
        if 'marathon' in str(err):
            log.info('Detected a probable marathon flake. Raising so retry will trigger.')
            raise

    t_janitor = time.time()

    # strip the leading slash and turn the remaining slashes into double underscores
    flat_name = service_name.lstrip('/').replace('/', '__')
    role = (flat_name + '-role') if role is None else role
    service_account = (
        (service_name + '-principal') if service_account is None else service_account)
    zk = ('dcos-service-' + flat_name) if zk is None else zk

    auth_token = sdk_cmd.run_cli(
        'config show core.dcos_acs_token', print_output=False).strip()
    janitor_template = ('docker run mesosphere/janitor /janitor.py '
                        '-r {role} -p {service_account} -z {zk} --auth_token={auth}')
    shakedown.run_command_on_master(
        janitor_template.format(
            role=role, service_account=service_account, zk=zk, auth=auth_token))

    t_end = time.time()

    pkg_elapsed = shakedown.pretty_duration(t_janitor - t_begin)
    janitor_elapsed = shakedown.pretty_duration(t_end - t_janitor)
    total_elapsed = shakedown.pretty_duration(t_end - t_begin)
    log.info(
        'Uninstall done after pkg({}) + janitor({}) = total({})'.format(
            pkg_elapsed, janitor_elapsed, total_elapsed))