def wait_for_deployment_and_app_running(app_name: str, timeout: int):
    """Wait for the app's deployment to settle, then for the app to be running.

    The deployment wait is delegated to ``shakedown.deployment_wait``. After
    that, ``marathon app show <app_name>`` is run through the CLI repeatedly
    until ``is_app_running`` accepts the JSON it returns. ``timeout`` is
    handed to each of the two waits separately.
    """
    shakedown.deployment_wait(timeout, app_name)

    # The command never changes between polls, so build it once up front.
    show_cmd = 'marathon app show {}'.format(app_name)

    def app_running():
        log.info('Running %s', show_cmd)
        return is_app_running(sdk_cmd.get_json_output(show_cmd))

    shakedown.time_wait(app_running, timeout_seconds=timeout)
def time_wait_noisy(predicate, timeout_seconds=DEFAULT_TIMEOUT, ignore_exceptions=True):
    """Wrapper of shakedown's spinner which logs the duration of the spin.

    Args:
        predicate: Zero-argument callable. Spinning stops once it returns a
            truthy value.
        timeout_seconds: Forwarded to ``shakedown.time_wait`` and shown in
            the progress messages.
        ignore_exceptions: When true, an exception raised by ``predicate`` is
            printed with its traceback and counted as "not ready yet". When
            false, the exception propagates out of the spinner.

    Returns:
        The value returned by ``shakedown.time_wait`` (presumably the elapsed
        time -- confirm against the shakedown version in use).
    """
    start = time.time()

    def wrapper():
        try:
            result = predicate()
        except Exception:
            if not ignore_exceptions:
                raise
            sdk_utils.out(traceback.format_exc())
            return False
        if not result:
            sdk_utils.out('[{}/{}] Waiting...'.format(
                pretty_time(time.time() - start),
                pretty_time(timeout_seconds)))
        return result

    # we perform our own custom handling of exceptions, disable the underlying version:
    duration = shakedown.time_wait(
        wrapper, timeout_seconds=timeout_seconds, ignore_exceptions=False)

    # Report the total spin time from our own clock so the message does not
    # depend on the type of value shakedown hands back.
    sdk_utils.out('Waited {} for success'.format(pretty_time(time.time() - start)))
    return duration
def _retried_install_impl(
        package_name,
        service_name,
        expected_running_tasks,
        options=None,
        package_version=None,
        timeout_seconds=TIMEOUT_SECONDS):
    """Cleaned up version of shakedown's package_install().

    Args:
        package_name: Name of the package to resolve through Cosmos.
        service_name: Marathon app name used to check for an existing install
            and to wait for running tasks.
        expected_running_tasks: Number of tasks to wait for; the task wait is
            skipped when this is not greater than zero.
        options: Package options passed to the install call and used to render
            the Marathon JSON. Defaults to an empty dict.
        package_version: Version to install, or ``None`` to let the package
            manager pick one.
        timeout_seconds: Timeout applied to the task wait and, separately, to
            the deployment wait.
    """
    # Build a fresh dict per call: a shared ``{}`` default would carry any
    # mutation made by a callee over into later invocations.
    if options is None:
        options = {}

    package_manager = dcos.packagemanager.PackageManager(
        dcos.cosmos.get_cosmos_url())
    pkg = package_manager.get_package_version(package_name, package_version)

    if package_version is None:
        # Get the resolved version for logging below
        package_version = 'auto:{}'.format(pkg.version())

    log.info(
        'Installing package={} service={} with options={} version={}'.format(
            package_name, service_name, options, package_version))

    # Trigger package install, but only if it's not already installed.
    # We expect upstream to have confirmed that it wasn't already installed beforehand.
    if sdk_marathon.app_exists(service_name):
        log.info(
            'Marathon app={} exists, skipping package install call'.format(
                service_name))
    else:
        package_manager.install_app(pkg, options)

    # Install CLI while package starts to install
    if pkg.cli_definition():
        log.info('Installing CLI for package={}'.format(package_name))
        dcos.subcommand.install(pkg)

    # Wait for expected tasks to come up
    if expected_running_tasks > 0:
        shakedown.wait_for_service_tasks_running(
            service_name, expected_running_tasks, timeout_seconds)

    # Wait for completed marathon deployment
    app_id = pkg.marathon_json(options).get('id')
    # TODO: initial wait time here? jenkins install is usually 90-120s
    # TODO: randomize the sleep_seconds
    shakedown.time_wait(
        lambda: shakedown.deployment_predicate(app_id),
        timeout_seconds,
        sleep_seconds=20)
def wait_for_deployment_and_app_removal(app_id, timeout=TIMEOUT_SECONDS):
    """Wait until Marathon no longer lists the application.

    First waits for deployments on ``app_id`` to finish, then polls the
    Marathon app list until ``app_id`` is absent. ``timeout`` is applied to
    each of the two waits separately.
    """
    log.info('Waiting for no deployments for {}'.format(app_id))
    shakedown.deployment_wait(timeout, app_id)

    client = shakedown.marathon.create_client()

    def marathon_dropped_app():
        app_ids = [app['id'] for app in client.get_apps()]
        log.info('Marathon app IDs: {}'.format(app_ids))

        match_count = sum(1 for known_id in app_ids if known_id == app_id)
        if match_count > 1:
            # More than one entry with the same id is unexpected; flag it.
            log.warning('Found multiple apps with id {}'.format(app_id))
        return match_count == 0

    log.info('Waiting for no {} Marathon app'.format(app_id))
    shakedown.time_wait(marathon_dropped_app, timeout_seconds=timeout)
def deployment_wait(timeout=120, service_id=None):
    """Spin until ``deployment_predicate(service_id)`` is satisfied.

    Overrides the default shakedown method so that it is possible to wait for
    specific pods in addition to apps. This should probably be fixed in the
    dcos-cli instead, at which point this helper can be removed.
    """
    def deployment_finished():
        return deployment_predicate(service_id)

    shakedown.time_wait(deployment_finished, timeout)
def wait_for_mesos_endpoint(timeout_sec=timedelta(minutes=5).total_seconds()):
    """Spin until shakedown's ``mesos_available_predicate`` is satisfied.

    ``timeout_sec`` defaults to five minutes expressed in seconds. The result
    of ``shakedown.time_wait`` is returned unchanged.

    NOTE(review): the previous docstring stated this returns true when the
    endpoint is available and false on expiration; that depends on
    ``shakedown.time_wait`` and should be confirmed against its documentation.
    """
    def mesos_is_available():
        return shakedown.mesos_available_predicate()

    return shakedown.time_wait(mesos_is_available, timeout_seconds=timeout_sec)
def uninstall(service_name, package_name=None, role=None, principal=None, zk=None):
    """Uninstall a package, with cleanup that depends on the DC/OS version.

    On DC/OS 1.10 or newer the package is uninstalled, then this waits for
    Marathon deployments to finish and for the app to drop out of Marathon's
    app list. ``DCOSException``/``ValueError`` raised along the way are
    printed rather than propagated, and reserved resources are listed at the
    end regardless of outcome.

    On older versions the package is uninstalled (failures are printed and
    ignored) and the janitor container is then run on the master with the
    given or derived role, principal and ZK node.

    Args:
        service_name: Name of the service; may carry a leading slash.
        package_name: Package to uninstall; defaults to ``service_name``.
        role: Janitor role; defaults to ``<deslashed name>-role``.
        principal: Janitor principal; defaults to ``<service_name>-principal``.
        zk: Janitor ZK node; defaults to ``dcos-service-<deslashed name>``.
    """
    start = time.time()
    if package_name is None:
        package_name = service_name

    if not shakedown.dcos_version_less_than("1.10"):
        sdk_utils.out('Uninstalling {}'.format(service_name))
        try:
            shakedown.uninstall_package_and_wait(
                package_name, service_name=service_name)

            # service_name may already contain a leading slash:
            marathon_app_id = '/' + service_name.lstrip('/')
            sdk_utils.out(
                'Waiting for no deployments for {}'.format(marathon_app_id))
            shakedown.deployment_wait(600, marathon_app_id)

            # wait for service to be gone according to marathon
            def marathon_dropped_service():
                marathon = shakedown.marathon.create_client()
                app_ids = [app['id'] for app in marathon.get_apps()]
                sdk_utils.out('Marathon apps: {}'.format(app_ids))
                hits = sum(
                    1 for candidate in app_ids if candidate == marathon_app_id)
                if hits > 1:
                    sdk_utils.out('Found multiple apps with id {}'.format(
                        marathon_app_id))
                return hits == 0

            sdk_utils.out(
                'Waiting for no {} Marathon app'.format(marathon_app_id))
            shakedown.time_wait(marathon_dropped_service)
        except (dcos.errors.DCOSException, ValueError) as err:
            sdk_utils.out(
                'Got exception when uninstalling package: {}'.format(err))
        finally:
            sdk_utils.list_reserved_resources()
        return

    # DC/OS older than 1.10: uninstall, then clean up with the janitor.
    sdk_utils.out('Uninstalling/janitoring {}'.format(service_name))
    try:
        shakedown.uninstall_package_and_wait(
            package_name, service_name=service_name)
    except (dcos.errors.DCOSException, ValueError) as err:
        sdk_utils.out(
            ('Got exception when uninstalling package, '
             'continuing with janitor anyway: {}').format(err))

    janitor_start = time.time()

    # leading slash removed, other slashes converted to double underscores:
    deslashed_service_name = service_name.lstrip('/').replace('/', '__')
    role = deslashed_service_name + '-role' if role is None else role
    principal = service_name + '-principal' if principal is None else principal
    zk = 'dcos-service-' + deslashed_service_name if zk is None else zk

    auth_token = shakedown.run_dcos_command(
        'config show core.dcos_acs_token')[0].strip()
    janitor_cmd = (
        'docker run mesosphere/janitor /janitor.py '
        '-r {role} -p {principal} -z {zk} --auth_token={auth}'
    ).format(role=role, principal=principal, zk=zk, auth=auth_token)
    shakedown.run_command_on_master(janitor_cmd)

    finish = time.time()
    sdk_utils.out(
        'Uninstall done after pkg({}) + janitor({}) = total({})'.format(
            shakedown.pretty_duration(janitor_start - start),
            shakedown.pretty_duration(finish - janitor_start),
            shakedown.pretty_duration(finish - start)))
def wait_for_job_started(job_id, run_id, timeout=120):
    """Spin until ``job_run_predicate(job_id, run_id)`` is satisfied.

    Used to verify that the job run identified by ``run_id`` is in state
    running or finished. ``timeout`` is handed to ``shakedown.time_wait``.
    """
    def job_run_started():
        return job_run_predicate(job_id, run_id)

    shakedown.time_wait(job_run_started, timeout)
def _uninstall(
        package_name,
        service_name,
        role=None,
        service_account=None,
        zk=None):
    """Uninstall a package, with cleanup that depends on the DC/OS version.

    On DC/OS older than 1.10 the package is uninstalled and the janitor
    container is then run on the master. On 1.10 or newer the package is
    uninstalled, then this waits for Marathon deployments to finish and for
    the app to disappear from Marathon's app list; reserved resources are
    listed afterwards regardless of outcome.

    Args:
        package_name: Package to uninstall.
        service_name: Name of the service; may carry a leading slash.
        role: Janitor role; defaults to ``<deslashed name>-role``.
        service_account: Janitor principal; defaults to
            ``<service_name>-principal``.
        zk: Janitor ZK node; defaults to ``dcos-service-<deslashed name>``.

    Raises:
        dcos.errors.DCOSException, ValueError: Re-raised only when the message
            contains ``'marathon'``, so that a caller's retry logic can kick
            in. Other instances of these exceptions are logged and swallowed.
    """
    start = time.time()

    if shakedown.dcos_version_less_than("1.10"):
        log.info('Uninstalling/janitoring {}'.format(service_name))
        try:
            shakedown.uninstall_package_and_wait(
                package_name, service_name=service_name)
        except (dcos.errors.DCOSException, ValueError) as e:
            log.info('Got exception when uninstalling package, ' +
                     'continuing with janitor anyway: {}'.format(e))
            if 'marathon' in str(e):
                log.info('Detected a probable marathon flake. Raising so retry will trigger.')
                raise

        janitor_start = time.time()

        # leading slash removed, other slashes converted to double underscores:
        deslashed_service_name = service_name.lstrip('/').replace('/', '__')
        if role is None:
            role = deslashed_service_name + '-role'
        if service_account is None:
            service_account = service_name + '-principal'
        if zk is None:
            zk = 'dcos-service-' + deslashed_service_name
        janitor_cmd = ('docker run mesosphere/janitor /janitor.py '
                       '-r {role} -p {service_account} -z {zk} --auth_token={auth}')
        shakedown.run_command_on_master(
            janitor_cmd.format(
                role=role,
                service_account=service_account,
                zk=zk,
                auth=sdk_cmd.run_cli('config show core.dcos_acs_token', print_output=False).strip()))

        finish = time.time()

        log.info(
            'Uninstall done after pkg({}) + janitor({}) = total({})'.format(
                shakedown.pretty_duration(janitor_start - start),
                shakedown.pretty_duration(finish - janitor_start),
                shakedown.pretty_duration(finish - start)))
    else:
        log.info('Uninstalling {}'.format(service_name))
        try:
            shakedown.uninstall_package_and_wait(
                package_name, service_name=service_name)
            # service_name may already contain a leading slash:
            marathon_app_id = '/' + service_name.lstrip('/')
            log.info('Waiting for no deployments for {}'.format(marathon_app_id))
            shakedown.deployment_wait(TIMEOUT_SECONDS, marathon_app_id)

            # wait for service to be gone according to marathon
            # Create the client once instead of on every poll of the
            # predicate below.
            client = shakedown.marathon.create_client()

            def marathon_dropped_service():
                app_ids = [app['id'] for app in client.get_apps()]
                log.info('Marathon apps: {}'.format(app_ids))
                matching_app_ids = [
                    app_id for app_id in app_ids if app_id == marathon_app_id
                ]
                if len(matching_app_ids) > 1:
                    # Duplicate ids are an anomaly, so log at warning level.
                    log.warning('Found multiple apps with id {}'.format(
                        marathon_app_id))
                return len(matching_app_ids) == 0

            log.info('Waiting for no {} Marathon app'.format(marathon_app_id))
            shakedown.time_wait(marathon_dropped_service, timeout_seconds=TIMEOUT_SECONDS)

        except (dcos.errors.DCOSException, ValueError) as e:
            log.info(
                'Got exception when uninstalling package: {}'.format(e))
            if 'marathon' in str(e):
                log.info('Detected a probable marathon flake. Raising so retry will trigger.')
                raise
        finally:
            sdk_utils.list_reserved_resources()
def _uninstall(
        package_name,
        service_name,
        role=None,
        service_account=None,
        zk=None):
    """Uninstall a package and forget it in ``_installed_service_names``.

    ``service_name`` is first removed from ``_installed_service_names``; a
    missing entry is fine. What follows depends on the DC/OS version:

    * older than 1.10: uninstall the package, then run the janitor container
      on the master with the given or derived role, service account and ZK
      node;
    * 1.10 or newer: uninstall the package, wait for Marathon deployments to
      finish and for the app to disappear from Marathon's app list, and
      finally list reserved resources regardless of outcome.

    ``DCOSException``/``ValueError`` are logged and swallowed unless the
    message contains ``'marathon'``, in which case they are re-raised so a
    caller's retry can trigger.
    """
    start = time.time()

    try:
        _installed_service_names.remove(service_name)
    except KeyError:
        pass  # allow tests to 'uninstall' up-front

    if sdk_utils.dcos_version_less_than('1.10'):
        log.info('Uninstalling/janitoring {}'.format(service_name))
        try:
            shakedown.uninstall_package_and_wait(
                package_name, service_name=service_name)
        except (dcos.errors.DCOSException, ValueError) as err:
            log.info(
                ('Got exception when uninstalling package, '
                 'continuing with janitor anyway: {}').format(err))
            if 'marathon' in str(err):
                log.info('Detected a probable marathon flake. Raising so retry will trigger.')
                raise

        janitor_start = time.time()

        # leading slash removed, other slashes converted to double underscores:
        deslashed_service_name = service_name.lstrip('/').replace('/', '__')
        role = deslashed_service_name + '-role' if role is None else role
        service_account = (
            service_name + '-principal'
            if service_account is None else service_account)
        zk = 'dcos-service-' + deslashed_service_name if zk is None else zk

        auth_token = sdk_cmd.run_cli(
            'config show core.dcos_acs_token', print_output=False).strip()
        janitor_cmd = (
            'docker run mesosphere/janitor /janitor.py '
            '-r {role} -p {service_account} -z {zk} --auth_token={auth}'
        ).format(
            role=role,
            service_account=service_account,
            zk=zk,
            auth=auth_token)
        shakedown.run_command_on_master(janitor_cmd)

        finish = time.time()
        log.info(
            'Uninstall done after pkg({}) + janitor({}) = total({})'.format(
                shakedown.pretty_duration(janitor_start - start),
                shakedown.pretty_duration(finish - janitor_start),
                shakedown.pretty_duration(finish - start)))
        return

    # DC/OS 1.10 or newer from here on.
    log.info('Uninstalling {}'.format(service_name))
    try:
        shakedown.uninstall_package_and_wait(
            package_name, service_name=service_name)

        # service_name may already contain a leading slash:
        marathon_app_id = '/' + service_name.lstrip('/')
        log.info('Waiting for no deployments for {}'.format(marathon_app_id))
        shakedown.deployment_wait(TIMEOUT_SECONDS, marathon_app_id)

        # wait for service to be gone according to marathon
        client = shakedown.marathon.create_client()

        def marathon_dropped_service():
            app_ids = [app['id'] for app in client.get_apps()]
            log.info('Marathon apps: {}'.format(app_ids))
            hits = sum(
                1 for candidate in app_ids if candidate == marathon_app_id)
            if hits > 1:
                log.warning('Found multiple apps with id {}'.format(
                    marathon_app_id))
            return hits == 0

        log.info('Waiting for no {} Marathon app'.format(marathon_app_id))
        shakedown.time_wait(
            marathon_dropped_service, timeout_seconds=TIMEOUT_SECONDS)
    except (dcos.errors.DCOSException, ValueError) as err:
        log.info('Got exception when uninstalling package: {}'.format(err))
        if 'marathon' in str(err):
            log.info('Detected a probable marathon flake. Raising so retry will trigger.')
            raise
    finally:
        sdk_utils.list_reserved_resources()