def get_dcos_cassandra_plan(service_name):
    """Poll the service's /v1/plan endpoint until it responds; return the result."""
    utils.out('Waiting for {} plan to complete...'.format(service_name))

    def query_plan():
        # re-issued on every spin iteration until a response is returned
        return api.get(service_name, '/v1/plan')

    return spin.time_wait_return(query_plan)
def test_deploy():
    """Verify the deployment plan contains a single 'hello' phase with one step."""
    deployment_plan = plan.get_deployment_plan(PACKAGE_NAME).json()
    sdk_utils.out("deployment_plan: " + str(deployment_plan))

    phases = deployment_plan['phases']
    assert len(phases) == 1
    assert phases[0]['name'] == 'hello'
    assert len(phases[0]['steps']) == 1
def test_port_static_to_static_port():
    """Move brokers from static port 9092 to 9095 and verify they restart on it."""
    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)
    pod_prefix = '{}-'.format(DEFAULT_POD_TYPE)
    old_broker_ids = tasks.get_task_ids(SERVICE_NAME, pod_prefix)

    config = marathon.get_config(SERVICE_NAME)
    utils.out('Old Config :{}'.format(config))
    # every broker should currently be on the original static port
    for broker_id in range(DEFAULT_BROKER_COUNT):
        broker_info = service_cli('broker get {}'.format(broker_id))
        assert broker_info['port'] == 9092

    config['env']['BROKER_PORT'] = '9095'
    marathon.update_app(SERVICE_NAME, config)
    utils.out('New Config :{}'.format(config))

    tasks.check_tasks_updated(SERVICE_NAME, pod_prefix, old_broker_ids)
    # all tasks are running
    tasks.check_running(SERVICE_NAME, DEFAULT_BROKER_COUNT)

    # every broker should have picked up the new static port
    for broker_id in range(DEFAULT_BROKER_COUNT):
        broker_info = service_cli('broker get {}'.format(broker_id))
        assert broker_info['port'] == 9095
def test_deploy():
    """Verify deployment fails until required envvars are supplied via marathon.

    taskcfg.yml will initially fail to deploy because several options are
    missing in the default marathon.json.mustache. Verify that tasks are
    failing for 30s before continuing, then add the needed envvars and
    confirm that the deployment succeeds.
    """
    wait_time = 30
    sdk_utils.out('Checking that tasks are failing to launch for at least {}s'.format(wait_time))

    # we can get brief blips of TASK_RUNNING but they shouldnt last more than 2-3s:
    consecutive_task_running = 0

    def fn():
        nonlocal consecutive_task_running
        task_states = [t['state'] for t in shakedown.get_service_tasks(PACKAGE_NAME)]
        sdk_utils.out('Task states: {}'.format(task_states))
        if 'TASK_RUNNING' in task_states:
            consecutive_task_running += 1
            assert consecutive_task_running <= 3
        else:
            consecutive_task_running = 0
        return False

    try:
        # fn never returns True, so this exits either via the expected timeout
        # or via the assert above (tasks stayed running: test failure)
        spin.time_wait_noisy(fn, timeout_seconds=wait_time)
    except shakedown.TimeoutExpired:
        sdk_utils.out('Timeout reached as expected')

    # add the needed envvars in marathon and confirm that the deployment succeeds:
    config = marathon.get_config(PACKAGE_NAME)
    env = config['env']
    del env['SLEEP_DURATION']
    env['TASKCFG_ALL_OUTPUT_FILENAME'] = 'output'
    env['TASKCFG_ALL_SLEEP_DURATION'] = '1000'
    marathon.update_app(PACKAGE_NAME, config)
    check_running()
def install(
        package_name,
        running_task_count,
        service_name=None,
        additional_options=None,
        package_version=None,
        check_suppression=True):
    """Install a package and wait until it is fully deployed and healthy.

    Args:
        package_name: Universe package to install.
        running_task_count: number of tasks expected to reach a running state.
        service_name: service name, if different from package_name.
        additional_options: optional dict of extra install options merged over
            the defaults by get_package_options(). Defaults to no extras.
        package_version: specific package version, or None for the default.
        check_suppression: when True, also wait for the framework to be
            suppressed. This is only configurable in order to support installs
            from Universe during the upgrade_downgrade tests, because currently
            the suppression endpoint isn't supported by all frameworks in
            Universe. It can be removed once all frameworks rely on
            dcos-commons >= 0.13.
    """
    if not service_name:
        service_name = package_name
    start = time.time()
    # NOTE: was a mutable default argument (additional_options={}); use a
    # fresh dict per call instead so no state can leak between installs.
    merged_options = get_package_options(additional_options or {})

    sdk_utils.out('Installing {} with options={} version={}'.format(
        package_name, merged_options, package_version))

    # install_package_and_wait silently waits for all marathon deployments to clear.
    # to give some visibility, install in the following order:

    # 1. install package
    shakedown.install_package(
        package_name,
        package_version=package_version,
        options_json=merged_options)

    # 2. wait for expected tasks to come up
    sdk_utils.out("Waiting for expected tasks to come up...")
    sdk_tasks.check_running(service_name, running_task_count)
    sdk_plan.wait_for_completed_deployment(service_name)

    # 3. check service health
    marathon_client = dcos.marathon.create_client()

    def is_deployment_finished():
        # TODO(nickbp): upstream fix to shakedown, which currently checks for
        # ANY deployments rather than the one we care about
        deploying_apps = set()
        sdk_utils.out("Getting deployments")
        deployments = marathon_client.get_deployments()
        sdk_utils.out("Found {} deployments".format(len(deployments)))
        for deployment in deployments:
            sdk_utils.out("Deployment: {}".format(deployment))
            for app in deployment.get('affectedApps', []):
                sdk_utils.out("Adding {}".format(app))
                deploying_apps.add(app)
        sdk_utils.out('Checking that deployment of {} has ended:\n- Deploying apps: {}'.format(service_name, deploying_apps))
        return not '/{}'.format(service_name) in deploying_apps

    sdk_utils.out("Waiting for marathon deployment to finish...")
    sdk_spin.time_wait_noisy(is_deployment_finished)

    # 4. Ensure the framework is suppressed (see check_suppression in the
    # docstring for why this is optional).
    if check_suppression:
        sdk_utils.out("Waiting for framework to be suppressed...")
        sdk_spin.time_wait_noisy(
            lambda: sdk_api.is_suppressed(service_name))

    sdk_utils.out('Install done after {}'.format(sdk_spin.pretty_time(time.time() - start)))
def test_modify_app_config_rollback():
    """Bump a TASKCFG expiry value, roll it back, and verify that only the
    journal tasks are relaunched while the data tasks are untouched."""
    app_config_field = 'TASKCFG_ALL_CLIENT_READ_SHORTCIRCUIT_STREAMS_CACHE_SIZE_EXPIRY_MS'
    journal_ids = tasks.get_task_ids(PACKAGE_NAME, 'journal')
    data_ids = tasks.get_task_ids(PACKAGE_NAME, 'data')
    # fetch two copies of the config: one to mutate, one to restore later
    old_config = marathon.get_config(PACKAGE_NAME)
    config = marathon.get_config(PACKAGE_NAME)
    sdk_utils.out('marathon config: ')
    sdk_utils.out(config)
    expiry_ms = int(config['env'][app_config_field])
    sdk_utils.out('expiry ms: ' + str(expiry_ms))
    # bump the value by one to force a config change rollout
    config['env'][app_config_field] = str(expiry_ms + 1)
    marathon.update_app(PACKAGE_NAME, config)
    # Wait for journal nodes to be affected by the change
    tasks.check_tasks_updated(PACKAGE_NAME, 'journal', journal_ids)
    # capture the post-change ids so the rollback's relaunch can be detected
    journal_ids = tasks.get_task_ids(PACKAGE_NAME, 'journal')
    sdk_utils.out('old config: ')
    sdk_utils.out(old_config)
    # Put the old config back (rollback)
    marathon.update_app(PACKAGE_NAME, old_config)
    # Wait for the journal nodes to return to their old configuration
    tasks.check_tasks_updated(PACKAGE_NAME, 'journal', journal_ids)
    check_healthy()
    # the live config should show the original expiry value again
    config = marathon.get_config(PACKAGE_NAME)
    assert int(config['env'][app_config_field]) == expiry_ms
    # Data tasks should not have been affected
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'data', data_ids)
def get_plan(service_name, plan):
    """Poll /v1/plans/<plan> for the given service until it responds; return the result."""
    sdk_utils.out("Waiting for {} plan to complete...".format(service_name))

    def query_plan():
        # re-issued on each spin iteration
        return sdk_api.get(service_name, "/v1/plans/{}".format(plan))

    return sdk_spin.time_wait_return(query_plan)
def fn():
    """Return the plan once it has reached the desired status, else False."""
    plan = get_plan(service_name, plan_name)
    sdk_utils.out('Waiting for {} plan to have {} status:\n{}'.format(
        plan_name, status, plan_string(plan_name, plan)))
    if plan and plan['status'] == status:
        return plan
    return False
def test_deploy():
    """After deployment completes, verify one 'hello' phase with one step."""
    plan.wait_for_completed_deployment(PACKAGE_NAME)
    deployment_plan = plan.get_deployment_plan(PACKAGE_NAME)
    sdk_utils.out("deployment_plan: " + str(deployment_plan))

    phases = deployment_plan['phases']
    assert len(phases) == 1
    assert phases[0]['name'] == 'hello'
    assert len(phases[0]['steps']) == 1
def expected_nodes_success_predicate():
    """Return True once the cluster health reports DEFAULT_NODE_COUNT nodes."""
    health = get_elasticsearch_cluster_health()
    if health is None:
        # health endpoint not responding yet
        return False
    node_count = health["number_of_nodes"]
    sdk_utils.out('Waiting for {} healthy nodes, got {}'.format(
        DEFAULT_NODE_COUNT, node_count))
    return node_count == DEFAULT_NODE_COUNT
def expected_nodes():
    """Return True once the cluster health reports DEFAULT_TASK_COUNT nodes."""
    health = _get_elasticsearch_cluster_health(curl_api)
    if health is None:
        # health endpoint not responding yet
        return False
    node_count = health["number_of_nodes"]
    sdk_utils.out('Waiting for {} healthy nodes, got {}'.format(
        DEFAULT_TASK_COUNT, node_count))
    return node_count == DEFAULT_TASK_COUNT
def test_bump_data_nodes():
    """Add a data node and verify the existing data tasks are not restarted."""
    original_data_ids = tasks.get_task_ids(PACKAGE_NAME, 'data')
    sdk_utils.out('data ids: ' + str(original_data_ids))

    marathon.bump_task_count_config(PACKAGE_NAME, 'DATA_COUNT')

    check_healthy(DEFAULT_TASK_COUNT + 1)
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'data', original_data_ids)
def test_bump_journal_cpus():
    """Raise journal CPUs and verify that journal tasks are relaunched."""
    original_journal_ids = tasks.get_task_ids(PACKAGE_NAME, 'journal')
    sdk_utils.out('journal ids: ' + str(original_journal_ids))

    marathon.bump_cpu_count_config(PACKAGE_NAME, 'JOURNAL_CPUS')

    tasks.check_tasks_updated(PACKAGE_NAME, 'journal', original_journal_ids)
    check_healthy()
def test_sidecar():
    """Start the sidecar plan and verify it has one two-step deploy phase."""
    plan.start_sidecar_plan(PACKAGE_NAME)
    sidecar_plan = plan.get_sidecar_plan(PACKAGE_NAME).json()
    sdk_utils.out("sidecar_plan: " + str(sidecar_plan))

    phases = sidecar_plan['phases']
    assert len(phases) == 1
    assert phases[0]['name'] == 'sidecar-deploy'
    assert len(phases[0]['steps']) == 2
def check_properties(xml, expect):
    """Assert that the <property> entries in xml match the expected name->value map."""
    found = {
        prop.find('name').text: prop.find('value').text
        for prop in xml.findall('property')
        if prop.find('name').text in expect
    }
    utils.out('expect: {}\nfound: {}'.format(expect, found))
    assert expect == found
def run_cli(cmd):
    """Run 'dcos <cmd>'; return its stdout, or raise on a nonzero exit code."""
    (stdout, stderr, ret) = shakedown.run_dcos_command(cmd)
    if ret == 0:
        return stdout
    # log the failure details before raising so they appear in test output
    err = 'Got error code {} when running command "dcos {}":\nstdout: "{}"\nstderr: "{}"'.format(
        ret, cmd, stdout, stderr)
    sdk_utils.out(err)
    raise Exception(err)
def fn():
    """Return the plan once the given step has the desired status, else False."""
    plan = get_plan(service_name, plan_name)
    step = get_step(get_phase(plan, phase_name), step_name)
    sdk_utils.out('Waiting for {}.{}.{} step to have {} status:\n{}'.format(
        plan_name, phase_name, step_name, status, plan_string(plan_name, plan)))
    if step and step['status'] == status:
        return plan
    return False
def fn():
    """Return the plan once the given phase has the desired status, else False."""
    plan = get_plan(service_name, plan_name)
    phase = get_phase(plan, phase_name)
    sdk_utils.out('Waiting for {}.{} phase to have {} status:\n{}'.format(
        plan_name, phase_name, status, plan_string(plan_name, plan)))
    if phase and phase['status'] == status:
        return plan
    return False
def upgrade_or_downgrade(package_name, running_task_count, additional_options):
    """Reinstall the package at the target version and wait for all tasks to restart."""
    original_task_ids = tasks.get_task_ids(package_name, '')
    marathon.destroy_app(package_name)
    install.install(
        package_name,
        running_task_count,
        check_suppression=False,
        additional_options=additional_options)

    sdk_utils.out('Waiting for upgrade / downgrade deployment to complete')
    spin.time_wait_noisy(lambda: (
        plan.get_deployment_plan(package_name).json()['status'] == 'COMPLETE'))

    sdk_utils.out('Checking that all tasks have restarted')
    tasks.check_tasks_updated(package_name, '', original_task_ids)
def test_bump_hello_nodes():
    """Add a hello node and verify the existing hello tasks are untouched."""
    check_running()
    original_hello_ids = tasks.get_task_ids(PACKAGE_NAME, 'hello')
    sdk_utils.out('hello ids: ' + str(original_hello_ids))

    marathon.bump_task_count_config(PACKAGE_NAME, 'HELLO_COUNT')

    check_running()
    tasks.check_tasks_not_updated(PACKAGE_NAME, 'hello', original_hello_ids)
def test_deploy():
    """Verify the plan: server-deploy then once-deploy, two steps in each phase."""
    plan.wait_for_completed_deployment(PACKAGE_NAME)
    deployment_plan = plan.get_deployment_plan(PACKAGE_NAME).json()
    sdk_utils.out("deployment_plan: " + str(deployment_plan))

    phases = deployment_plan['phases']
    assert len(phases) == 2
    assert phases[0]['name'] == 'server-deploy'
    assert phases[1]['name'] == 'once-deploy'
    assert len(phases[0]['steps']) == 2
    assert len(phases[1]['steps']) == 2
def fn():
    """Perform the HTTP request, log its outcome, and raise for error statuses."""
    response = dcos.http.request(method, url, **kwargs)
    code = response.status_code
    if log_args:
        sdk_utils.out('Got {} for {} {} (args: {})'.format(
            code, method.upper(), url, kwargs))
    else:
        # avoid dumping potentially-large kwargs: log only their count
        sdk_utils.out('Got {} for {} {} ({} args)'.format(
            code, method.upper(), url, len(kwargs)))
    response.raise_for_status()
    return response
def test_sidecar():
    """Start the 'sidecar' plan, verify its structure, then wait for it to finish."""
    plan.start_plan(PACKAGE_NAME, 'sidecar')

    started_plan = plan.get_plan(PACKAGE_NAME, 'sidecar')
    sdk_utils.out("sidecar plan: " + str(started_plan))
    phases = started_plan['phases']
    assert len(phases) == 1
    assert phases[0]['name'] == 'sidecar-deploy'
    assert len(phases[0]['steps']) == 2

    plan.wait_for_completed_plan(PACKAGE_NAME, 'sidecar')
def run_plan(plan_name, params=None):
    """Start the named plan, verify its single two-step phase, and wait for completion.

    Args:
        plan_name: plan to run; its only phase is expected to be named
            '<plan_name>-deploy' with exactly two steps.
        params: optional plan parameters forwarded to start_plan.
    """
    plan.start_plan(PACKAGE_NAME, plan_name, params)

    started_plan = plan.get_plan(PACKAGE_NAME, plan_name)
    # log the actual plan name (message was previously hardcoded to "sidecar")
    sdk_utils.out(plan_name + " plan: " + str(started_plan))
    assert len(started_plan['phases']) == 1
    assert started_plan['phases'][0]['name'] == plan_name + '-deploy'
    assert len(started_plan['phases'][0]['steps']) == 2

    plan.wait_for_completed_plan(PACKAGE_NAME, plan_name)
def check_tasks_not_updated(service_name, prefix, old_task_ids):
    """Assert that tasks whose names start with prefix were not relaunched."""
    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)
    current_task_ids = get_task_ids(service_name, prefix)
    task_sets = "\n- Old tasks: {}\n- Current tasks: {}".format(
        sorted(old_task_ids), sorted(current_task_ids))
    sdk_utils.out(
        'Checking tasks starting with "{}" have not been updated:{}'.format(
            prefix, task_sets))
    # every old task id must still be present; a missing id means a relaunch
    assert set(old_task_ids).issubset(
        set(current_task_ids)), "Tasks got updated:{}".format(task_sets)
def test_sidecar():
    """Run the sidecar plan, verify its structure, and wait for COMPLETE status."""
    plan.start_sidecar_plan(PACKAGE_NAME)
    sidecar_plan = plan.get_sidecar_plan(PACKAGE_NAME).json()
    sdk_utils.out("sidecar_plan: " + str(sidecar_plan))

    phases = sidecar_plan['phases']
    assert len(phases) == 1
    assert phases[0]['name'] == 'sidecar-deploy'
    assert len(phases[0]['steps']) == 2

    spin.time_wait_noisy(lambda: (
        plan.get_sidecar_plan(PACKAGE_NAME).json()['status'] == 'COMPLETE'))
def test_deploy():
    """Verify the three-phase deployment plan and each phase's step count."""
    deployment_plan = plan.get_deployment_plan(PACKAGE_NAME)
    sdk_utils.out("deployment plan: " + str(deployment_plan))

    phases = deployment_plan['phases']
    assert len(phases) == 3
    assert phases[0]['name'] == 'hello-deploy'
    assert phases[1]['name'] == 'world-server-deploy'
    assert phases[2]['name'] == 'world-once-deploy'
    assert len(phases[0]['steps']) == 2
    assert len(phases[1]['steps']) == 1
    assert len(phases[2]['steps']) == 1
def fn(): nonlocal consecutive_task_running svc_tasks = shakedown.get_service_tasks(PACKAGE_NAME) states = [t['state'] for t in svc_tasks] sdk_utils.out('Task states: {}'.format(states)) if 'TASK_RUNNING' in states: consecutive_task_running += 1 assert consecutive_task_running <= 3 else: consecutive_task_running = 0 return False
def create_topic(service_name=SERVICE_NAME):
    """Create the ephemeral test topic and verify it via list and describe."""
    create_info = service_cli(
        'topic create {}'.format(EPHEMERAL_TOPIC_NAME), service_name=service_name)
    utils.out(create_info)
    assert 'Created topic "%s".\n' % EPHEMERAL_TOPIC_NAME in create_info['message']
    assert ("topics with a period ('.') or underscore ('_') could collide."
            in create_info['message'])

    topic_list_info = service_cli('topic list', service_name=service_name)
    assert topic_list_info == [EPHEMERAL_TOPIC_NAME]

    topic_info = service_cli(
        'topic describe {}'.format(EPHEMERAL_TOPIC_NAME), service_name=service_name)
    assert len(topic_info) == 1
    assert len(topic_info['partitions']) == DEFAULT_PARTITION_COUNT
def marathon_dropped_service():
    """Return True once no marathon app with marathon_app_id remains."""
    client = shakedown.marathon.create_client()
    app_ids = [app['id'] for app in client.get_apps()]
    sdk_utils.out('Marathon apps: {}'.format(app_ids))
    matching = [app_id for app_id in app_ids if app_id == marathon_app_id]
    if len(matching) > 1:
        # unexpected: the same app id appears more than once
        sdk_utils.out('Found multiple apps with id {}'.format(
            marathon_app_id))
    return len(matching) == 0