def test_old_tasks_not_relaunched():
    """Switch to multi service mode and check the existing hello task is not relaunched."""
    original_hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, "hello")
    assert len(original_hello_ids) > 0, "Got an empty list of task_ids"

    # Passing a list of yaml files in the options is what makes the update
    # launch in multi service mode.
    multiservice_options = {
        "service": {
            "yaml": "",
            "yamls": "svc,foobar_service_name",
        },
    }
    sdk_upgrade.update_or_upgrade_or_downgrade(
        config.PACKAGE_NAME,
        config.SERVICE_NAME,
        to_package_version=None,
        additional_options=multiservice_options,
        expected_running_tasks=4,
        wait_for_deployment=False,
    )

    # New tasks must be launched, but the old task must not be relaunched.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME, multiservice_name="foobar")
    previous_task_id = original_hello_ids.pop()
    sdk_tasks.check_task_not_relaunched(
        config.SERVICE_NAME,
        "hello-0-server",
        previous_task_id,
        multiservice_name=config.SERVICE_NAME,
    )

    foo_task_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, "foo")
    assert len(foo_task_ids) == 1
def check_tasks_not_updated(service_name, prefix, old_task_ids):
    """Assert that every id in ``old_task_ids`` is still among the current tasks.

    The deployment and recovery plans are waited on first, then the current
    task ids matching ``prefix`` are fetched and compared.

    Args:
        service_name: service whose plans and tasks are inspected.
        prefix: task-name prefix passed to ``get_task_ids``.
        old_task_ids: ids captured earlier that must all still be present.

    Raises:
        AssertionError: if any old id is missing from the current ids.
    """
    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)

    current_ids = get_task_ids(service_name, prefix)
    summary = "\n- Old tasks: {}\n- Current tasks: {}".format(
        sorted(old_task_ids), sorted(current_ids)
    )
    log.info('Checking tasks starting with "{}" have not been updated:{}'.format(prefix, summary))

    # Any old id that is no longer present means a task was replaced.
    vanished = set(old_task_ids) - set(current_ids)
    assert not vanished, "Tasks got updated:{}".format(summary)
def test_custom_decommission():
    """Scale WORLD_COUNT down, up, and down again, checking the decommission plan each time."""
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    def set_world_count(count):
        # Re-read the app definition every time so the update is applied to
        # the latest config.
        app_config = sdk_marathon.get_config(foldered_name)
        app_config['env']['WORLD_COUNT'] = count
        sdk_marathon.update_app(foldered_name, app_config)

    def assert_custom_step_is_first():
        sdk_plan.wait_for_completed_plan(foldered_name, 'decommission')
        plan = sdk_plan.get_decommission_plan(foldered_name)
        log.info("decommission plan: {}".format(plan))
        first_step_name = plan['phases'][0]['steps'][0]['name']
        # NOTE(review): the step name spelling ("decomission") is kept exactly
        # as in the original; presumably it matches the service spec - confirm.
        assert "custom_decomission_step" == first_step_name

    set_world_count('1')
    assert_custom_step_is_first()

    # scale back up
    set_world_count('2')
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # Let's decommission again!
    set_world_count('1')
    sdk_plan.wait_for_completed_deployment(foldered_name)
    assert_custom_step_is_first()
def test_custom_zookeeper():
    """Point the service at a custom ZK path and check brokers restart against it."""
    pod_prefix = '{}-'.format(config.DEFAULT_POD_TYPE)
    original_broker_ids = sdk_tasks.get_task_ids(FOLDERED_SERVICE_NAME, pod_prefix)

    # create a topic against the default zk:
    create_cmd = 'topic create {}'.format(config.DEFAULT_TOPIC_NAME)
    sdk_cmd.svc_cli(config.PACKAGE_NAME, FOLDERED_SERVICE_NAME, create_cmd, json=True)

    topics_before = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, FOLDERED_SERVICE_NAME, 'topic list', json=True)
    assert topics_before == [config.DEFAULT_TOPIC_NAME]

    app_config = sdk_marathon.get_config(FOLDERED_SERVICE_NAME)
    # should be using default path when this envvar is empty/unset:
    assert app_config['env']['KAFKA_ZOOKEEPER_URI'] == ''

    # use a custom zk path that's WITHIN the 'dcos-service-' path, so that
    # it's automatically cleaned up in uninstall:
    zk_path = 'master.mesos:2181/{}/CUSTOMPATH'.format(ZK_SERVICE_PATH)
    app_config['env']['KAFKA_ZOOKEEPER_URI'] = zk_path
    sdk_marathon.update_app(FOLDERED_SERVICE_NAME, app_config)

    sdk_tasks.check_tasks_updated(FOLDERED_SERVICE_NAME, pod_prefix, original_broker_ids)
    sdk_plan.wait_for_completed_deployment(FOLDERED_SERVICE_NAME)

    # wait for brokers to finish registering
    test_utils.broker_count_check(
        config.DEFAULT_BROKER_COUNT, service_name=FOLDERED_SERVICE_NAME)

    zk_endpoint = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, FOLDERED_SERVICE_NAME, 'endpoints zookeeper')
    assert zk_endpoint.rstrip('\n') == zk_path

    # topic created earlier against default zk should no longer be present:
    topics_after = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, FOLDERED_SERVICE_NAME, 'topic list', json=True)
    assert topics_after == []
def setup_constraint_switch():
    """Install one hello pod pinned to one agent, then re-pin its placement to another.

    Returns:
        Tuple ``(some_agent, other_agent, hello_ids)``: the two agents used and
        the hello task ids captured before the placement was changed.
    """
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    private_agents = shakedown.get_private_agents()
    some_agent, other_agent = private_agents[0], private_agents[1]
    log.info('Agents: %s %s', some_agent, other_agent)
    assert some_agent != other_agent

    placement_template = "[[\"hostname\", \"LIKE\", \"{}\"]]"
    raw_options = {
        "service": {
            "yaml": "marathon_constraint",
        },
        "hello": {
            "count": 1,
            # First, we stick the pod to some_agent
            "placement": placement_template.format(some_agent),
        },
        "world": {
            "count": 0,
        },
    }
    options = _escape_placement_for_1_9(raw_options)

    sdk_install.install(config.PACKAGE_NAME, config.SERVICE_NAME, 1, additional_options=options)
    sdk_tasks.check_running(config.SERVICE_NAME, 1)
    hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello')

    # Now, stick it to other_agent
    app_config = sdk_marathon.get_config(config.SERVICE_NAME)
    app_config['env']['HELLO_PLACEMENT'] = placement_template.format(other_agent)
    sdk_marathon.update_app(config.SERVICE_NAME, app_config)

    # Wait for the scheduler to be up and settled before advancing.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    return some_agent, other_agent, hello_ids
def kafka_service_tls(service_account):
    """Install the service with transport encryption enabled and yield ``service_account``.

    Generator; presumably used as a pytest fixture (the decorator is not
    visible here). The service is uninstalled on exit, whether or not the
    install succeeded.
    """
    try:
        # Start from a clean slate in case a previous run left the service around.
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

        tls_options = {
            "service": {
                "service_account": service_account,
                "service_account_secret": service_account,
                "security": {
                    "transport_encryption": {
                        "enabled": True,
                    },
                },
            },
        }
        # NOTE(review): this calls config.install, not sdk_install.install,
        # exactly as the original did - presumably a package-level wrapper; confirm.
        config.install(
            config.PACKAGE_NAME,
            config.SERVICE_NAME,
            config.DEFAULT_BROKER_COUNT,
            additional_options=tls_options,
        )
        sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

        yield service_account
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
def hdfs_service_tls(service_account):
    """Install the service with TLS enabled, yield to the caller, then uninstall.

    Generator; presumably used as a pytest fixture (the decorator is not
    visible here).

    Args:
        service_account: value used for both the ``service_account`` and the
            ``service_account_secret`` service options.

    Raises:
        Exception: whatever the install, the deployment wait, or the health
            check raised; a best-effort uninstall is attempted first.
    """
    try:
        sdk_install.install(
            PACKAGE_NAME,
            service_name=SERVICE_NAME,
            expected_running_tasks=DEFAULT_TASK_COUNT,
            additional_options={
                "service": {
                    "service_account_secret": service_account,
                    "service_account": service_account,
                    "tls": {
                        "enabled": True,
                    },
                },
            },
        )
        sdk_plan.wait_for_completed_deployment(SERVICE_NAME)

        # Wait for service health check to pass
        shakedown.service_healthy(SERVICE_NAME)
    except Exception as error:
        try:
            sdk_install.uninstall(PACKAGE_NAME, SERVICE_NAME)
        except Exception:
            # Best-effort cleanup: the setup failure below is the error worth
            # reporting. Only Exception is caught, so KeyboardInterrupt and
            # SystemExit are no longer swallowed as they were by a bare except.
            pass
        raise error

    yield

    sdk_install.uninstall(PACKAGE_NAME, SERVICE_NAME)
def test_custom_zookeeper():
    """Switch the service to a custom ZK path and check the old topic is gone."""
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    pod_prefix = "{}-".format(config.DEFAULT_POD_TYPE)
    original_broker_ids = sdk_tasks.get_task_ids(foldered_name, pod_prefix)

    # create a topic against the default zk:
    test_utils.create_topic(config.DEFAULT_TOPIC_NAME, service_name=foldered_name)

    app_config = sdk_marathon.get_config(foldered_name)
    # should be using default path when this envvar is empty/unset:
    assert app_config["env"]["KAFKA_ZOOKEEPER_URI"] == ""

    # use a custom zk path that's WITHIN the 'dcos-service-' path, so that
    # it's automatically cleaned up in uninstall:
    service_zk_path = sdk_utils.get_zk_path(foldered_name)
    zk_path = "master.mesos:2181/{}/CUSTOMPATH".format(service_zk_path)
    app_config["env"]["KAFKA_ZOOKEEPER_URI"] = zk_path
    sdk_marathon.update_app(app_config)

    sdk_tasks.check_tasks_updated(foldered_name, pod_prefix, original_broker_ids)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # wait for brokers to finish registering
    test_utils.broker_count_check(config.DEFAULT_BROKER_COUNT, service_name=foldered_name)

    _rc, zk_endpoint, _stderr = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, foldered_name, "endpoints zookeeper"
    )
    assert zk_endpoint.rstrip("\n") == zk_path

    # topic created earlier against default zk should no longer be present:
    _rc, topics_after, _stderr = sdk_cmd.svc_cli(
        config.PACKAGE_NAME, foldered_name, "topic list", parse_json=True
    )
    test_utils.assert_topic_lists_are_equal_without_automatic_topics([], topics_after)
def hdfs_service_tls(service_account):
    """Install the service with transport encryption enabled and yield ``service_account``.

    Generator; presumably used as a pytest fixture (the decorator is not
    visible here). ``service_account`` is indexed with ``"name"`` and
    ``"secret"``. The service is uninstalled on exit in all cases.
    """
    try:
        # Remove any leftover installation before installing afresh.
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

        tls_options = {
            "service": {
                "service_account": service_account["name"],
                "service_account_secret": service_account["secret"],
                "security": {
                    "transport_encryption": {
                        "enabled": True,
                    },
                },
            },
        }
        sdk_install.install(
            config.PACKAGE_NAME,
            service_name=config.SERVICE_NAME,
            expected_running_tasks=config.DEFAULT_TASK_COUNT,
            additional_options=tls_options,
            timeout_seconds=30 * 60,
        )
        sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

        yield service_account
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
def test_secrets_dcos_space():
    """Check that installation fails when the tasks' DCOS_SPACE cannot read the secrets.

    The secrets created under ``<PACKAGE_NAME>/somePath/`` are now always
    deleted, including when the test fails.

    Raises:
        AssertionError: if the install and deployment unexpectedly succeed, or
            if either of them itself raises an AssertionError.
    """
    # 1) create secrets in hello-world/somePath, i.e. hello-world/somePath/secret1 ...
    # 2) Tasks with DCOS_SPACE hello-world/somePath
    #    or some DCOS_SPACE path under hello-world/somePath
    #    (for example hello-world/somePath/anotherPath/)
    #    can access these Secrets
    install.uninstall(PACKAGE_NAME)

    # cannot access these secrets because of DCOS_SPACE authorization
    secrets_path = "{}/somePath/".format(PACKAGE_NAME)
    create_secrets(secrets_path)

    try:
        try:
            install.install(
                PACKAGE_NAME,
                NUM_HELLO + NUM_WORLD,
                additional_options=options_dcos_space_test,
            )
            plan.wait_for_completed_deployment(PACKAGE_NAME)
            assert False, "Should have failed to install"
        except AssertionError:
            # Same as the original: assertion failures are real test failures.
            raise
        except Exception:
            # Expected to fail. Only Exception is caught, so KeyboardInterrupt
            # and SystemExit are no longer swallowed as with a bare except.
            pass
    finally:
        # clean up and delete secrets, even if the test failed
        delete_secrets(secrets_path)
def install(
        package_name,
        service_name,
        expected_running_tasks,
        additional_options=None,
        package_version=None,
        timeout_seconds=TIMEOUT_SECONDS,
        wait_for_deployment=True):
    """Install a package and optionally wait for its deployment plan to complete.

    Args:
        package_name: name of the package to install.
        service_name: name of the service being installed.
        expected_running_tasks: task count passed to ``retried_shakedown_install``.
        additional_options: options merged via ``get_package_options``.
            ``None`` (the default) is treated as an empty dict; a shared
            mutable ``{}`` default is avoided.
        package_version: version to install; passed through unchanged.
        timeout_seconds: timeout for the install and for the deployment wait.
        wait_for_deployment: when true, also wait for the deployment plan to
            complete.
    """
    if additional_options is None:
        additional_options = {}

    start = time.time()
    merged_options = get_package_options(additional_options)

    log.info('Installing {}/{} with options={} version={}'.format(
        package_name, service_name, merged_options, package_version))

    # 1. Install package, wait for tasks, wait for marathon deployment
    retried_shakedown_install(
        package_name,
        service_name,
        package_version,
        merged_options,
        timeout_seconds,
        expected_running_tasks)

    # 2. Wait for the scheduler to be idle (as implied by deploy plan completion and suppressed bit)
    # This should be skipped ONLY when it's known that the scheduler will be stuck in an incomplete state.
    if wait_for_deployment:
        # this can take a while, default is 15 minutes. for example with HDFS, we can hit the expected
        # total task count via FINISHED tasks, without actually completing deployment
        log.info("Waiting for {}/{} to finish deployment plan...".format(
            package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)

    log.info('Installed {}/{} after {}'.format(
        package_name, service_name, shakedown.pretty_duration(time.time() - start)))
def test_task_dns_prefix_points_to_all_tasks():
    """Check that every task of pod hello-0 reports the discovery name "hello-0"."""
    response = sdk_cmd.service_request('GET', config.SERVICE_NAME, '/v1/pod/hello-0/info')
    pod_info = response.json()

    # Assert that DiscoveryInfo is correctly set on tasks.
    assert all(
        task["info"]["discovery"]["name"] == "hello-0"
        for task in pod_info
    )

    # NOTE(review): the original comment here said the hello-0.hello-world.mesos
    # DNS entry is asserted to point to the right IP, but the only visible step
    # is waiting for the deployment - confirm whether a DNS check is missing.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
def test_disable_quota_role():
    """Set ENABLE_ROLE_MIGRATION to "false" and check all tasks still use the legacy role."""
    # Add new pods to service which should be launched with the new role.
    app_config = sdk_marathon.get_config(SERVICE_NAME)

    # Turn off legacy role.
    app_config["env"]["ENABLE_ROLE_MIGRATION"] = "false"

    # Update the app
    sdk_marathon.update_app(app_config)

    # Wait for scheduler to restart.
    sdk_plan.wait_for_completed_deployment(SERVICE_NAME)

    # Get the current service state to verify roles have applied.
    roles = sdk_utils.get_service_roles(SERVICE_NAME)
    task_roles = roles["task-roles"]

    # We must have some role!
    assert len(task_roles) > 0
    assert len(task_roles) == 3

    assert LEGACY_ROLE in task_roles.values()
    assert ENFORCED_ROLE not in task_roles.values()

    # Ensure we're not MULTI_ROLE, and only using the legacy-role.
    assert roles["framework-roles"] is None
    assert roles["framework-role"] == LEGACY_ROLE
def test_custom_decommission():
    """Scale WORLD_COUNT down, up, and down again, checking the decommission plan each time."""
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)

    def set_world_count(count):
        # Fetch the app definition again on each call so the change is applied
        # to the latest config.
        app_config = sdk_marathon.get_config(foldered_name)
        app_config["env"]["WORLD_COUNT"] = count
        sdk_marathon.update_app(app_config)

    def assert_custom_step_is_first():
        sdk_plan.wait_for_completed_plan(foldered_name, "decommission")
        plan = sdk_plan.get_decommission_plan(foldered_name)
        log.info(sdk_plan.plan_string("decommission", plan))
        first_step_name = plan["phases"][0]["steps"][0]["name"]
        assert "custom_decommission_step" == first_step_name

    set_world_count("1")
    assert_custom_step_is_first()

    # scale back up
    set_world_count("2")
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # Let's decommission again!
    set_world_count("1")
    sdk_plan.wait_for_completed_deployment(foldered_name)
    assert_custom_step_is_first()
def test_metrics() -> None:
    """Wait for the expected data-0-node metrics, then for deploy and recovery plans to complete."""
    expected_metrics = [
        "node.data-0-node.fs.total.total_in_bytes",
        "node.data-0-node.jvm.mem.pools.old.peak_used_in_bytes",
        "node.data-0-node.jvm.threads.count",
    ]

    def expected_metrics_exist(emitted_metrics: List[str]) -> bool:
        # Elastic metrics are also dynamic and based on the service name.
        # For eg:
        # elasticsearch.test__integration__elastic.node.data-0-node.thread_pool.listener.completed
        # To prevent this from breaking we drop the service name from the metric name
        # => data-0-node.thread_pool.listener.completed
        stripped_names = []
        for emitted in emitted_metrics:
            # Drop the first two dot-separated components.
            stripped_names.append(".".join(emitted.split(".")[2:]))
        return sdk_metrics.check_metrics_presence(stripped_names, expected_metrics)

    sdk_metrics.wait_for_service_metrics(
        package_name,
        service_name,
        "data-0",
        "data-0-node",
        config.DEFAULT_TIMEOUT,
        expected_metrics_exist,
    )

    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)
def elastic_service_tls(service_account):
    """Install the service with TLS and X-Pack enabled, yield, then uninstall.

    Generator; presumably used as a pytest fixture (the decorator is not
    visible here). ``service_account`` is used for both the account and the
    account-secret options.
    """
    tls_options = {
        "service": {
            "service_account_secret": service_account,
            "service_account": service_account,
            "tls": True,
        },
        "elasticsearch": {
            "xpack_enabled": True,
        },
    }
    sdk_install.install(
        PACKAGE_NAME,
        service_name=SERVICE_NAME,
        expected_running_tasks=NO_INGEST_TASK_COUNT,
        additional_options=tls_options,
    )
    sdk_plan.wait_for_completed_deployment(SERVICE_NAME)

    # Wait for service health check to pass
    shakedown.service_healthy(SERVICE_NAME)

    yield

    sdk_install.uninstall(PACKAGE_NAME, SERVICE_NAME)
def test_metrics():
    """Wait for the expected data-0-node metrics, then for deploy and recovery plans to complete."""
    expected_metrics = [
        "node.data-0-node.fs.total.total_in_bytes",
        "node.data-0-node.jvm.mem.pools.old.peak_used_in_bytes",
        "node.data-0-node.jvm.threads.count",
    ]

    def expected_metrics_exist(emitted_metrics):
        # Elastic metrics are also dynamic and based on the service name.
        # For eg:
        # elasticsearch.test__integration__elastic.node.data-0-node.thread_pool.listener.completed
        # To prevent this from breaking we drop the service name from the metric name
        # => data-0-node.thread_pool.listener.completed
        stripped_names = []
        for emitted in emitted_metrics:
            # Drop the first two dot-separated components.
            stripped_names.append(".".join(emitted.split(".")[2:]))
        return sdk_metrics.check_metrics_presence(stripped_names, expected_metrics)

    sdk_metrics.wait_for_service_metrics(
        config.PACKAGE_NAME,
        foldered_name,
        "data-0",
        "data-0-node",
        config.DEFAULT_TIMEOUT,
        expected_metrics_exist,
    )

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
def test_old_tasks_get_relaunched_with_new_config():
    """Switch to multi service mode with a changed hello config and check the old task relaunches."""
    original_hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, "hello")
    assert len(original_hello_ids) > 0, "Got an empty list of task_ids"

    # Passing a list of yaml files makes the update launch in multi service
    # mode; the hello cpus value carries the updated config.
    updated_options = {
        "service": {
            "yaml": "",
            "yamls": "svc,foobar_service_name",
        },
        "hello": {
            "cpus": 0.2,
        },
    }
    sdk_upgrade.update_or_upgrade_or_downgrade(
        config.PACKAGE_NAME,
        config.SERVICE_NAME,
        to_version=None,
        to_options=updated_options,
        expected_running_tasks=4,
        wait_for_deployment=False,
    )

    # Ensure the old task DOES relaunch
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME, multiservice_name="foobar")
    previous_task_id = original_hello_ids.pop()
    sdk_tasks.check_task_relaunched("hello-0-server", previous_task_id)

    foo_task_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, "foo")
    assert len(foo_task_ids) == 1
def setup_constraint_switch():
    """Install one hello pod pinned to one agent, then re-pin its placement to another.

    Returns:
        Tuple ``(some_agent, other_agent, hello_ids)``: the two agents used and
        the hello task ids captured before the placement was changed.
    """
    sdk_install.uninstall(PACKAGE_NAME)

    private_agents = shakedown.get_private_agents()
    some_agent, other_agent = private_agents[0], private_agents[1]
    log.info('Agents: %s %s', some_agent, other_agent)
    assert some_agent != other_agent

    placement_template = 'hostname:LIKE:{}'
    options = {
        "service": {
            "spec_file": "examples/marathon_constraint.yml",
        },
        "hello": {
            "count": 1,
            # First, we stick the pod to some_agent
            "placement": placement_template.format(some_agent),
        },
        "world": {
            "count": 0,
        },
    }
    sdk_install.install(PACKAGE_NAME, 1, additional_options=options)
    sdk_tasks.check_running(PACKAGE_NAME, 1)
    hello_ids = sdk_tasks.get_task_ids(PACKAGE_NAME, 'hello')

    # Now, stick it to other_agent
    app_config = sdk_marathon.get_config(PACKAGE_NAME)
    app_config['env']['HELLO_PLACEMENT'] = placement_template.format(other_agent)
    sdk_marathon.update_app(PACKAGE_NAME, app_config)

    # Wait for the scheduler to be up and settled before advancing.
    sdk_plan.wait_for_completed_deployment(PACKAGE_NAME)

    return some_agent, other_agent, hello_ids
def test_node_replace_replaces_node():
    """Replace pod node-2 while a placement constraint excludes its current host.

    The original PLACEMENT_CONSTRAINT value is restored in a ``finally`` block.
    """
    matching_tasks = [
        candidate for candidate in sdk_tasks.get_summary()
        if candidate.name == "node-2-server"
    ]
    replace_task = matching_tasks[0]
    log.info("avoid host for task {}".format(replace_task))

    # The pod name is the task name without its "-server" suffix.
    suffix_length = len("-server")
    replace_pod_name = replace_task.name[:-suffix_length]

    # Update the placement constraints so the new node doesn't end up on the same host
    marathon_config = sdk_marathon.get_config(config.SERVICE_NAME)
    original_constraint = marathon_config["env"]["PLACEMENT_CONSTRAINT"]
    try:
        avoid_host_constraint = '[["hostname", "UNLIKE", "{}"]]'.format(replace_task.host)
        marathon_config["env"]["PLACEMENT_CONSTRAINT"] = avoid_host_constraint
        sdk_marathon.update_app(marathon_config)
        sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

        # start replace and wait for it to finish
        replace_cmd = "pod replace {}".format(replace_pod_name)
        sdk_cmd.svc_cli(config.PACKAGE_NAME, config.SERVICE_NAME, replace_cmd)
        sdk_plan.wait_for_kicked_off_recovery(config.SERVICE_NAME)
        sdk_plan.wait_for_completed_recovery(
            config.SERVICE_NAME, timeout_seconds=RECOVERY_TIMEOUT_SECONDS
        )
    finally:
        # revert to prior placement setting before proceeding with tests: avoid getting stuck.
        marathon_config["env"]["PLACEMENT_CONSTRAINT"] = original_constraint
        sdk_marathon.update_app(marathon_config)
        sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
def cassandra_service_tls(service_account):
    """Reinstall the service with transport encryption enabled, yield, then uninstall.

    Generator; presumably used as a pytest fixture (the decorator is not
    visible here). ``service_account`` is indexed with ``"name"`` and
    ``"secret"``.
    """
    sdk_install.uninstall(package_name=config.PACKAGE_NAME, service_name=config.SERVICE_NAME)

    tls_options = {
        "service": {
            "service_account": service_account["name"],
            "service_account_secret": service_account["secret"],
            "security": {
                "transport_encryption": {
                    "enabled": True,
                },
            },
        },
    }
    sdk_install.install(
        config.PACKAGE_NAME,
        config.SERVICE_NAME,
        config.DEFAULT_TASK_COUNT,
        additional_options=tls_options,
    )
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    # Wait for service health check to pass
    shakedown.service_healthy(config.SERVICE_NAME)

    yield

    sdk_install.uninstall(package_name=config.PACKAGE_NAME, service_name=config.SERVICE_NAME)
def check_healthy(service_name, count=DEFAULT_TASK_COUNT, recovery_expected=False):
    """Wait for the deploy and recovery plans to complete, then check the running task count.

    Args:
        service_name: service to check.
        count: number of tasks passed to ``sdk_tasks.check_running``.
        recovery_expected: when true, first wait for a recovery to be kicked off.
    """
    # Every plan wait below uses the same 25 minute timeout.
    plan_timeout_seconds = 25 * 60

    sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds=plan_timeout_seconds)
    if recovery_expected:
        # TODO(elezar): See INFINITY-2109 where we need to better handle recovery health checks
        sdk_plan.wait_for_kicked_off_recovery(service_name, timeout_seconds=plan_timeout_seconds)
    sdk_plan.wait_for_completed_recovery(service_name, timeout_seconds=plan_timeout_seconds)
    sdk_tasks.check_running(service_name, count)
def cassandra_service_tls(service_account):
    """Reinstall the service with transport encryption enabled, yield, then uninstall.

    Generator; presumably used as a pytest fixture (the decorator is not
    visible here). ``service_account`` is indexed with ``"name"`` and
    ``"secret"``.
    """
    sdk_install.uninstall(package_name=config.PACKAGE_NAME, service_name=config.SERVICE_NAME)

    encryption_settings = {"transport_encryption": {"enabled": True}}
    service_options = {
        "service_account": service_account["name"],
        "service_account_secret": service_account["secret"],
        "security": encryption_settings,
    }
    sdk_install.install(
        config.PACKAGE_NAME,
        config.SERVICE_NAME,
        config.DEFAULT_TASK_COUNT,
        additional_options={"service": service_options},
    )
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    # Wait for service health check to pass
    shakedown.service_healthy(config.SERVICE_NAME)

    yield

    sdk_install.uninstall(package_name=config.PACKAGE_NAME, service_name=config.SERVICE_NAME)
def test_custom_zookeeper():
    """Switch the service to a custom ZK path and check the old topic is gone."""
    foldered_name = sdk_utils.get_foldered_name(config.SERVICE_NAME)
    pod_prefix = '{}-'.format(config.DEFAULT_POD_TYPE)
    original_broker_ids = sdk_tasks.get_task_ids(foldered_name, pod_prefix)

    # create a topic against the default zk:
    test_utils.create_topic(config.DEFAULT_TOPIC_NAME, service_name=foldered_name)

    app_config = sdk_marathon.get_config(foldered_name)
    # should be using default path when this envvar is empty/unset:
    assert app_config['env']['KAFKA_ZOOKEEPER_URI'] == ''

    # use a custom zk path that's WITHIN the 'dcos-service-' path, so that
    # it's automatically cleaned up in uninstall:
    service_zk_path = sdk_utils.get_zk_path(foldered_name)
    zk_path = 'master.mesos:2181/{}/CUSTOMPATH'.format(service_zk_path)
    app_config['env']['KAFKA_ZOOKEEPER_URI'] = zk_path
    sdk_marathon.update_app(foldered_name, app_config)

    sdk_tasks.check_tasks_updated(foldered_name, pod_prefix, original_broker_ids)
    sdk_plan.wait_for_completed_deployment(foldered_name)

    # wait for brokers to finish registering
    test_utils.broker_count_check(config.DEFAULT_BROKER_COUNT, service_name=foldered_name)

    zk_endpoint = sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, 'endpoints zookeeper')
    assert zk_endpoint.rstrip('\n') == zk_path

    # topic created earlier against default zk should no longer be present:
    topics_after = sdk_cmd.svc_cli(config.PACKAGE_NAME, foldered_name, 'topic list', json=True)
    test_utils.assert_topic_lists_are_equal_without_automatic_topics([], topics_after)
def setup_constraint_switch():
    """Install one hello pod pinned to one agent, then re-pin its placement to another.

    Returns:
        Tuple ``(some_agent, other_agent, hello_ids)``: the two agents used and
        the hello task ids captured before the placement was changed.
    """
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    private_agents = shakedown.get_private_agents()
    some_agent, other_agent = private_agents[0], private_agents[1]
    log.info('Agents: %s %s', some_agent, other_agent)
    assert some_agent != other_agent

    def hostname_like(agent):
        # Placement constraint string matching the given agent.
        return "[[\"hostname\", \"LIKE\", \"{}\"]]".format(agent)

    options = _escape_placement_for_1_9({
        "service": {
            "yaml": "marathon_constraint",
        },
        "hello": {
            "count": 1,
            # First, we stick the pod to some_agent
            "placement": hostname_like(some_agent),
        },
        "world": {
            "count": 0,
        },
    })

    sdk_install.install(config.PACKAGE_NAME, config.SERVICE_NAME, 1, additional_options=options)
    sdk_tasks.check_running(config.SERVICE_NAME, 1)
    hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'hello')

    # Now, stick it to other_agent
    app_config = sdk_marathon.get_config(config.SERVICE_NAME)
    app_config['env']['HELLO_PLACEMENT'] = hostname_like(other_agent)
    sdk_marathon.update_app(config.SERVICE_NAME, app_config)

    # Wait for the scheduler to be up and settled before advancing.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    return some_agent, other_agent, hello_ids
def check_tasks_not_updated(service_name, prefix, old_task_ids):
    """Assert that every id in ``old_task_ids`` is still among the current tasks.

    The deployment and recovery plans are waited on first, then the current
    task ids matching ``prefix`` are fetched and compared.

    Args:
        service_name: service whose plans and tasks are inspected.
        prefix: task-name prefix passed to ``get_task_ids``.
        old_task_ids: ids captured earlier that must all still be present.

    Raises:
        AssertionError: if any old id is missing from the current ids.
    """
    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)

    current_ids = get_task_ids(service_name, prefix)
    summary = "\n- Old tasks: {}\n- Current tasks: {}".format(
        sorted(old_task_ids), sorted(current_ids)
    )
    log.info('Checking tasks starting with "{}" have not been updated:{}'.format(prefix, summary))

    # Any old id that is no longer present means a task was replaced.
    vanished = set(old_task_ids) - set(current_ids)
    assert not vanished, 'Tasks starting with "{}" were updated:{}'.format(prefix, summary)
def check_healthy(service_name, count=DEFAULT_TASK_COUNT, recovery_expected=False):
    """Wait for the deploy and recovery plans to complete, then check the running task count.

    Args:
        service_name: service to check.
        count: number of tasks passed to ``sdk_tasks.check_running``.
        recovery_expected: when true, first wait for a recovery to be kicked off.
    """
    # Shared 25 minute timeout for each of the plan waits.
    timeout_kwargs = {"timeout_seconds": 25 * 60}

    sdk_plan.wait_for_completed_deployment(service_name, **timeout_kwargs)
    if recovery_expected:
        # TODO(elezar): See INFINITY-2109 where we need to better handle recovery health checks
        sdk_plan.wait_for_kicked_off_recovery(service_name, **timeout_kwargs)
    sdk_plan.wait_for_completed_recovery(service_name, **timeout_kwargs)
    sdk_tasks.check_running(service_name, count)
def test_hostname_unique():
    """Deploy one hello and one world task per private agent with hostname:UNIQUE placement."""
    install.uninstall(PACKAGE_NAME)

    unique_placement = "hostname:UNIQUE"
    total_tasks = num_private_agents * 2
    options = {
        "service": {
            "spec_file": "examples/marathon_constraint.yml",
        },
        "hello": {
            "count": num_private_agents,
            "placement": unique_placement,
        },
        "world": {
            "count": num_private_agents,
            "placement": unique_placement,
        },
    }

    install.install(PACKAGE_NAME, total_tasks, additional_options=options)
    # hello deploys first. One "world" task should end up placed with each "hello" task.
    plan.wait_for_completed_deployment(PACKAGE_NAME)

    # ensure "hello" task can still be placed with "world" task
    cmd.run_cli('hello-world pods replace hello-0')
    tasks.check_running(PACKAGE_NAME, total_tasks - 1, timeout_seconds=10)
    tasks.check_running(PACKAGE_NAME, total_tasks)
    ensure_multiple_per_agent(hello=1, world=1)
def hdfs_service_tls(service_account):
    """Install the service with transport encryption enabled and yield ``service_account``.

    Generator; presumably used as a pytest fixture (the decorator is not
    visible here). ``service_account`` is indexed with ``"name"`` and
    ``"secret"``. The service is uninstalled on exit in all cases.
    """
    try:
        # Clear out any earlier installation first.
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

        encryption_settings = {"transport_encryption": {"enabled": True}}
        service_options = {
            "service_account": service_account["name"],
            "service_account_secret": service_account["secret"],
            "security": encryption_settings,
        }
        sdk_install.install(
            config.PACKAGE_NAME,
            service_name=config.SERVICE_NAME,
            expected_running_tasks=config.DEFAULT_TASK_COUNT,
            additional_options={"service": service_options},
            timeout_seconds=30 * 60,
        )
        sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

        yield service_account
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
def kafka_service_tls(service_account):
    """Install the service with transport encryption enabled and yield ``service_account``.

    Generator; presumably used as a pytest fixture (the decorator is not
    visible here). ``service_account`` is indexed with ``"name"`` and
    ``"secret"``. The service is uninstalled on exit in all cases.
    """
    try:
        # Start from a clean slate in case a previous run left the service around.
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

        tls_options = {
            "service": {
                "service_account": service_account["name"],
                "service_account_secret": service_account["secret"],
                "security": {
                    "transport_encryption": {
                        "enabled": True,
                    },
                },
            },
        }
        # NOTE(review): this calls config.install, not sdk_install.install,
        # exactly as the original did - presumably a package-level wrapper; confirm.
        config.install(
            config.PACKAGE_NAME,
            config.SERVICE_NAME,
            config.DEFAULT_BROKER_COUNT,
            additional_options=tls_options,
        )
        sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

        yield service_account
    finally:
        sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)
def test_more_pods_disable_legacy_role_post_update():
    """Scale out hello and world and check every task runs under the enforced role."""
    # Ensure we can scale out more still with legacy role disabled.

    # Add new pods to service which should be launched with the new role.
    app_config = sdk_marathon.get_config(SERVICE_NAME)

    # Add an extra pod to each.
    app_config["env"]["HELLO_COUNT"] = "3"
    app_config["env"]["WORLD_COUNT"] = "4"

    # Update the app
    sdk_marathon.update_app(app_config)

    # Wait for scheduler to restart.
    sdk_plan.wait_for_completed_deployment(SERVICE_NAME)

    # Get the current service state to verify roles have applied.
    roles = sdk_utils.get_service_roles(SERVICE_NAME)
    task_roles = roles["task-roles"]

    # We must have some role!
    assert len(task_roles) > 0
    assert len(task_roles) == 7

    assert LEGACY_ROLE not in task_roles.values()
    assert ENFORCED_ROLE in task_roles.values()

    # NOTE(review): the original comment said "Ensure we're MULTI_ROLE", but
    # the assertions require "framework-roles" to be None and a single
    # "framework-role" equal to ENFORCED_ROLE - presumably this means NOT
    # MULTI_ROLE; confirm the intent.
    assert roles["framework-roles"] is None
    assert roles["framework-role"] == ENFORCED_ROLE
def test_node_replace_replaces_node():
    """Replace pod node-2 while a placement constraint excludes its current host.

    The original PLACEMENT_CONSTRAINT value is restored in a ``finally`` block.
    """
    matching_tasks = [
        candidate for candidate in sdk_tasks.get_summary()
        if candidate.name == 'node-2-server'
    ]
    replace_task = matching_tasks[0]
    log.info('avoid host for task {}'.format(replace_task))

    # The pod name is the task name without its '-server' suffix.
    suffix_length = len('-server')
    replace_pod_name = replace_task.name[:-suffix_length]

    # Update the placement constraints so the new node doesn't end up on the same host
    marathon_config = sdk_marathon.get_config(config.SERVICE_NAME)
    original_constraint = marathon_config['env']['PLACEMENT_CONSTRAINT']
    try:
        avoid_host_constraint = '[["hostname", "UNLIKE", "{}"]]'.format(replace_task.host)
        marathon_config['env']['PLACEMENT_CONSTRAINT'] = avoid_host_constraint
        sdk_marathon.update_app(config.SERVICE_NAME, marathon_config)
        sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

        # start replace and wait for it to finish
        replace_cmd = 'pod replace {}'.format(replace_pod_name)
        sdk_cmd.svc_cli(config.PACKAGE_NAME, config.SERVICE_NAME, replace_cmd)
        sdk_plan.wait_for_kicked_off_recovery(config.SERVICE_NAME)
        sdk_plan.wait_for_completed_recovery(
            config.SERVICE_NAME, timeout_seconds=RECOVERY_TIMEOUT_SECONDS)
    finally:
        # revert to prior placement setting before proceeding with tests: avoid getting stuck.
        marathon_config['env']['PLACEMENT_CONSTRAINT'] = original_constraint
        sdk_marathon.update_app(config.SERVICE_NAME, marathon_config)
        sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
def setup_constraint_switch():
    """Install one hello pod pinned to one agent, then re-pin its placement to another.

    Returns:
        Tuple ``(some_agent, other_agent, hello_ids)``: the hostnames of the
        two agents used and the hello task ids captured before the placement
        was changed.
    """
    sdk_install.uninstall(config.PACKAGE_NAME, config.SERVICE_NAME)

    private_agents = sdk_agents.get_private_agents()
    some_agent = private_agents[0]["hostname"]
    other_agent = private_agents[1]["hostname"]
    log.info("Agents: %s %s", some_agent, other_agent)
    assert some_agent != other_agent

    placement_template = '[["hostname", "LIKE", "{}"]]'
    raw_options = {
        "service": {"yaml": "marathon_constraint"},
        "hello": {
            "count": 1,
            # First, we stick the pod to some_agent
            "placement": placement_template.format(some_agent),
        },
        "world": {"count": 0},
    }
    options = _escape_placement_for_1_9(raw_options)

    sdk_install.install(config.PACKAGE_NAME, config.SERVICE_NAME, 1, additional_options=options)
    sdk_tasks.check_running(config.SERVICE_NAME, 1)
    hello_ids = sdk_tasks.get_task_ids(config.SERVICE_NAME, "hello")

    # Now, stick it to other_agent
    app_config = sdk_marathon.get_config(config.SERVICE_NAME)
    app_config["env"]["HELLO_PLACEMENT"] = placement_template.format(other_agent)
    sdk_marathon.update_app(app_config)

    # Wait for the scheduler to be up and settled before advancing.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    return some_agent, other_agent, hello_ids
def test_custom_zookeeper():
    """Point the service at a custom ZK path and check the previously created topic is gone."""
    pod_prefix = '{}-'.format(DEFAULT_POD_TYPE)
    original_broker_ids = tasks.get_task_ids(FOLDERED_SERVICE_NAME, pod_prefix)

    # sanity check: brokers should be reinitialized:
    listed_brokers = service_cli('broker list', service_name=FOLDERED_SERVICE_NAME)
    expected_brokers = {str(index) for index in range(DEFAULT_BROKER_COUNT)}
    assert set(listed_brokers) == expected_brokers

    # create a topic against the default zk:
    service_cli(
        'topic create {}'.format(DEFAULT_TOPIC_NAME),
        service_name=FOLDERED_SERVICE_NAME)
    topics_before = service_cli('topic list', service_name=FOLDERED_SERVICE_NAME)
    assert topics_before == [DEFAULT_TOPIC_NAME]

    app_config = marathon.get_config(FOLDERED_SERVICE_NAME)
    # should be using default path when this envvar is empty/unset:
    assert app_config['env']['KAFKA_ZOOKEEPER_URI'] == ''

    # use a custom zk path that's WITHIN the 'dcos-service-' path, so that
    # it's automatically cleaned up in uninstall:
    zk_path = 'master.mesos:2181/dcos-service-test__integration__kafka/CUSTOMPATH'
    app_config['env']['KAFKA_ZOOKEEPER_URI'] = zk_path
    marathon.update_app(FOLDERED_SERVICE_NAME, app_config)

    tasks.check_tasks_updated(FOLDERED_SERVICE_NAME, pod_prefix, original_broker_ids)
    plan.wait_for_completed_deployment(FOLDERED_SERVICE_NAME)

    zk_endpoint = service_cli(
        'endpoints zookeeper', get_json=False, service_name=FOLDERED_SERVICE_NAME)
    assert zk_endpoint.rstrip('\n') == zk_path

    # topic created earlier against default zk should no longer be present:
    topics_after = service_cli('topic list', service_name=FOLDERED_SERVICE_NAME)
    assert topics_after == []
def _read_server_certificate_sans(task_id, label):
    """Return the SAN values found in ``server.crt`` of the given task.

    Args:
        task_id: id of the task whose sandbox holds ``server.crt``.
        label: word used in the log lines to tell the reads apart
            (e.g. ``'first'`` / ``'second'``).

    Returns:
        A list with the ``value`` of every general name in the certificate's
        Subject Alternative Name extension.
    """
    _, stdout, _ = sdk_cmd.task_exec(task_id, 'cat server.crt')
    log.info('{} server.crt: {}'.format(label, stdout))

    # The PEM loader expects bytes, so encode the CLI output first.
    ascii_cert = stdout.encode('ascii')
    log.info('{} server.crt ascii encoded: {}'.format(label, ascii_cert))

    certificate = x509.load_pem_x509_certificate(ascii_cert, DEFAULT_BACKEND)
    san_extension = certificate.extensions.get_extension_for_oid(
        ExtensionOID.SUBJECT_ALTERNATIVE_NAME)
    # NOTE(review): this reaches into private attributes of the SAN extension;
    # consider iterating `san_extension.value` directly once confirmed against
    # the pinned `cryptography` version.
    return [
        san.value
        for san in san_extension.value._general_names._general_names
    ]


def test_changing_discovery_replaces_certificate_sans():
    """
    Update service configuration to change discovery prefix of a task.
    Scheduler should update task and new SANs should be generated.
    """
    san_template = '{name}-0.{service_name}.autoip.dcos.thisdcos.directory'

    # Task ids are looked up by service name; this call previously passed
    # config.PACKAGE_NAME, unlike every other lookup in this test.
    original_tasks = sdk_tasks.get_task_ids(config.SERVICE_NAME, 'discovery')
    assert len(original_tasks) == 1, 'Expecting exactly one task ID'
    task_id = original_tasks[0]
    assert task_id

    # The certificate issued for the original prefix must carry its SAN.
    sans = _read_server_certificate_sans(task_id, 'first')
    expected_san = san_template.format(
        name=DISCOVERY_TASK_PREFIX, service_name=config.SERVICE_NAME)
    assert expected_san in sans

    # Run task update with new discovery prefix
    new_prefix = DISCOVERY_TASK_PREFIX + '-new'
    marathon_config = sdk_marathon.get_config(config.SERVICE_NAME)
    marathon_config['env']['DISCOVERY_TASK_PREFIX'] = new_prefix
    sdk_marathon.update_app(config.SERVICE_NAME, marathon_config)
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    updated_tasks = sdk_tasks.get_task_ids(config.SERVICE_NAME, "discovery")
    # Fail with a clear message instead of an IndexError if nothing came back.
    assert updated_tasks, 'Expecting a discovery task after the update'
    task_id = updated_tasks[0]

    # The re-issued certificate must carry the SAN for the new prefix.
    sans = _read_server_certificate_sans(task_id, 'second')
    expected_san = san_template.format(
        name=new_prefix, service_name=config.SERVICE_NAME)
    assert expected_san in sans
def _upgrade_or_downgrade(package_name, to_package_version, service_name,
                          running_task_count, additional_options,
                          timeout_seconds, wait_for_deployment):
    """Move a running service to ``to_package_version``.

    Two flows exist. When the DC/OS version is below 1.10 or
    ``shakedown.ee_version()`` is None, the Marathon app is destroyed and the
    package reinstalled. Otherwise the service CLI's ``update start`` command
    is used and the package CLI is then reinstalled at the target version.

    If ``wait_for_deployment`` is set, the function afterwards checks that
    tasks were (or were not) restarted depending on whether the service config
    changed, and waits for the deployment plan to complete.
    """
    initial_config = get_config(package_name, service_name)
    task_ids = sdk_tasks.get_task_ids(service_name, '')

    use_marathon_flow = (sdk_utils.dcos_version_less_than("1.10")
                         or shakedown.ee_version() is None)

    if not use_marathon_flow:
        log.info('Using CLI upgrade flow to upgrade {} {}'.format(
            package_name, to_package_version))
        if not additional_options:
            sdk_cmd.svc_cli(
                package_name, service_name,
                'update start --package-version={}'.format(to_package_version))
        else:
            with tempfile.NamedTemporaryFile() as opts_f:
                serialized_options = json.dumps(additional_options)
                opts_f.write(serialized_options.encode('utf-8'))
                # Flush so the CLI invoked below sees the json content.
                opts_f.flush()
                update_command = 'update start --package-version={} --options={}'.format(
                    to_package_version, opts_f.name)
                # Must run while the temporary file still exists.
                sdk_cmd.svc_cli(package_name, service_name, update_command)
        # The package CLI is not upgraded automatically in this flow (that
        # would mean the package CLI replacing itself through the main CLI),
        # so do it by hand.
        sdk_cmd.run_cli(
            'package install --yes --cli --package-version={} {}'.format(
                to_package_version, package_name))
    else:
        log.info('Using marathon upgrade flow to upgrade {} {}'.format(
            package_name, to_package_version))
        sdk_marathon.destroy_app(service_name)
        sdk_install.install(
            package_name,
            service_name,
            running_task_count,
            additional_options=additional_options,
            package_version=to_package_version,
            timeout_seconds=timeout_seconds,
            wait_for_deployment=wait_for_deployment)

    if not wait_for_deployment:
        return

    updated_config = get_config(package_name, service_name)
    if updated_config != initial_config:
        log.info('Checking that all tasks have restarted')
        sdk_tasks.check_tasks_updated(service_name, '', task_ids)
    else:
        log.info('No config change detected. Tasks should not be restarted')
        sdk_tasks.check_tasks_not_updated(service_name, '', task_ids)

    # This can take a while (the default is 15 minutes). With HDFS, for
    # example, the expected total task count can be reached via ONCE tasks
    # without the deployment actually being complete.
    log.info(
        "Waiting for package={} service={} to finish deployment plan...".
        format(package_name, service_name))
    sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)
def install(
    package_name: str,
    service_name: str,
    expected_running_tasks: int,
    additional_options: dict = {},
    package_version: PackageVersion = PackageVersion.STUB_UNIVERSE,
    timeout_seconds: int = TIMEOUT_SECONDS,
    wait_for_deployment: bool = True,
    insert_strict_options: bool = True,
    wait_for_all_conditions: bool = True,
) -> None:
    """Install a package as ``service_name`` and wait for it to come up.

    Args:
        package_name: name of the package to install.
        service_name: name the service is installed under.
        expected_running_tasks: task count handed to the install
            implementation.
        additional_options: package options. Never mutated here; a copy is
            passed on so the shared default dict cannot be modified downstream.
        package_version: a ``PackageVersion`` member (its ``.value`` is used)
            or a raw version value, passed through unchanged.
        timeout_seconds: timeout handed to the install and deployment waits.
        wait_for_deployment: when True, also wait for the deploy plan to
            complete after the install.
        insert_strict_options: when True and the cluster is in strict mode,
            merge the service-account options into ``additional_options``.
        wait_for_all_conditions: passed through to the install implementation.

    Raises:
        Exception: if a Marathon app named ``service_name`` already exists.
    """
    start = time.time()

    # If the package is already installed at this point, fail immediately.
    if sdk_marathon.app_exists(service_name):
        raise Exception(
            "Service is already installed: {}".format(service_name))

    if insert_strict_options and sdk_utils.is_strict_mode():
        # strict mode requires correct principal and secret to perform install.
        # see also: sdk_security.py
        options = sdk_utils.merge_dictionaries(
            {
                "service": {
                    "service_account": "service-acct",
                    "principal": "service-acct",
                    "service_account_secret": "secret",
                    "secret_name": "secret",
                }
            },
            additional_options,
        )
    else:
        # Hand over a copy: `additional_options` may be the shared default
        # dict, which must not leak state between calls if anything downstream
        # modifies the options it receives.
        options = dict(additional_options)

    version = (package_version.value
               if isinstance(package_version, PackageVersion)
               else package_version)

    # 1. Install package, wait for tasks, wait for marathon deployment
    _retried_install_impl(
        package_name,
        service_name,
        expected_running_tasks,
        version,
        options,
        timeout_seconds,
        wait_for_all_conditions)

    # 2. Wait for the scheduler to be idle (as implied by deploy plan completion and suppressed bit)
    # This should be skipped ONLY when it's known that the scheduler will be stuck in an incomplete
    # state, or if the thing being installed doesn't have a deployment plan (e.g. standalone app)
    if wait_for_deployment:
        # this can take a while, default is 15 minutes. for example with HDFS, we can hit the expected
        # total task count via FINISHED tasks, without actually completing deployment
        log.info(
            "Waiting for package={} service={} to finish deployment plan...".
            format(package_name, service_name))
        sdk_plan.wait_for_completed_deployment(service_name, timeout_seconds)

    log.info("Installed package={} service={} after {}".format(
        package_name, service_name,
        sdk_utils.pretty_duration(time.time() - start)))

    # Record the install in the module-level set. The set is only mutated,
    # never rebound, so no `global` declaration is needed.
    _installed_service_names.add(service_name)
def test_deploy():
    """Check the finished deploy plan is one "hello" phase with one step."""
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
    plan = sdk_plan.get_deployment_plan(config.SERVICE_NAME)
    log.info(sdk_plan.plan_string("deploy", plan))

    phases = plan["phases"]
    assert len(phases) == 1

    hello_phase = phases[0]
    assert hello_phase["name"] == "hello"
    assert len(hello_phase["steps"]) == 1
def test_custom_seccomp_profile():
    """Switch the hello pod to a custom seccomp profile via Marathon env."""
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    env = app_definition["env"]
    # With this profile uname is disallowed, so the service should crash-loop.
    env["HELLO_SECCOMP_PROFILE_NAME"] = "test_profile.json"
    sdk_marathon.update_app(app_definition)

    sdk_marathon.wait_for_deployment(config.SERVICE_NAME, 60, None)
def test_enable():
    """Flip the test boolean on and check the running task count goes 3 -> 6."""
    service = config.SERVICE_NAME

    # Baseline: deployment finished, nothing recovering, three tasks running.
    sdk_plan.wait_for_completed_deployment(service)
    sdk_plan.recovery_plan_is_empty(service)
    sdk_tasks.check_running(service, 3)

    set_test_boolean('true')

    # After the change: deployment finished again, six tasks, still no recovery.
    sdk_plan.wait_for_completed_deployment(service)
    sdk_tasks.check_running(service, 6)
    sdk_plan.recovery_plan_is_empty(service)
def test_indexing(default_populated_index: None) -> None:
    """Check the populated index holds exactly one document, named "Loren"."""
    stats = config.get_elasticsearch_indices_stats(index_name, service_name=service_name)
    primary_doc_count = stats["_all"]["primaries"]["docs"]["count"]
    assert primary_doc_count == 1

    document = config.get_document(index_name, index_type, 1, service_name=service_name)
    assert document["_source"]["name"] == "Loren"

    # Leave the service settled: no deployment or recovery in flight.
    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)
def test_enable():
    """Flip the test boolean on and check exactly 3, then exactly 6, tasks run."""
    service = config.SERVICE_NAME

    # Baseline: deployment finished, nothing recovering, exactly three tasks.
    sdk_plan.wait_for_completed_deployment(service)
    sdk_plan.recovery_plan_is_empty(service)
    sdk_tasks.check_running(service, 3, timeout_seconds=30, allow_more=False)

    set_test_boolean("true")

    # After the change: deployment finished again, exactly six tasks.
    sdk_plan.wait_for_completed_deployment(service)
    sdk_tasks.check_running(service, 6, timeout_seconds=30, allow_more=False)
    sdk_plan.recovery_plan_is_empty(service)
def test_deploy():
    """Check the finished deploy plan is one 'hello' phase with one step."""
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
    plan = sdk_plan.get_deployment_plan(config.SERVICE_NAME)
    log.info("deployment_plan: " + str(plan))

    phases = plan['phases']
    assert len(phases) == 1

    hello_phase = phases[0]
    assert hello_phase['name'] == 'hello'
    assert len(hello_phase['steps']) == 1
def test_losing_and_regaining_index_health(default_populated_index):
    """Kill the data-0 Elasticsearch process and watch index health recover.

    Health is expected to be green beforehand, then yellow, then green again.
    """
    index = config.DEFAULT_INDEX_NAME
    config.check_elasticsearch_index_health(index, "green", service_name=foldered_name)

    data_node_host = sdk_hosts.system_host(foldered_name, "data-0-node")
    shakedown.kill_process_on_host(data_node_host, "data__.*Elasticsearch")

    # Health should degrade first and then return to green.
    for expected_color in ("yellow", "green"):
        config.check_elasticsearch_index_health(index, expected_color, service_name=foldered_name)

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
def test_mesos_v0_api():
    """Switch the service to the Mesos V0 API and back to its prior version.

    The round trip is skipped when the service already runs on V0. Afterwards
    the test waits for deployment and recovery to complete.
    """
    prior_api_version = sdk_marathon.get_mesos_api_version(foldered_name)

    # Compare by value: `is not` against a string literal tests object
    # identity, which is not a reliable equality check for strings.
    if prior_api_version != "V0":
        sdk_marathon.set_mesos_api_version(foldered_name, "V0")
        # Restore whatever version was configured before the test.
        sdk_marathon.set_mesos_api_version(foldered_name, prior_api_version)

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
def test_indexing(default_populated_index):
    """Check the default index holds exactly one document, named "Loren"."""
    index = config.DEFAULT_INDEX_NAME

    stats = config.get_elasticsearch_indices_stats(index, service_name=foldered_name)
    primary_doc_count = stats["_all"]["primaries"]["docs"]["count"]
    assert primary_doc_count == 1

    document = config.get_document(index, config.DEFAULT_INDEX_TYPE, 1, service_name=foldered_name)
    assert document["_source"]["name"] == "Loren"

    # Leave the service settled: no deployment or recovery in flight.
    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
def test_deploy():
    """Check the finished deploy plan is one 'hello' phase with one step.

    The plan is looked up under ``config.PACKAGE_NAME``.
    """
    sdk_plan.wait_for_completed_deployment(config.PACKAGE_NAME)
    plan = sdk_plan.get_deployment_plan(config.PACKAGE_NAME)
    log.info("deployment_plan: " + str(plan))

    phases = plan['phases']
    assert len(phases) == 1

    hello_phase = phases[0]
    assert hello_phase['name'] == 'hello'
    assert len(hello_phase['steps']) == 1
def check_permanent_recovery(
    package_name: str,
    service_name: str,
    pod_name: str,
    recovery_timeout_s: int,
    pods_with_updated_tasks: Optional[List[str]] = None,
) -> None:
    """
    Perform a replace (permanent recovery) operation on the specified pod.

    The specified pod AND any additional pods in `pods_with_updated_tasks` are
    checked to ensure that their tasks have been restarted. Any remaining pods
    are checked to ensure that their tasks are not changed.

    For example, performing a pod replace kafka-0 on a Kafka framework should
    result in ONLY the kafka-0-broker task being restarted. In this case,
    pods_with_updated_tasks is specified as None.

    When performing a pod replace operation on a Cassandra seed node (node-0),
    a rolling restart of other nodes is triggered, and
    pods_with_updated_tasks = ["node-0", "node-1", "node-2"]
    (assuming a three node Cassandra ring)

    Args:
        package_name: package whose CLI is used for the pod commands.
        service_name: service the pods belong to.
        pod_name: pod to replace.
        recovery_timeout_s: timeout passed to the recovery-plan waits.
        pods_with_updated_tasks: extra pods whose tasks are also expected to
            be restarted; None or empty means only `pod_name`.

    Raises:
        AssertionError: if the `pod list` or `pod replace` CLI call returns a
            non-zero exit code.
    """
    LOG.info("Testing pod replace operation for %s:%s", service_name, pod_name)

    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)

    rc, stdout, _ = sdk_cmd.svc_cli(package_name, service_name, "pod list")
    assert rc == 0, "Pod list failed"
    pod_list = set(json.loads(stdout))

    pods_to_update = set(pods_with_updated_tasks or ()) | {pod_name}

    # Snapshot each affected pod's OWN task ids, keyed by that pod. The lookup
    # must use the loop variable; using `pod_name` would record the replaced
    # pod's tasks for every pod in the set.
    tasks_to_replace = {
        pod: set(sdk_tasks.get_task_ids(service_name, pod))
        for pod in pods_to_update
    }
    LOG.info("The following tasks will be replaced: %s", tasks_to_replace)

    tasks_in_other_pods = {
        pod: set(sdk_tasks.get_task_ids(service_name, pod))
        for pod in pod_list - pods_to_update
    }
    LOG.info("Tasks in other pods should not be replaced: %s", tasks_in_other_pods)

    # Fail fast if the replace command is rejected, rather than waiting for
    # the recovery timeouts below.
    rc, _, _ = sdk_cmd.svc_cli(package_name, service_name, "pod replace {}".format(pod_name))
    assert rc == 0, "Pod replace failed"

    sdk_plan.wait_for_kicked_off_recovery(service_name, recovery_timeout_s)
    sdk_plan.wait_for_completed_recovery(service_name, recovery_timeout_s)

    for pod, tasks in tasks_to_replace.items():
        sdk_tasks.check_tasks_updated(service_name, pod, tasks)

    for pod, tasks in tasks_in_other_pods.items():
        sdk_tasks.check_tasks_not_updated(service_name, pod, tasks)
def test_task_dns_prefix_points_to_all_tasks():
    """Check every task of pod hello-0 advertises "hello-0" as discovery name."""
    pod_info_url = shakedown.dcos_service_url(config.SERVICE_NAME) + "/v1/pod/{}/info".format("hello-0")
    pod_info = dcos.http.get(pod_info_url).json()

    # DiscoveryInfo must be set to the pod-level name on every task.
    for task in pod_info:
        assert task["info"]["discovery"]["name"] == "hello-0"

    # NOTE(review): the previous comment here announced a check of the
    # hello-0.hello-world.mesos DNS entry, but the only statement is this wait
    # for the deployment to complete.
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)
def test_plugin_install_and_uninstall(default_populated_index):
    """Install the phonetic-analysis plugin via app config, then remove it."""
    plugin_name = 'analysis-phonetic'

    # Setting the plugins env var should get the plugin installed.
    install_env = {'TASKCFG_ALL_ELASTICSEARCH_PLUGINS': plugin_name}
    config.update_app(foldered_name, install_env, current_expected_task_count)
    config.check_elasticsearch_plugin_installed(plugin_name, service_name=foldered_name)

    # Clearing the env var should get it uninstalled again.
    uninstall_env = {'TASKCFG_ALL_ELASTICSEARCH_PLUGINS': ''}
    config.update_app(foldered_name, uninstall_env, current_expected_task_count)
    config.check_elasticsearch_plugin_uninstalled(plugin_name, service_name=foldered_name)

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
def test_uninstall():
    """Trigger uninstall through the Marathon env and expect zero tasks left."""
    config.check_running()

    # Setting SDK_UNINSTALL in the app env starts the uninstall "deployment",
    # which is then expected to complete.
    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    app_definition["env"]["SDK_UNINSTALL"] = "w00t"
    sdk_marathon.update_app(app_definition)
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    sdk_tasks.check_running(config.SERVICE_NAME, 0, allow_more=False)
def test_endpoints():
    """Check each endpoint type reports the expected DNS and VIP hosts.

    This verifies the scheduler is reachable via the admin router and that
    the endpoints it returns are sanitized.
    """
    for endpoint in config.ENDPOINT_TYPES:
        endpoints = sdk_cmd.svc_cli(
            config.PACKAGE_NAME, foldered_name, 'endpoints {}'.format(endpoint), json=True)

        # 'coordinator-http' => 'coordinator'
        host, _, _ = endpoint.partition('-')

        first_dns = endpoints['dns'][0]
        expected_dns_prefix = sdk_hosts.autoip_host(foldered_name, host + '-0-node')
        assert first_dns.startswith(expected_dns_prefix)

        vip = endpoints['vip']
        expected_vip_prefix = sdk_hosts.vip_host(foldered_name, host)
        assert vip.startswith(expected_vip_prefix)

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
def test_uninstall():
    """Trigger uninstall through the Marathon env and expect zero tasks."""
    config.check_running()

    # Setting SDK_UNINSTALL in the app env starts the uninstall "deployment",
    # which is then expected to complete.
    app_definition = sdk_marathon.get_config(config.SERVICE_NAME)
    app_definition['env']['SDK_UNINSTALL'] = 'w00t'
    sdk_marathon.update_app(config.SERVICE_NAME, app_definition)
    sdk_plan.wait_for_completed_deployment(config.SERVICE_NAME)

    sdk_tasks.check_running(config.SERVICE_NAME, 0)
def test_master_reelection():
    """Kill the current Elasticsearch master and expect a different one."""
    initial_master = config.get_elasticsearch_master(service_name=foldered_name)

    master_host = sdk_hosts.system_host(foldered_name, initial_master)
    shakedown.kill_process_on_host(master_host, "master__.*Elasticsearch")

    # Recovery has to start and finish, and the nodes have to be back,
    # before the master is queried again.
    sdk_plan.wait_for_in_progress_recovery(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
    config.wait_for_expected_nodes_to_exist(service_name=foldered_name)

    new_master = config.get_elasticsearch_master(service_name=foldered_name)
    assert new_master.startswith("master")
    assert new_master != initial_master

    sdk_plan.wait_for_completed_deployment(foldered_name)
    sdk_plan.wait_for_completed_recovery(foldered_name)
def check_task_not_relaunched(service_name, task_name, old_task_id, timeout_seconds=DEFAULT_TIMEOUT_SECONDS):
    """Assert that ``task_name`` still runs under its original task id.

    Waits for the service's deployment and recovery to complete, then requires
    that exactly one task id exists for ``task_name`` and that it is
    ``old_task_id``. If listing tasks fails with a DCOS HTTP error, the id set
    is treated as empty, so the assertion fails.

    ``timeout_seconds`` is accepted but not used by this function.
    """
    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)

    try:
        matching_ids = {
            task['id']
            for task in shakedown.get_tasks()
            if task['name'] == task_name
        }
    except dcos.errors.DCOSHTTPException:
        log.info('Failed to get task ids for service {}'.format(service_name))
        matching_ids = set()

    assert len(matching_ids) == 1 and old_task_id in matching_ids
def test_endpoints() -> None:
    """Check each endpoint type reports the expected DNS and VIP hosts.

    This verifies the scheduler is reachable via the admin router and that
    the endpoints it returns are sanitized.
    """
    for endpoint in config.ENDPOINT_TYPES:
        endpoints = sdk_networks.get_endpoint(package_name, service_name, endpoint)

        # 'coordinator-http' => 'coordinator'
        host, _, _ = endpoint.partition("-")

        first_dns = endpoints["dns"][0]
        expected_dns_prefix = sdk_hosts.autoip_host(service_name, host + "-0-node")
        assert first_dns.startswith(expected_dns_prefix)

        vip = endpoints["vip"]
        expected_vip_prefix = sdk_hosts.vip_host(service_name, host)
        assert vip.startswith(expected_vip_prefix)

    sdk_plan.wait_for_completed_deployment(service_name)
    sdk_plan.wait_for_completed_recovery(service_name)