def create_quota(role_name: str, quota: typing.Dict):
    """
    Create a quota for the given role, replacing any quota already listed for it.

    Args:
        role_name: Role the quota is created for.
        quota: Mapping that may hold "cpus", "mem" and/or "gpus" values; any
            other keys are ignored when the command is built.
    """
    listing = sdk_cmd.get_json_output("spark quota list --json", print_output=False)

    # remove existing quotas matching name
    known_roles = [info['role'] for info in listing.get('infos', [])]
    if role_name in known_roles:
        rc, _, _ = sdk_cmd.run_raw_cli("spark quota remove {}".format(role_name))
        assert rc == 0, "Error removing quota"

    # Each resource is passed with a one-letter flag taken from its first
    # character (-c, -m, -g).
    parts = ["spark", "quota", "create"]
    for resource in ("cpus", "mem", "gpus"):
        if resource in quota:
            parts += ["-{}".format(resource[0]), quota[resource]]
    parts.append(role_name)

    # create quota
    log.info("Creating quota for %s: %s", role_name, quota)
    rc, _, _ = sdk_cmd.run_raw_cli(" ".join(str(part) for part in parts))
    assert rc == 0, "Error creating quota"
def install_app_from_file(app_name: str, app_def_path: str) -> (bool, str):
    """
    Add a marathon app from an app definition file, then wait for its task.

    Args:
        app_name: Name of the marathon task that is waited on after the add.
        app_def_path: Path to app definition

    Returns:
        (bool, str) tuple: Boolean indicates success of install attempt.
        String indicates error message if install attempt failed.
    """
    add_cmd = "marathon app add {}".format(app_def_path)
    log.info("Running %s", add_cmd)
    rc, stdout, stderr = sdk_cmd.run_raw_cli(add_cmd)

    # A non-zero exit code OR any stderr output is treated as a failure here.
    if rc or stderr:
        log.error("returncode=%s stdout=%s stderr=%s", rc, stdout, stderr)
        return False, stderr

    if "Created deployment" in stdout:
        log.info("Waiting for app %s to be running...", app_name)
        shakedown.wait_for_task("marathon", app_name, TIMEOUT_SECONDS)
        return True, ""

    message = "'Created deployment' not in STDOUT"
    log.error(message)
    return False, message
def add_dcos_files_to_registry(
        tmpdir_factory  # _pytest.TempdirFactory
) -> None:
    """
    Build .dcos files for the stub universe repos and add each to the registry.

    After every `registry add`, waits until `registry describe` reports the
    package with status 'Added'.
    """
    # Use DCOS_FILES_PATH if its set to a valid path OR use pytest's tmpdir.
    candidate = os.environ.get('DCOS_FILES_PATH', '')
    usable = os.path.isdir(candidate)
    if usable and not os.access(candidate, os.W_OK):
        log.warning('{} is not writable.'.format(candidate))
        usable = False
    if usable:
        dcos_files_path = candidate
    else:
        dcos_files_path = str(tmpdir_factory.mktemp(sdk_utils.random_string()))

    stub_universe_urls = sdk_repository.get_universe_repos()
    log.info('Using {} to build .dcos files (if not exists) from {}'.format(
        dcos_files_path, stub_universe_urls))
    dcos_files_list = build_dcos_files_from_stubs(
        stub_universe_urls, dcos_files_path, tmpdir_factory)
    log.info('Bundled .dcos files : {}'.format(dcos_files_list))

    def _is_added(code, out, err):
        # Condition polled by wait_until_cli_condition for each package.
        return code == 0 and json.loads(out).get('status') == 'Added'

    for file_path, name, version in dcos_files_list:
        add_cmd = ' '.join(
            ['registry', 'add', '--dcos-file={}'.format(file_path), '--json'])
        rc, out, err = sdk_cmd.run_raw_cli(add_cmd)
        assert rc == 0
        assert len(json.loads(out)['packages']) > 0, 'No packages were added'

        describe_cmd = ' '.join([
            'registry',
            'describe',
            '--package-name=' + name,
            '--package-version=' + version,
            '--json',
        ])
        wait_until_cli_condition(describe_cmd, _is_added)
def install_app_from_file(app_name: str, app_def_path: str) -> (bool, str):
    """
    Add a marathon app from an app definition file, then wait for it to deploy.

    Args:
        app_name: Name of the app that is waited on after the add.
        app_def_path: Path to app definition

    Returns:
        (bool, str) tuple: Boolean indicates success of install attempt.
        String indicates error message if install attempt failed.
    """
    add_cmd = "marathon app add {}".format(app_def_path)
    log.info("Running %s", add_cmd)
    rc, stdout, stderr = sdk_cmd.run_raw_cli(add_cmd)

    # Only the exit code decides failure; stderr output alone is tolerated.
    if rc:
        log.error("returncode=%s stdout=%s stderr=%s", rc, stdout, stderr)
        return False, stderr

    if "Created deployment" in stdout:
        log.info('Waiting for app %s to be deployed and running...', app_name)
        wait_for_deployment_and_app_running(app_name, TIMEOUT_SECONDS)
        return True, ''

    message = "'Created deployment' not in STDOUT"
    log.error(message)
    return False, message
def add_stub_universe_urls(stub_universe_urls: list) -> dict:
    """
    Register each stub universe URL as a package repo under a random name.

    Repos already registered with one of the given URIs are removed first.

    Returns:
        Mapping of generated repo name -> URL (empty when no URLs were given).

    Raises:
        Exception: if adding a repo exits non-zero or writes to stderr.
    """
    if not stub_universe_urls:
        return {}

    log.info('Adding stub URLs: {}'.format(stub_universe_urls))
    alphabet = string.ascii_lowercase + string.digits
    stub_urls = {}
    for idx, url in enumerate(stub_universe_urls):
        log.info('URL {}: {}'.format(idx, repr(url)))
        suffix = ''.join(random.choice(alphabet) for _ in range(8))
        stub_urls['testpkg-' + suffix] = url

    # clean up any duplicate repositories
    listing = json.loads(sdk_cmd.run_cli('package repo list --json'))
    for repo in listing['repositories']:
        if repo['uri'] not in stub_urls.values():
            continue
        log.info('Removing duplicate stub URL: {}'.format(repo['uri']))
        sdk_cmd.run_cli('package repo remove {}'.format(repo['name']))

    # add the needed universe repositories
    for name, url in stub_urls.items():
        log.info('Adding stub repo {} URL: {}'.format(name, url))
        rc, stdout, stderr = sdk_cmd.run_raw_cli(
            'package repo add --index=0 {} {}'.format(name, url))
        if rc == 0 and not stderr:
            continue
        raise Exception(
            'Failed to add stub repo {} ({}): stdout=[{}], stderr=[{}]'.format(
                name, url, stdout, stderr))

    log.info('Finished adding universe repos')
    return stub_urls
def check_broker(id: int):
    """
    Assert that the last 15 log lines of the given kafka broker task do not
    contain a "No route to host" exception.

    Raises:
        Exception: if the task log command fails or returns no output.
    """
    log_cmd = "task log kafka-{}-broker --lines 15".format(id)
    rc, output, _ = sdk_cmd.run_raw_cli(log_cmd)
    if rc or not output:
        raise Exception("No task logs for kafka-{}-broker".format(id))

    unreachable_marker = "java.net.NoRouteToHostException: No route to host"
    assert unreachable_marker not in output
def submit_job(app_url, app_args, service_name=SPARK_SERVICE_NAME, args=[], spark_user=None,
               driver_role=SPARK_DRIVER_ROLE, verbose=True, principal=SPARK_SERVICE_ACCOUNT,
               use_cli=True):
    """
    Submit a Spark job and return its submission id.

    The submit arguments are the caller's `args` (copied, never mutated)
    followed by the --conf entries added below, then `app_url` and `app_args`.
    With `use_cli` the job goes through the service CLI; otherwise
    `bin/spark-submit` is run in a Docker container on the leader over ssh.

    Raises:
        Exception: if no submission id can be found in the command's stdout.
    """
    conf_args = args.copy()

    def _already_set(setting):
        # True when any submit argument already mentions this setting.
        return any(setting in entry for entry in conf_args)

    if driver_role:
        conf_args += ['--conf', 'spark.mesos.role={}'.format(driver_role)]

    if SPARK_DOCKER_USER is not None:
        docker_params = 'spark.mesos.executor.docker.parameters=user={}'.format(
            SPARK_DOCKER_USER)
        conf_args += ['--conf', docker_params]

    if not _already_set("spark.driver.memory="):
        conf_args += ['--conf', 'spark.driver.memory=2g']

    if sdk_utils.is_strict_mode():
        conf_args += ['--conf spark.mesos.principal={}'.format(principal)]

    if spark_user is not None:
        conf_args += ['--conf spark.mesos.driverEnv.SPARK_USER={}'.format(spark_user)]

    if not _already_set("spark.mesos.containerizer="):
        conf_args += ['--conf', 'spark.mesos.containerizer=mesos']

    submit_args = ' '.join([' '.join(conf_args), app_url, app_args])
    verbose_flag = "--verbose" if verbose else ""

    if use_cli:
        stdout = sdk_cmd.svc_cli(
            SPARK_PACKAGE_NAME,
            service_name,
            'run {} --submit-args="{}"'.format(verbose_flag, submit_args))
        match = re.search(r"Submission id: (\S+)", stdout)
    else:
        docker_cmd = "sudo docker run --net=host -ti {} bin/spark-submit {}".format(
            SPARK_DOCKER_IMAGE, submit_args)
        ssh_opts = "--option UserKnownHostsFile=/dev/null --option StrictHostKeyChecking=no"

        log.info("Running Docker command on leader: {}".format(docker_cmd))
        _, stdout, _ = sdk_cmd.run_raw_cli(
            "node ssh --master-proxy --leader --user={} {} '{}'".format(
                sdk_cmd.LINUX_USER, ssh_opts, docker_cmd))
        match = re.search(r'"submissionId" : "(\S+)"', stdout)

    if not match:
        raise Exception(
            "Unable to find submission ID in stdout:\n{}".format(stdout))
    return match.group(1)
def get_diagnostics_bundle(item: pytest.Item):
    """
    Create a DC/OS diagnostics bundle and download it to the artifact path
    for the given test item.

    Failures are logged rather than raised: if bundle creation fails, or the
    bundle is not reported complete within the retry budget, the function
    logs an error and returns without downloading anything.
    """
    rc, _, _ = sdk_cmd.run_raw_cli('node diagnostics create all')
    if rc:
        log.error('Diagnostics bundle creation failed.')
        return

    @retrying.retry(
        wait_fixed=5000,
        stop_max_delay=10*60*1000,
        retry_on_result=lambda result: result is None)
    def wait_for_bundle_file():
        rc, stdout, stderr = sdk_cmd.run_raw_cli('node diagnostics --status --json')
        if rc:
            return None

        # e.g. { "some-ip": { stuff we want } }
        status = next(iter(json.loads(stdout).values()))
        if status['job_progress_percentage'] != 100:
            return None

        # e.g. "/var/lib/dcos/dcos-diagnostics/diag-bundles/bundle-2018-01-11-1515698691.zip"
        return os.path.basename(status['last_bundle_dir'])

    try:
        bundle_filename = wait_for_bundle_file()
    except retrying.RetryError:
        # Once the time budget is spent with only None results, the retry
        # decorator raises RetryError instead of returning None. Map that to
        # "no bundle" so the give-up branch below is reachable.
        bundle_filename = None

    if bundle_filename:
        sdk_cmd.run_cli('node diagnostics download {} --location={}'.format(
            bundle_filename, setup_artifact_path(item, bundle_filename)))
    else:
        log.error('Diagnostics bundle didnt finish in time, giving up.')
def _dump_diagnostics_bundle(item: pytest.Item):
    '''Creates and downloads a DC/OS diagnostics bundle, and saves it to the artifact path for this test.

    Failures are logged rather than raised: if bundle creation fails, or the
    bundle is not reported complete within the retry budget, an error is
    logged and nothing is downloaded.
    '''
    rc, _, _ = sdk_cmd.run_raw_cli('node diagnostics create all')
    if rc:
        log.error('Diagnostics bundle creation failed.')
        return

    @retrying.retry(
        wait_fixed=5000,
        stop_max_delay=10*60*1000,
        retry_on_result=lambda result: result is None)
    def wait_for_bundle_file():
        rc, stdout, stderr = sdk_cmd.run_raw_cli('node diagnostics --status --json')
        if rc:
            return None

        # e.g. { "some-ip": { stuff we want } }
        status = next(iter(json.loads(stdout).values()))
        if status['job_progress_percentage'] != 100:
            return None

        # e.g. "/var/lib/dcos/dcos-diagnostics/diag-bundles/bundle-2018-01-11-1515698691.zip"
        return os.path.basename(status['last_bundle_dir'])

    try:
        bundle_filename = wait_for_bundle_file()
    except retrying.RetryError:
        # Once the time budget is spent with only None results, the retry
        # decorator raises RetryError instead of returning None. Map that to
        # "no bundle" so the give-up branch below is reachable.
        bundle_filename = None

    if bundle_filename:
        sdk_cmd.run_cli('node diagnostics download {} --location={}'.format(
            bundle_filename, _setup_artifact_path(item, bundle_filename)))
    else:
        log.error('Diagnostics bundle didnt finish in time, giving up.')
def add_dcos_files_to_registry(
        tmpdir_factory  # _pytest.TempdirFactory
) -> None:
    """
    Build .dcos files for the stub universe repos and add each to the registry.

    After every `registry add`, polls `registry describe` (5 second interval,
    up to 5 minutes) until the package reports status 'Added'.
    """
    # Use DCOS_FILES_PATH if its set to a valid path OR use pytest's tmpdir.
    candidate = os.environ.get('DCOS_FILES_PATH', '')
    usable = os.path.isdir(candidate)
    if usable and not os.access(candidate, os.W_OK):
        log.warning('{} is not writable.'.format(candidate))
        usable = False
    if usable:
        dcos_files_path = candidate
    else:
        dcos_files_path = str(tmpdir_factory.mktemp(sdk_utils.random_string()))

    stub_universe_urls = sdk_repository.get_universe_repos()
    log.info('Using {} to build .dcos files (if not exists) from {}'.format(
        dcos_files_path, stub_universe_urls))
    dcos_files_list = build_dcos_files_from_stubs(
        stub_universe_urls, dcos_files_path, tmpdir_factory)
    log.info('Bundled .dcos files : {}'.format(dcos_files_list))

    @retrying.retry(stop_max_delay=5 * 60 * 1000, wait_fixed=5 * 1000)
    def wait_for_added_registry(name, version):
        # A failing assert raises, which is what makes the decorator retry.
        describe_cmd = 'registry describe --package-name={} --package-version={} --json'.format(
            name, version)
        code, stdout, stderr = sdk_cmd.run_raw_cli(describe_cmd, print_output=False)
        assert code == 0 and json.loads(stdout).get('status') == 'Added'

    for file_path, name, version in dcos_files_list:
        add_cmd = 'registry add --dcos-file={} --json'.format(file_path)
        rc, out, err = sdk_cmd.run_raw_cli(add_cmd)
        assert rc == 0
        assert len(json.loads(out)['packages']) > 0, 'No packages were added'
        wait_for_added_registry(name, version)
def build_dcos_file_from_universe_definition(
        package: Dict,
        dcos_files_path: str,
        tmpdir_factory  # _pytest.TempdirFactory
) -> Tuple[str, str, str]:
    """
    Build the .dcos file if its not already present in the given directory.

    The build definition is the package dict without its 'releaseVersion' and
    'selected' entries. It is written to a temporary file and passed to
    `registry build`. The caller's `package` dict is left untouched.

    Returns a Tuple containing (path of .dcos file, name, and version)
    """
    # TODO Ideally we should `migrate` and then `build`.
    name = package['name']
    version = package['version']
    target = os.path.join(dcos_files_path, '{}-{}.dcos'.format(name, version))

    if os.path.isfile(target):
        log.info('Skipping build, using cached file : {}'.format(target))
        return target, name, version

    # Filter into a copy instead of `del`-ing keys on the argument: the
    # caller's dict is not mutated, and a definition that already lacks one
    # of these keys no longer raises KeyError.
    build_definition = {
        key: value
        for key, value in package.items()
        if key not in ('releaseVersion', 'selected')
    }
    package_json_file = tmpdir_factory\
        .mktemp(sdk_utils.random_string())\
        .join(sdk_utils.random_string())
    package_json_file.write(json.dumps(build_definition))

    rc, _, _ = sdk_cmd.run_raw_cli(' '.join([
        'registry',
        'build',
        '--build-definition-file={}'.format(str(package_json_file)),
        '--output-directory={}'.format(dcos_files_path),
        '--json'
    ]))
    assert rc == 0
    assert os.path.isfile(target), 'No valid .dcos file is built'
    return target, name, version
def grant_perms_for_registry_account(service_uid: str) -> None:
    """
    Grant the given service account 'full' access on the single permission
    the registry needs, asserting that the CLI call succeeds.
    """
    # Grant only required permissions to registry
    perms = 'dcos:adminrouter:ops:ca:rw'
    grant_cmd = ' '.join(
        ['security', 'org', 'users', 'grant', service_uid, perms, 'full'])
    rc, _, _ = sdk_cmd.run_raw_cli(grant_cmd)
    assert rc == 0, 'Required perms [{}] could not be obtained for {}'.format(
        perms, service_uid)
def install_enterprise_cli(force=False):
    """
    Install the enterprise CLI if required.

    Unless `force` is set, the install is skipped when `security --version`
    already produces output. The install itself is attempted up to 3 times,
    2 seconds apart.

    Raises:
        RuntimeError: if the install did not succeed.
    """
    log.info("Installing DC/OS enterprise CLI")

    if not force:
        _, version_output, _ = sdk_cmd.run_raw_cli("security --version", print_output=False)
        if version_output:
            log.info("DC/OS enterprise version %s CLI already installed", version_output.strip())
            return

    install_cmd = "package install --yes --cli dcos-enterprise-cli"

    # A truthy (non-zero) return code counts as a result that must be retried.
    @retrying.retry(stop_max_attempt_number=3,
                    wait_fixed=2000,
                    retry_on_result=lambda result: result)
    def _install_impl():
        rc, stdout, stderr = sdk_cmd.run_raw_cli(install_cmd)
        if rc:
            log.error("rc=%s stdout=%s stderr=%s", rc, stdout, stderr)
        return rc

    try:
        _install_impl()
    except Exception as e:
        raise RuntimeError(
            "Failed to install the dcos-enterprise-cli: {}".format(repr(e)))
def __create_and_upload_secret(self, keytab_path: str):
    """
    Encode the keytab file and upload it as a secret so the tasks can fetch it.

    Any pre-existing secret is deleted before the new one is created.

    Raises:
        RuntimeError: if the `security secrets create` command exits non-zero.
    """
    log.info("Creating and uploading the keytab file %s to the secret store", keytab_path)

    encoding_options = self.__encode_secret(keytab_path)

    sdk_security.install_enterprise_cli()
    # try to delete any preexisting secret data:
    # NOTE(review): deletion uses self.keytab_secret_path while creation uses
    # self.get_keytab_path() -- confirm both name the same secret.
    sdk_security.delete_secret(self.keytab_secret_path)

    # create new secret:
    command_parts = ["security", "secrets", "create", self.get_keytab_path()]
    command_parts.extend(encoding_options)
    create_secret_cmd = " ".join(command_parts)

    log.info("Creating secret %s: %s", self.get_keytab_path(), create_secret_cmd)
    rc, stdout, stderr = sdk_cmd.run_raw_cli(create_secret_cmd)
    if rc != 0:
        failure = "Failed ({}) to create secret: {}\nstdout: {}\nstderr: {}".format(
            rc, create_secret_cmd, stdout, stderr)
        raise RuntimeError(failure)

    log.info("Successfully uploaded a base64-encoded keytab file to the secret store")
def install(args):
    """
    Install the spark CLI, deploy the requested dispatchers and save their
    details to "<output_file>-dispatchers.json".

    Args:
        args: Parsed command-line arguments, indexed by option/argument name
            ('--options-json', '--package-repo', '--user', '<num_dispatchers>',
            '<service_name_base>', '<output_file>').

    Exits the process with status 1 when '--options-json' names a file that
    does not exist.
    """
    options_file = args['--options-json']
    if options_file:
        if not os.path.isfile(options_file):
            log.error("The specified file does not exist: %s", options_file)
            sys.exit(1)

        # Context manager so the options file handle is closed after parsing.
        with open(options_file, 'r') as options_fp:
            options = json.load(options_fp)
    else:
        options = get_default_options(args)

    if args['--package-repo']:
        sdk_repository.add_stub_universe_urls([args['--package-repo']])

    rc, _, _ = sdk_cmd.run_raw_cli("package install spark --cli --yes")
    assert rc == 0, "Error installing spark CLI"

    quota_options = get_quota_options(args)

    services = {}

    services["spark"] = deploy_dispatchers(
        num_dispatchers=int(args['<num_dispatchers>']),
        service_name_base=args['<service_name_base>'],
        output_file=args['<output_file>'],
        linux_user=args["--user"],
        options=options,
        quota_options=quota_options)

    output_filename = "{}-dispatchers.json".format(args["<output_file>"])
    with open(output_filename, "w") as fp:
        log.info("Saving dispatcher info to: %s", output_filename)
        json.dump(services, fp, indent=2)
def add_stub_universe_urls(stub_universe_urls: list) -> dict:
    """
    Register each stub universe URL as a package repo under a random name.

    Repos already registered with one of the given URIs are removed first.

    Returns:
        Mapping of generated repo name -> URL (empty when no URLs were given).

    Raises:
        Exception: if adding a repo exits with a non-zero return code.
    """
    if not stub_universe_urls:
        return {}

    log.info('Adding stub URLs: {}'.format(stub_universe_urls))
    stub_urls = {}
    for idx, url in enumerate(stub_universe_urls):
        log.info('URL {}: {}'.format(idx, repr(url)))
        repo_name = 'testpkg-{}'.format(sdk_utils.random_string())
        stub_urls[repo_name] = url

    # clean up any duplicate repositories
    listing = json.loads(sdk_cmd.run_cli('package repo list --json'))
    for repo in listing['repositories']:
        if repo['uri'] not in stub_urls.values():
            continue
        log.info('Removing duplicate stub URL: {}'.format(repo['uri']))
        sdk_cmd.run_cli('package repo remove {}'.format(repo['name']))

    # add the needed universe repositories
    for name, url in stub_urls.items():
        log.info('Adding stub repo {} URL: {}'.format(name, url))
        add_cmd = 'package repo add --index=0 {} {}'.format(name, url)
        rc, stdout, stderr = sdk_cmd.run_raw_cli(add_cmd)
        if rc == 0:
            continue
        raise Exception(
            'Failed to add stub repo {} ({}): stdout=[{}], stderr=[{}]'.
            format(name, url, stdout, stderr))

    log.info('Finished adding universe repos')
    return stub_urls
def _service_endpoint_dns(package_name, service_name, endpoint_name):
    """
    Return the "dns" entry of a service endpoint.

    Runs `<package_name> --name=<service_name> endpoints <endpoint_name>`,
    asserts that it succeeds, and returns the "dns" field of its JSON output.
    """
    cmd = "{package_name} --name={service_name} endpoints {endpoint_name}".format(
        package_name=package_name,
        service_name=service_name,
        endpoint_name=endpoint_name)
    rt, stdout, _ = sdk_cmd.run_raw_cli(cmd)
    # The message needs formatting; left as-is it would print the literal
    # "{endpoint_name}" placeholder.
    assert rt == 0, "Failed to get {endpoint_name} endpoints".format(
        endpoint_name=endpoint_name)
    return json.loads(stdout)["dns"]
def get_task_log_for_id(task_id: str, task_file: str = 'stdout', lines: int = 1000000) -> str:
    """
    Fetch up to `lines` lines of `task_file` for the given task id.

    Returns:
        The log content, or an empty string when the command fails with a
        "No files exist" error.

    Raises:
        ConnectionError: on any other failure of the task log command.
    """
    log.info('Fetching {} from {}'.format(task_file, task_id))
    log_cmd = 'task log {} --all --lines {} {}'.format(task_id, lines, task_file)
    rc, stdout, stderr = sdk_cmd.run_raw_cli(log_cmd, print_output=False)

    if rc == 0:
        return stdout
    if stderr.startswith('No files exist. Exiting.'):
        return ''
    raise ConnectionError(
        'Failed to get {} task log for task_id={}: {}'.format(task_file, task_id, stderr))
def check_broker(id: int):
    """
    Assert that, in the last 1000 log lines of broker `id`, the previously
    recorded log line (broker_log_line[id]) is present and is followed by a
    zookeeper "SyncConnected" state change.

    Raises:
        Exception: if the task log command fails or returns no output.
    """
    log_cmd = "task log kafka-{}-broker --lines 1000".format(id)
    rc, output, _ = sdk_cmd.run_raw_cli(log_cmd, print_output=False)
    if rc or not output:
        raise Exception("No task logs for kafka-{}-broker".format(id))

    # rfind: compare the LAST occurrence of each marker in the log.
    last_log_index = output.rfind(broker_log_line[id])
    success_index = output.rfind(
        "zookeeper state changed (SyncConnected) (org.I0Itec.zkclient.ZkClient)")

    assert -1 < last_log_index < success_index, "{}:{} STDOUT: {}".format(
        last_log_index, success_index, output)
def wait_for_bundle_file():
    """
    Return the file name of the finished diagnostics bundle, or None when the
    status command fails or the job has not reached 100% yet.
    """
    rc, stdout, _ = sdk_cmd.run_raw_cli('node diagnostics --status --json')
    if rc:
        return None

    # e.g. { "some-ip": { stuff we want } }
    per_node_status = json.loads(stdout)
    status = next(iter(per_node_status.values()))
    if status['job_progress_percentage'] == 100:
        # e.g. "/var/lib/dcos/dcos-diagnostics/diag-bundles/bundle-2018-01-11-1515698691.zip"
        return os.path.basename(status['last_bundle_dir'])
    return None
def wait_for_bundle_file():
    """
    Check the diagnostics job status once.

    Returns the bundle's base file name when the first reported status entry
    shows 100% progress; otherwise (or if the status command fails) None.
    """
    rc, raw_status, _ = sdk_cmd.run_raw_cli('node diagnostics --status --json')
    if rc:
        return None

    # e.g. { "some-ip": { stuff we want } }
    first_status = next(iter(json.loads(raw_status).values()))
    finished = first_status['job_progress_percentage'] == 100
    if not finished:
        return None

    # e.g. "/var/lib/dcos/dcos-diagnostics/diag-bundles/bundle-2018-01-11-1515698691.zip"
    bundle_path = first_status['last_bundle_dir']
    return os.path.basename(bundle_path)
def test_spark_kafka_interservice():
    """
    Run the Kafka/Spark pipeline test with Kerberos enabled, when Kafka is
    enabled for this run.

    A failure to install the Kafka CLI is only logged as a warning; the
    pipeline is still attempted. The number of messages to stop after is read
    from the STOP_COUNT environment variable (default "1000").
    """
    if not utils.kafka_enabled():
        return

    rc, stdout, stderr = sdk_cmd.run_raw_cli(
        "package install {} --yes --cli".format(KAFKA_PACKAGE_NAME))
    if rc != 0:
        # Logger.warn is a deprecated alias; warning() is the supported name.
        LOGGER.warning(
            "Got return code {rc} when trying to install {package} cli\nstdout:{out}\n{err}"
            .format(rc=rc, package=KAFKA_PACKAGE_NAME, out=stdout, err=stderr))

    stop_count = os.getenv("STOP_COUNT", "1000")
    test_pipeline(
        kerberos_flag="true",
        stop_count=stop_count,
        jar_uri=JAR_URI,
        keytab_secret=KAFKA_KEYTAB_SECRET,
        spark_app_name=SOAK_SPARK_APP_NAME,
        jaas_uri=KAFKA_JAAS_URI)
def test_rpc_auth():
    """
    Regenerate the Spark auth secret, then run SparkPi with executor
    authentication using that secret.
    """
    secret_name = "sparkauth"

    # Start from a clean slate: remove any secret left by a previous run.
    sdk_security.delete_secret(secret_name)
    generate_cmd = "{} --verbose secret /{}".format(utils.SPARK_PACKAGE_NAME, secret_name)
    rc, _, _ = sdk_cmd.run_raw_cli(generate_cmd)
    assert rc == 0, "Failed to generate Spark auth secret"

    submit_args = [
        "--executor-auth-secret {}".format(secret_name),
        "--class org.apache.spark.examples.SparkPi",
    ]
    utils.run_tests(
        app_url=utils.SPARK_EXAMPLES,
        app_args="100",
        expected_output="Pi is roughly 3",
        service_name=utils.SPARK_SERVICE_NAME,
        args=submit_args)
def task_exec(task_name: str, cmd: str, return_stderr_in_stdout: bool = False) -> tuple:
    """
    Runs the given command on a task via `dcos task exec`.

    :param task_name: Name of task to run command on.
    :param cmd: The command to execute.
    :param return_stderr_in_stdout: When true, stderr is appended to stdout
        (separated by a newline) and only two values are returned.
    :return: a tuple of the task exec's return code, stdout, and stderr; or of
        the return code and the combined output when return_stderr_in_stdout
        is set.
    """
    rc, stdout, stderr = sdk_cmd.run_raw_cli(
        "task exec {task_name} {cmd}".format(task_name=task_name, cmd=cmd))

    if not return_stderr_in_stdout:
        return rc, stdout, stderr
    return rc, stdout + "\n" + stderr
def remove_universe_repos(stub_urls):
    """
    Remove every package repo named in `stub_urls` (a name -> URL mapping).

    A repo that is reported as not present is skipped silently.

    Raises:
        Exception: on any other non-zero exit or stderr output.
    """
    log.info('Removing universe repos')

    # clear out the added universe repositores at testing end
    for name, url in stub_urls.items():
        log.info('Removing stub URL: {}'.format(url))
        rc, stdout, stderr = sdk_cmd.run_raw_cli('package repo remove {}'.format(name))
        if rc == 0 and not stderr:
            continue
        if stderr.endswith('is not present in the list'):
            # tried to remove something that wasn't there, move on.
            continue
        raise Exception(
            'Failed to remove stub repo: stdout=[{}], stderr=[{}]'.format(stdout, stderr))

    log.info('Finished removing universe repos')
def setup_module(module):
    """
    Install the Spark CLI and, if the soak Spark service is not installed yet,
    install it with HDFS and Kerberos options.
    """
    # The CLI install result is not checked here.
    sdk_cmd.run_raw_cli("package install {} --yes --cli".format(
        utils.SPARK_PACKAGE_NAME))

    if shakedown.package_installed('spark', SOAK_SPARK_SERVICE_NAME):
        return

    hdfs_options = {
        "config-url":
        "http://api.{}.marathon.l4lb.thisdcos.directory/v1/endpoints".
        format(SOAK_HDFS_SERVICE_NAME)
    }
    kerberos_options = {
        "enabled": True,
        "realm": "LOCAL",
        "kdc": {
            "hostname": "kdc.marathon.autoip.dcos.thisdcos.directory",
            "port": 2500
        }
    }
    additional_options = {
        "hdfs": hdfs_options,
        "security": {
            "kerberos": kerberos_options
        }
    }
    utils.require_spark(service_name=SOAK_SPARK_SERVICE_NAME,
                        additional_options=additional_options)
def check_broker(id: int):
    """
    Verify the broker's recent log: the line recorded earlier in
    broker_log_line[id] must appear, and a zookeeper "SyncConnected" state
    change must appear after it (last occurrences are compared).

    Raises:
        Exception: if the task log command fails or returns no output.
    """
    rc, task_log, _ = sdk_cmd.run_raw_cli(
        "task log kafka-{}-broker --lines 1000".format(id),
        print_output=False)
    if rc or not task_log:
        raise Exception("No task logs for kafka-{}-broker".format(id))

    reconnect_marker = "zookeeper state changed (SyncConnected) (org.I0Itec.zkclient.ZkClient)"
    last_log_index = task_log.rfind(broker_log_line[id])
    success_index = task_log.rfind(reconnect_marker)

    in_order = last_log_index > -1 and last_log_index < success_index
    assert in_order, "{}:{} STDOUT: {}".format(
        last_log_index, success_index, task_log)
def test_zookeeper_reresolution(kafka_server):
    """
    Restart zookeeper pods and verify that each kafka broker logs a zookeeper
    "SyncConnected" state change after the last log line it had before the
    restarts.
    """
    # First get the last logs lines for the kafka brokers
    broker_log_line = []
    for broker_id in range(config.DEFAULT_BROKER_COUNT):
        rc, stdout, _ = sdk_cmd.run_raw_cli(
            "task log kafka-{}-broker --lines 1".format(broker_id))
        if rc or not stdout:
            raise Exception("No task logs for kafka-{}-broker".format(broker_id))
        broker_log_line.append(stdout)

    def restart_zookeeper_node(node_id: int):
        # Restart one pod and wait for the recovery plan to start and finish.
        sdk_cmd.svc_cli(
            config.ZOOKEEPER_PACKAGE_NAME,
            config.ZOOKEEPER_SERVICE_NAME,
            "pod restart zookeeper-{}".format(node_id),
        )
        sdk_plan.wait_for_kicked_off_recovery(config.ZOOKEEPER_SERVICE_NAME)
        sdk_plan.wait_for_completed_recovery(config.ZOOKEEPER_SERVICE_NAME)

    # Restart each zookeeper node, so that each one receives a new IP address
    # (it's on a virtual network). This will force Kafka to re-resolve ZK nodes.
    # NOTE(review): the range covers only ZOOKEEPER_TASK_COUNT / 2 nodes.
    for node_id in range(int(config.ZOOKEEPER_TASK_COUNT / 2)):
        restart_zookeeper_node(node_id)

    # Now, verify that Kafka remains happy
    def check_broker(broker_id: int):
        rc, stdout, _ = sdk_cmd.run_raw_cli(
            "task log kafka-{}-broker --lines 1000".format(broker_id),
            print_output=False)
        if rc or not stdout:
            raise Exception("No task logs for kafka-{}-broker".format(broker_id))

        last_log_index = stdout.rfind(broker_log_line[broker_id])
        success_index = stdout.rfind(
            "zookeeper state changed (SyncConnected) (org.I0Itec.zkclient.ZkClient)"
        )
        assert -1 < last_log_index < success_index, "{}:{} STDOUT: {}".format(
            last_log_index, success_index, stdout)

    for broker_id in range(config.DEFAULT_BROKER_COUNT):
        check_broker(broker_id)
def __create_and_upload_secret(self, keytab_path: str):
    """
    This method base64 encodes the keytab file and creates a secret with this encoded content so
    the tasks can fetch it.

    The encoded content is written next to the keytab as "<keytab_path>.base64"
    and uploaded with `security secrets create --value-file`. Any pre-existing
    secret of the same name is deleted first.

    Raises:
        Exception: if reading or encoding the keytab file fails.
        RuntimeError: if the `security secrets create` command exits non-zero.
    """
    log.info(
        "Creating and uploading the keytab file %s to the secret store",
        keytab_path)

    try:
        base64_encoded_keytab_path = "{}.base64".format(keytab_path)
        with open(keytab_path, "rb") as f:
            keytab = f.read()

        base64_encoding = base64.b64encode(keytab).decode("utf-8")
        with open(base64_encoded_keytab_path, "w") as f:
            f.write(base64_encoding)

        # Log only the size: the encoded keytab is credential material and
        # must not be written to the logs.
        log.info("Finished base64-encoding secret content (%d bytes)",
                 len(base64_encoding))
    except Exception as e:
        raise Exception(
            "Failed to base64-encode the keytab file: {}".format(repr(e))) from e

    self.keytab_secret_path = "{}_keytab".format(DCOS_BASE64_PREFIX)

    sdk_security.install_enterprise_cli()
    # try to delete any preexisting secret data:
    sdk_security.delete_secret(self.keytab_secret_path)
    # create new secret:
    create_secret_cmd = "security secrets create {keytab_secret_path} --value-file {encoded_keytab_path}".format(
        keytab_secret_path=self.keytab_secret_path,
        encoded_keytab_path=base64_encoded_keytab_path)
    log.info("Creating secret named %s from file %s: %s",
             self.keytab_secret_path, base64_encoded_keytab_path,
             create_secret_cmd)
    rc, stdout, stderr = sdk_cmd.run_raw_cli(create_secret_cmd)
    if rc != 0:
        raise RuntimeError(
            "Failed ({}) to create secret: {}\nstdout: {}\nstderr: {}".
            format(rc, create_secret_cmd, stdout, stderr))

    log.info(
        "Successfully uploaded a base64-encoded keytab file to the secret store"
    )
def remove_universe_repos(stub_urls):
    """
    Remove the package repos named by the keys of `stub_urls`.

    A removal that fails only because the repo is not present is ignored.

    Raises:
        Exception: on any other non-zero exit or stderr output.
    """
    log.info('Removing universe repos')

    # clear out the added universe repositores at testing end
    for name, url in stub_urls.items():
        log.info('Removing stub URL: {}'.format(url))
        remove_cmd = 'package repo remove {}'.format(name)
        rc, stdout, stderr = sdk_cmd.run_raw_cli(remove_cmd)

        succeeded = rc == 0 and not stderr
        # tried to remove something that wasn't there, move on.
        if succeeded or stderr.endswith('is not present in the list'):
            continue

        raise Exception(
            'Failed to remove stub repo: stdout=[{}], stderr=[{}]'.
            format(stdout, stderr))

    log.info('Finished removing universe repos')
def test_rpc_auth():
    """
    Generate the Spark auth secret, then run SparkPi with executor
    authentication using that secret.
    """
    secret_name = "sparkauth"

    generate_cmd = "{pkg} secret /{secret}".format(
        pkg=utils.SPARK_PACKAGE_NAME, secret=secret_name)
    rc, stdout, stderr = sdk_cmd.run_raw_cli(generate_cmd)
    assert rc == 0, "Failed to generate Spark auth secret, stderr {err} stdout {out}".format(
        err=stderr, out=stdout)

    submit_args = [
        "--executor-auth-secret", secret_name,
        "--class", "org.apache.spark.examples.SparkPi",
    ]
    utils.run_tests(
        app_url=utils.SPARK_EXAMPLES,
        app_args="100",
        expected_output="Pi is roughly 3",
        app_name="/spark",
        args=submit_args)
def test_zookeeper_reresolution(kafka_server):
    """
    Restart zookeeper pods and verify that each kafka broker logs a zookeeper
    "SyncConnected" state change after the last log line it had before the
    restarts. Each broker check is attempted up to 3 times, 1 second apart.
    """
    # First get the last logs lines for the kafka brokers
    broker_log_line = []
    for broker_id in range(config.DEFAULT_BROKER_COUNT):
        rc, stdout, _ = sdk_cmd.run_raw_cli(
            "task log kafka-{}-broker --lines 1".format(broker_id))
        if rc or not stdout:
            raise Exception("No task logs for kafka-{}-broker".format(broker_id))
        broker_log_line.append(stdout)

    def restart_zookeeper_node(node_id: int):
        # Restart one pod and wait for the recovery plan to start and finish.
        sdk_cmd.svc_cli(
            config.ZOOKEEPER_PACKAGE_NAME,
            config.ZOOKEEPER_SERVICE_NAME,
            "pod restart zookeeper-{}".format(node_id))
        sdk_plan.wait_for_kicked_off_recovery(config.ZOOKEEPER_SERVICE_NAME)
        sdk_plan.wait_for_completed_recovery(config.ZOOKEEPER_SERVICE_NAME)

    # Restart each zookeeper node, so that each one receives a new IP address
    # (it's on a virtual network). This will force Kafka to re-resolve ZK nodes.
    # NOTE(review): the range covers only ZOOKEEPER_TASK_COUNT / 2 nodes.
    for node_id in range(int(config.ZOOKEEPER_TASK_COUNT / 2)):
        restart_zookeeper_node(node_id)

    # Now, verify that Kafka remains happy
    @retrying.retry(
        wait_fixed=1000,
        stop_max_attempt_number=3)
    def check_broker(broker_id: int):
        rc, stdout, _ = sdk_cmd.run_raw_cli(
            "task log kafka-{}-broker --lines 1000".format(broker_id),
            print_output=False)
        if rc or not stdout:
            raise Exception("No task logs for kafka-{}-broker".format(broker_id))

        last_log_index = stdout.rfind(broker_log_line[broker_id])
        success_index = stdout.rfind(
            "zookeeper state changed (SyncConnected) (org.I0Itec.zkclient.ZkClient)")

        assert -1 < last_log_index < success_index, "{}:{} STDOUT: {}".format(
            last_log_index, success_index, stdout)

    for broker_id in range(config.DEFAULT_BROKER_COUNT):
        check_broker(broker_id)
def _get_pkg_version(package_name):
    """
    Return the version reported by `package describe <package_name>`.

    Returns:
        The version string, or None when the command fails or its output
        cannot be parsed (a warning is logged in both cases).
    """
    cmd = 'package describe {}'.format(package_name)
    # Only log stdout/stderr if there's actually an error.
    rc, stdout, stderr = sdk_cmd.run_raw_cli(cmd, print_output=False)
    if rc != 0:
        log.warning('Failed to run "{}":\nSTDOUT:\n{}\nSTDERR:\n{}'.format(cmd, stdout, stderr))
        return None
    try:
        describe = json.loads(stdout)
        # New location (either 1.10+ or 1.11+):
        version = describe.get('package', {}).get('version', None)
        if version is None:
            # Old location (until 1.9 or until 1.10):
            version = describe['version']
        return version
    except Exception:
        # `except Exception` rather than a bare `except`, so that
        # KeyboardInterrupt/SystemExit are not swallowed and turned into None.
        log.warning('Failed to extract package version from "{}":\nSTDOUT:\n{}\nSTDERR:\n{}'.format(
            cmd, stdout, stderr))
        log.warning(traceback.format_exc())
        return None
def make_credential_secret(path, val):
    """
    (Re)create the secret at `/<path>` with value `val`: any existing secret
    is deleted first, and the create command is asserted to succeed.
    """
    sdk_security.delete_secret(path)

    create_cmd = "security secrets create /{} -v {}".format(path, val)
    rc, stdout, stderr = sdk_cmd.run_raw_cli(create_cmd)
    failure_message = "Failed to create secret {}, stderr: {}, stdout: {}".format(
        path, stderr, stdout)
    assert rc == 0, failure_message
def _install_impl():
    """
    Run `cmd` (taken from the enclosing scope) once and return its return
    code, logging the output when the code is non-zero.
    """
    rc, stdout, stderr = sdk_cmd.run_raw_cli(cmd)
    if not rc:
        return rc

    log.error("rc=%s stdout=%s stderr=%s", rc, stdout, stderr)
    return rc
def wait_for_registry_available():
    """
    Assert that describing a package that does not exist fails with return
    code 1 and a "not found" message on stderr.
    """
    # NOTE(review): presumably used as a probe that the registry is answering
    # requests -- confirm against the caller.
    probe_cmd = 'registry describe --package-name=hello --package-version=world'
    code, _, stderr = sdk_cmd.run_raw_cli(probe_cmd)

    expected_error = 'Version [world] of package [hello] not found'
    assert code == 1 and expected_error in stderr
def _kafka_broker_dns():
    """
    Return the first "dns" entry of the Kafka service's broker endpoints,
    asserting that the endpoints command succeeds.
    """
    endpoints_cmd = "{package_name} --name={service_name} endpoints broker".format(
        package_name=KAFKA_PACKAGE_NAME, service_name=KAFKA_SERVICE_NAME)
    rt, stdout, _ = sdk_cmd.run_raw_cli(endpoints_cmd)
    assert rt == 0, "Failed to get broker endpoints"

    endpoints = json.loads(stdout)
    return endpoints["dns"][0]