def get_role_component_current_hdp_version():
    """
    Look up the HDP version currently selected for this command's component.

    The component is resolved from the command's role: a role found in
    SERVER_ROLE_DIRECTORY_MAP wins; otherwise, for a SERVICE_CHECK role
    command, the role is looked up in SERVICE_CHECK_DIRECTORY_MAP.

    :return: the version reported by get_hdp_version for the resolved
             component, or None when no component could be resolved or no
             version was reported
    """
    role = default("/role", "")
    role_command = default("/roleCommand", "")

    component = None
    if role in SERVER_ROLE_DIRECTORY_MAP:
        component = SERVER_ROLE_DIRECTORY_MAP[role]
    elif role_command == "SERVICE_CHECK" and role in SERVICE_CHECK_DIRECTORY_MAP:
        component = SERVICE_CHECK_DIRECTORY_MAP[role]

    # Nothing to query when the role maps to no hdp-select component.
    if component is None:
        return None

    version = get_hdp_version(component)
    if version is not None:
        Logger.info("{0} is currently at version {1}".format(component, version))
    else:
        Logger.warning("Unable to determine hdp-select version for {0}".format(component))
    return version
def service_check(self, env):
    """
    Service check: create the sample Solr collection (or core) when requested.

    Exits with status 1 when the Solr PID file is missing. Does nothing when
    sample-collection creation is disabled or the collection already exists.
    In cloud mode a collection is created, otherwise a core.
    """
    import params
    env.set_params(params)

    if not os.path.isfile(params.solr_config_pid_file):
        Logger.error(format("PID file {solr_config_pid_file} does not exist"))
        exit(1)

    if not params.solr_collection_sample_create:
        Logger.info("Create sample collection unchecked, skipping ...")
        return

    if exists_collection(params.solr_collection_name):
        Logger.warning(format("Collection {solr_collection_name} already exists, skipping ..."))
        return

    # Build the command line first; both variants append their output to the
    # service log file.
    if params.solr_cloud_mode:
        create_command = format(
            '{solr_config_bin_dir}/solr create_collection -c {solr_collection_name}'
            ' -d {solr_collection_config_dir} -p {solr_config_port}'
            ' -s {solr_collection_shards} -rf {solr_collection_replicas}'
            ' >> {solr_config_service_log_file} 2>&1')
    else:
        create_command = format(
            '{solr_config_bin_dir}/solr create_core -c {solr_collection_name}'
            ' -d {solr_collection_config_dir} -p {solr_config_port} >> {solr_config_service_log_file} 2>&1')

    Execute(
        create_command,
        environment={'JAVA_HOME': params.java64_home},
        user=params.solr_config_user
    )
def get_hdp_version():
    """
    Return the HDP version to use.

    When ``options.hdp_version`` is set it is returned unchanged. Otherwise
    the version is parsed from the output of
    ``/usr/bin/hdp-select status hadoop-mapreduce-historyserver``.

    :return: the HDP version string, or 1 when hdp-select returns a non-zero
             code or its output contains no version
    """
    if options.hdp_version:
        return options.hdp_version

    # Ubuntu returns: "stdin: is not a tty", as subprocess output, so the
    # command output is redirected into a temporary file and read back.
    # The context manager closes (and thereby removes) the temporary file as
    # soon as the output has been read instead of leaving it open.
    with tempfile.NamedTemporaryFile() as tmpfile:
        with open(tmpfile.name, 'r+') as output_file:
            get_hdp_version_cmd = '/usr/bin/hdp-select status %s > %s' % ('hadoop-mapreduce-historyserver', tmpfile.name)
            code, stdoutdata = shell.call(get_hdp_version_cmd)
            out = output_file.read()

    if code != 0 or out is None:
        Logger.warning("Could not verify HDP version by calling '%s'. Return Code: %s, Output: %s." % (get_hdp_version_cmd, str(code), str(out)))
        return 1

    # The version looks like "<dotted digits>-<build number>".
    matches = re.findall(r"([\d\.]+\-\d+)", out)
    hdp_version = matches[0] if matches else None

    if not hdp_version:
        Logger.error("Could not parse HDP version from output of hdp-select: %s" % str(out))
        return 1

    return hdp_version
def bootstrap_standby_namenode(params, use_path=False):
    """
    Run ``hdfs namenode -bootstrapStandby`` until it succeeds, up to 50 times.

    :param params: object providing hadoop_bin_dir, command_phase and hdfs_user
    :param use_path: when True the hdfs command is prefixed with
                     params.hadoop_bin_dir
    :return: True when the command exits with 0 (bootstrapped) or 5 (already
             bootstrapped); False when every attempt failed or an exception
             was raised
    """
    # The format() templates below reference bin_path by name, so this local
    # must keep its name.
    bin_path = ""
    if use_path:
        bin_path = os.path.join(params.hadoop_bin_dir, '')

    try:
        max_attempts = 50
        bootstrap_cmd = format("{bin_path}hdfs namenode -bootstrapStandby -nonInteractive")
        # Blueprint based deployments start both NameNodes in parallel and the
        # first bootstrap attempt may occasionally fail. Depending on how it
        # failed, a second attempt may wrongly conclude that bootstrap already
        # succeeded (e.g. because the folder exists). Using -force avoids that,
        # but only during the initial start.
        if params.command_phase == "INITIAL_START":
            bootstrap_cmd = format("{bin_path}hdfs namenode -bootstrapStandby -nonInteractive -force")
        Logger.info("Boostrapping standby namenode: %s" % (bootstrap_cmd))

        for attempt in range(1, max_attempts + 1):
            Logger.info('Try %d out of %d' % (attempt, max_attempts))
            code, out = shell.call(bootstrap_cmd, logoutput=False, user=params.hdfs_user)
            if code == 0:
                Logger.info("Standby namenode bootstrapped successfully")
                return True
            if code == 5:
                Logger.info("Standby namenode already bootstrapped")
                return True
            Logger.warning('Bootstrap standby namenode failed with %d error code. Will retry' % (code))
    except Exception as ex:
        Logger.error('Bootstrap standby namenode threw an exception. Reason %s' % (str(ex)))

    return False
def _get_tar_source_and_dest_folder(tarball_prefix):
    """
    Read and validate the tarball source file and destination folder
    configured in cluster-env for the given prefix.

    :param tarball_prefix: Prefix of the tarball must be one of tez, hive, mr, pig
    :return: (source_file, destination_folder) with the folder guaranteed to
             end in "/", or (None, None) when a property is missing, the
             source contains no "/", or the destination is not an hdfs:// URL
    """
    prefix = tarball_prefix.lower()

    # E.g., /usr/hdp/current/hadoop-client/tez-{{ hdp_stack_version }}.tar.gz
    source_file = default("/configurations/cluster-env/%s%s" % (prefix, TAR_SOURCE_SUFFIX), None)
    # E.g., hdfs:///hdp/apps/{{ hdp_stack_version }}/mapreduce/
    destination_folder = default("/configurations/cluster-env/%s%s" % (prefix, TAR_DESTINATION_FOLDER_SUFFIX), None)

    if not (source_file and destination_folder):
        Logger.warning("Did not find %s tar source file and destination folder properties in cluster-env.xml" % tarball_prefix)
        return None, None

    if "/" not in source_file:
        Logger.warning("The tar file path %s is not valid" % str(source_file))
        return None, None

    if not destination_folder.endswith("/"):
        destination_folder += "/"

    if not destination_folder.startswith("hdfs://"):
        return None, None

    return source_file, destination_folder
def refresh_configs(params):
    """
    Validate the indexer parameters, patch the global config and pull the
    config, but only when the Zookeeper-configured flag check passes.

    :param params: object passed to is_zk_configured, patch_global_config and
                   pull_config; provides zk_configured_flag_file
    """
    if is_zk_configured(params):
        check_indexer_parameters()
        patch_global_config(params)
        pull_config(params)
        return

    # Without the flag the refresh is skipped and only a warning is left.
    flag_file = params.zk_configured_flag_file
    Logger.warning("The expected flag file '" + flag_file + "'indicating that Zookeeper has been configured does not exist. Skipping patching. An administrator should look into this.")
def solr_schema_install(self, env):
    """
    Run create_collection.sh for every Solr schema reported by
    IndexingCommands.

    :return: True when every collection script ran, False when any step
             raised (the failure is logged as a warning)
    """
    from params import params
    env.set_params(params)
    Logger.info("Installing Solr schemas")

    if self.__params.security_enabled:
        metron_security.kinit(self.__params.kinit_path_local,
                              self.__params.solr_keytab_path,
                              self.__params.solr_principal_name,
                              self.__params.solr_user)

    try:
        for collection_name in IndexingCommands(params).get_solr_schemas():
            # Environment exports first, then the positional script template
            # that is filled in with the Metron home and collection name.
            install_cmd = format((
                "export ZOOKEEPER={solr_zookeeper_url};"
                "export SECURITY_ENABLED={security_enabled};"
            ))
            install_cmd += "{0}/bin/create_collection.sh {1};"
            Execute(install_cmd.format(params.metron_home, collection_name),
                    user=self.__params.solr_user)
        return True
    except Exception as error:
        warning_template = "WARNING: Solr schemas could not be installed. " \
                           "Is Solr running? Will reattempt install on next start. error={0}"
        Logger.warning(warning_template.format(error))
        return False
def get_current_version(use_upgrading_version_during_upgrade=True):
    """
    Determine the effective version to use when copying the tarballs.

    :param use_upgrading_version_during_upgrade: True, except when the RU/EU
           hasn't started yet. Only consulted when the command carries an
           upgrade direction.
    :return: the version, or False when no version could be determined
    """
    upgrade_direction = default("/commandParams/upgrade_direction", None)
    is_stack_upgrade = upgrade_direction is not None
    current_version = default("/hostLevelParams/current_version", None)
    Logger.info("Default version is {0}".format(current_version))

    if is_stack_upgrade and use_upgrading_version_during_upgrade:
        # This is the version going to. In the case of a downgrade, it is the
        # lower version.
        current_version = default("/commandParams/version", None)
        Logger.info("Because this is a Stack Upgrade, will use version {0}".format(current_version))
    elif is_stack_upgrade:
        Logger.info("This is a Stack Upgrade, but keep the version unchanged.")
    elif current_version is None:
        # During normal operation, the first installation of services won't
        # yet know about the version, so <stack-selector> must supply it.
        stack_version = _get_single_version_from_stack_select()
        if stack_version:
            Logger.info("Will use stack version {0}".format(stack_version))
            current_version = stack_version

    if current_version is not None:
        return current_version

    if is_stack_upgrade:
        message_suffix = "during stack %s" % str(upgrade_direction)
    else:
        message_suffix = ""
    Logger.warning("Cannot copy tarball because unable to determine current version {0}.".format(message_suffix))
    return False
def select(stack_name, package, version, try_create=True, ignore_errors=False):
    """
    Select a config version for the given package and make sure the
    /etc/<component>/conf symlink structure exists.

    When /etc/<component>/conf exists as a real directory it is copied to
    conf.backup (unless that already exists), removed, and replaced by a link
    to conf.backup. When it is missing entirely it is linked to
    <stack-root>/current/<component>/conf.

    :param stack_name: the name of the stack
    :param package: the name of the package, as-used by <conf-selector-tool>
    :param version: the version number to create
    :param try_create: optional argument to attempt to create the directory before setting it
    :param ignore_errors: optional argument to ignore any error and simply log a warning
    """
    try:
        # do nothing if the stack does not support versioned configurations
        if not _valid(stack_name, package, version):
            return

        if try_create:
            create(stack_name, package, version)

        shell.checked_call(_get_cmd("set-conf-dir", package, version), logoutput=False, quiet=False, sudo=True)

        # For consistency, /etc/<component>/conf must exist as a symlink, since
        # some people still prefer it over <stack-root>.
        package_dirs = get_package_dirs()
        if package not in package_dirs:
            return

        Logger.info("Ensuring that {0} has the correct symlink structure".format(package))
        for directory_structure in package_dirs[package]:
            conf_dir = directory_structure["conf_dir"]
            current_dir = directory_structure["current_dir"]

            # already a symlink: nothing to repair
            if os.path.islink(conf_dir):
                continue

            if not os.path.exists(conf_dir):
                # missing entirely
                # /etc/<component>/conf -> <stack-root>/current/<component>/conf
                Link(conf_dir, to=current_dir)
                continue

            # A real directory: convert it to a symlink. The format() template
            # references conf_backup_dir by name.
            conf_backup_dir = os.path.join(os.path.dirname(conf_dir), "conf.backup")

            # create conf.backup and copy files to it (if it doesn't exist)
            Execute(("cp", "-R", "-p", conf_dir, conf_backup_dir),
                    not_if=format("test -e {conf_backup_dir}"),
                    sudo=True)

            # delete the old /etc/<component>/conf directory and link to the backup
            Directory(conf_dir, action="delete")
            Link(conf_dir, to=conf_backup_dir)
    except Exception as exception:
        if ignore_errors is not True:
            raise
        Logger.warning("Could not select the directory for package {0}. Error: {1}".format(package, str(exception)))
def get_role_component_current_stack_version():
    """
    Look up the stack version currently selected for this command's component.

    The component is resolved from the command's role: a role found in
    SERVER_ROLE_DIRECTORY_MAP wins; otherwise, for a SERVICE_CHECK role
    command, the role is looked up in SERVICE_CHECK_DIRECTORY_MAP.

    :return: the version reported by get_stack_version for the resolved
             component, or None when no component could be resolved or no
             version was reported
    """
    role = default("/role", "")
    role_command = default("/roleCommand", "")
    # Only used in the warning message, but resolved up front as before.
    stack_selector_name = stack_tools.get_stack_tool_name(stack_tools.STACK_SELECTOR_NAME)

    component = None
    if role in SERVER_ROLE_DIRECTORY_MAP:
        component = SERVER_ROLE_DIRECTORY_MAP[role]
    elif role_command == "SERVICE_CHECK" and role in SERVICE_CHECK_DIRECTORY_MAP:
        component = SERVICE_CHECK_DIRECTORY_MAP[role]

    if component is None:
        return None

    version = get_stack_version(component)
    if version is not None:
        Logger.info("{0} is currently at version {1}".format(component, version))
    else:
        Logger.warning("Unable to determine {0} version for {1}".format(stack_selector_name, component))
    return version
def link_component_conf_to_versioned_config(package, version):
    """
    Select the versioned config of the given package on the HDP stack.

    Any failure of select() is logged as a warning and not propagated.

    :param package: package name handed to select()
    :param version: version handed to select()
    """
    try:
        select("HDP", package, version)
    except Exception as error:
        warning = "Could not select the directory for package {0}. Error: {1}".format(package, error)
        Logger.warning(warning)
def copy_tarballs_to_hdfs(source, dest, hdp_select_component_name, component_user, file_owner, group_owner):
    """
    Copy a tarball into an HDFS folder, substituting the detected HDP version
    into the destination path.

    :param source: Local path of the tarball to copy
    :param dest: Destination folder; any "{{ hdp_stack_version }}" placeholder
                 in the resulting file path is replaced by the detected version
    :param hdp_select_component_name: Component name to get the status to determine the version
    :param component_user: User that will execute the Hadoop commands
    :param file_owner: Owner of the files copied to HDFS (typically hdfs account)
    :param group_owner: Group owner of the files copied to HDFS (typically hadoop group)
    :return: 1 when the source is missing or the HDP version cannot be
             determined; otherwise the value returned by _copy_files

    In order to call this function, params.py must have all of the following,
    hdp_stack_version, kinit_path_local, security_enabled, hdfs_user, hdfs_principal_name, hdfs_user_keytab,
    hadoop_bin_dir, hadoop_conf_dir, and HdfsDirectory as a partial function.
    """
    component_tar_source_file, component_tar_destination_folder = source, dest

    if not os.path.exists(component_tar_source_file):
        Logger.warning("Could not find file: %s" % str(component_tar_source_file))
        return 1

    # Ubuntu returns: "stdin: is not a tty", as subprocess output, so the
    # command output is redirected into a temporary file and read back.
    # The context manager closes (and thereby removes) the temporary file as
    # soon as the output has been read instead of leaving it open.
    with tempfile.NamedTemporaryFile() as tmpfile:
        with open(tmpfile.name, 'r+') as output_file:
            get_hdp_version_cmd = '/usr/bin/hdp-select status %s > %s' % (hdp_select_component_name, tmpfile.name)
            code, stdoutdata = shell.call(get_hdp_version_cmd)
            out = output_file.read()

    if code != 0 or out is None:
        Logger.warning("Could not verify HDP version by calling '%s'. Return Code: %s, Output: %s." %
                       (get_hdp_version_cmd, str(code), str(out)))
        return 1

    # The version looks like "<dotted digits>-<build number>".
    matches = re.findall(r"([\d\.]+\-\d+)", out)
    hdp_version = matches[0] if matches else None

    if not hdp_version:
        Logger.error("Could not parse HDP version from output of hdp-select: %s" % str(out))
        return 1

    file_name = os.path.basename(component_tar_source_file)
    destination_file = os.path.join(component_tar_destination_folder, file_name)
    destination_file = destination_file.replace("{{ hdp_stack_version }}", hdp_version)

    # On a secured cluster a ticket is obtained before touching HDFS; the same
    # kinit command is also handed on to _copy_files.
    kinit_if_needed = ""
    if params.security_enabled:
        kinit_if_needed = format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};")

    if kinit_if_needed:
        Execute(kinit_if_needed,
                user=component_user,
                path='/bin'
                )

    source_and_dest_pairs = [(component_tar_source_file, destination_file), ]
    return _copy_files(source_and_dest_pairs, file_owner, group_owner, kinit_if_needed)
def status(self, env):
    """
    Report whether the webide application is running.

    Looks for the webide-app path in the process list.

    :raises ComponentIsNotRunning: when the process check produces no output
    """
    import status_params
    env.set_params(status_params)

    process_check_command = "sudo ps aux | grep '/usr/hdp/2.2.0.0-2041/webide/webide-app' | grep -v 'grep' "
    if self.command_exe(process_check_command):
        return

    Logger.warning("{0} did not started!".format("webide APP server"))
    raise ComponentIsNotRunning()
def status(self, env):
    """
    Report whether the Spark Thrift server is running.

    Looks for the HiveThriftServer2 main class in the process list.

    :raises ComponentIsNotRunning: when the process check produces no output
    """
    import status_params
    env.set_params(status_params)

    process_check_command = "ps -ef | grep 'org.apache.spark.sql.hive.thriftserver.HiveThriftServer2' | grep -v grep"
    output = self.command_exe(process_check_command)
    if not output:
        # The message previously named the "Spark livy server" although the
        # check above greps for the Thrift server class.
        Logger.warning("{0} did not started!".format("Spark thrift server"))
        raise ComponentIsNotRunning()
def _fix_start_type(self):
    """
    Normalize self.resource.start_type.

    A value that is a key of ServiceConfigProvider.str_start_types is
    replaced by its mapped value. Any other value that is empty or not one of
    the auto/disabled/demand start constants is reported and replaced by
    win32service.SERVICE_NO_CHANGE.
    """
    start_type = self.resource.start_type

    if start_type in ServiceConfigProvider.str_start_types.keys():
        self.resource.start_type = ServiceConfigProvider.str_start_types[start_type]
        return

    accepted_start_types = [
        win32service.SERVICE_AUTO_START,
        win32service.SERVICE_DISABLED,
        win32service.SERVICE_DEMAND_START,
    ]
    if start_type and start_type in accepted_start_types:
        return

    Logger.warning("Invalid service start type specified: service='{0}', start type='{1}'. Ignoring.".format(
        self.resource.service_name, str(start_type)))
    self.resource.start_type = win32service.SERVICE_NO_CHANGE
def status(self, env):
    """
    Report whether the Livy server is running.

    Looks for the Livy server main class in the process list.

    :raises ComponentIsNotRunning: when the process check produces no output
    """
    import status_params
    env.set_params(status_params)

    process_check_command = "ps -ef | grep 'com.cloudera.hue.livy.server.Main' | grep -v grep"
    if self.command_exe(process_check_command):
        return

    Logger.warning("{0} did not started!".format("Spark livy server"))
    raise ComponentIsNotRunning()
def refresh_configs(params):
    """
    Patch the global config in Zookeeper and pull the Zookeeper config
    locally, but only when the Zookeeper-configured flag check passes.

    :param params: object passed to is_zk_configured, patch_global_config and
                   pull_config; provides zk_configured_flag_file
    """
    if is_zk_configured(params):
        Logger.info("Patch global config in Zookeeper")
        patch_global_config(params)
        Logger.info("Done patching global config")

        Logger.info("Pull zookeeper config locally")
        pull_config(params)
        return

    # Without the flag the refresh is skipped and only a warning is left.
    flag_file = params.zk_configured_flag_file
    Logger.warning("The expected flag file '" + flag_file + "'indicating that Zookeeper has been configured does not exist. Skipping patching. An administrator should look into this.")
def _call_command(command, logoutput=False, cwd=None, env=None, wait_for_finish=True, timeout=None, user=None, pid_file_name=None, poll_after=None):
    """
    Start ``command`` with stdout/stderr redirected to the application log
    files and optionally wait for it to finish.

    :param command: argument list handed to subprocess.Popen (shell=False)
    :param logoutput: log captured output and return code after completion
    :param cwd: working directory for the child
    :param env: environment for the child
    :param wait_for_finish: when False the process is left running; see
                            poll_after
    :param timeout: when truthy, a timer invoking on_timeout is started
    :param user: currently unused
    :param pid_file_name: when given (and not waiting), the child's pid is
                          written to this file
    :param poll_after: when given (and not waiting), sleep this many seconds
                       and check that the process is still running
    :return: (code, out, err); (0, None, None) when the process is left running
    """
    # TODO implement user
    Logger.info("Executing %s" % (command))

    # stdout and stderr of the child are redirected to files
    stdout_path = APPLICATION_STD_OUTPUT_LOG_FILE_PREFIX + APPLICATION_STD_OUTPUT_LOG_FILE_FILE_TYPE
    stderr_path = APPLICATION_STD_ERROR_LOG_FILE_PREFIX + APPLICATION_STD_ERROR_LOG_FILE_FILE_TYPE
    stdout_log = open(stdout_path, 'w+')
    stderr_log = open(stderr_path, 'w+')

    proc = subprocess.Popen(command,
                            stdout=stdout_log,
                            stderr=stderr_log,
                            universal_newlines=True,
                            cwd=cwd,
                            env=env,
                            shell=False)
    code = None
    log_anyway = False

    if not wait_for_finish:
        Logger.debug("No need to wait for the process to exit. Will leave the process running ...")
        code = 0

        if pid_file_name:
            pid_text = str(proc.pid)
            Logger.debug("Writing the process id %s to file %s" % (pid_text, pid_file_name))
            pidfile = open(pid_file_name, 'w')
            pidfile.write(pid_text)
            pidfile.close()
            Logger.info("Wrote the process id to file %s" % pid_file_name)

        # without a polling interval the process is simply left running
        if not poll_after:
            return code, None, None

        # wait poll_after seconds and poll
        time.sleep(poll_after)
        if proc.poll() is None:
            # still running, nothing more to do
            return code, None, None

        # the process already exited: assume failure and log its output
        log_anyway = True
        Logger.warning("Process is not up after the polling interval " + str(poll_after) + " seconds.")

    if timeout:
        q = Queue()
        t = threading.Timer(timeout, on_timeout, [proc, q])
        t.start()

    out, err = proc.communicate()
    code = proc.returncode

    if logoutput or log_anyway:
        if out:
            Logger.info("Out: " + str(out))
        if err:
            Logger.info("Err: " + str(err))
        if code:
            Logger.info("Ret Code: " + str(code))

    return code, out, err
def doRetries(hdfs_site, security_enabled, run_user):
    """
    One attempt at reading the NameNode HA states.

    Increments doRetries.attempt on every call. Returns the states when an
    active NameNode is present, or when the attempt counter has reached
    ``times``; otherwise raises Fail.

    :return: (active_namenodes, standby_namenodes, unknown_namenodes)
    :raises Fail: when no active NameNode was found and the attempt counter
                  does not equal ``times``
    """
    doRetries.attempt += 1
    active, standby, unknown = get_namenode_states_noretries(hdfs_site, security_enabled, run_user)
    Logger.info(
        "NameNode HA states: active_namenodes = {0}, standby_namenodes = {1}, unknown_namenodes = {2}".format(
            active, standby, unknown))

    if active:
        return active, standby, unknown

    if doRetries.attempt == times:
        # last attempt: hand back whatever is known instead of failing
        Logger.warning("No active NameNode was found after {0} retries. Will return current NameNode HA states".format(times))
        return active, standby, unknown

    raise Fail('No active NameNode was found.')
def __init__(self, path):
    """
    Populate st_uid, st_gid and st_mode for ``path`` from a sudo ``stat`` call.

    The call is repeated up to RETRY_COUNT times until its output has three
    fields; when no attempt is parseable, os.stat is used instead.
    """
    # Sometimes (on heavy load) the stat call returns an empty output with a
    # zero return code, hence the retries.
    parsed = False
    for _attempt in range(0, self.RETRY_COUNT):
        out = shell.checked_call(["stat", "-c", "%u %g %a", path], sudo=True)[1]
        fields = out.split(' ')
        if len(fields) != 3:
            continue
        self.st_uid = int(fields[0])
        self.st_gid = int(fields[1])
        self.st_mode = int(fields[2], 8)
        parsed = True
        break

    if not parsed:
        Logger.warning("Can not parse a sudo stat call output: \"{0}\"".format(out))
        fallback = os.stat(path)
        self.st_uid = fallback.st_uid
        self.st_gid = fallback.st_gid
        self.st_mode = fallback.st_mode & 0o7777
def stop(self, env, rolling_restart=False):
    """
    Stop the process whose pid is stored in params.kafka_manager_pid_file.

    The pid is read from the pid file and sent signal 15 via ``kill``.
    Problems are logged as warnings and never raised.

    :param env: execution environment; receives the params module
    :param rolling_restart: accepted for interface compatibility, not used
    """
    import params
    env.set_params(params)
    self.configure(env)

    try:
        pid = int(sudo.read_file(params.kafka_manager_pid_file))
        code, out = shell.call(["kill", "-15", str(pid)])
    except Exception:
        # Best effort by design, but a bare "except:" would also swallow
        # SystemExit and KeyboardInterrupt, so only real errors are caught.
        Logger.warning("Pid file {0} does not exist".format(params.kafka_manager_pid_file))
        return

    # A non-zero code from kill means there was no such process to signal.
    if code:
        Logger.warning("Process with pid {0} is not running. Stale pid file"
                       " at {1}".format(pid, params.kafka_manager_pid_file))
def set_dir_ownership(targets):
    """
    Recursively chown the given directories to the Kafka user and group.

    :param targets: a single path, or an iterable of paths. Paths of length
                    0 or 1 (empty or a single slash) are skipped with a
                    warning.
    """
    # kept from the original; presumably required for format() to resolve
    # kafka_user / user_group -- TODO confirm
    import params

    # A string is itself iterable, so it has to be recognised before the
    # Iterable test; otherwise a single path would be walked character by
    # character and never chowned. (str, type(u'')) covers str and unicode.
    string_types = (str, type(u''))
    if isinstance(targets, string_types) or not isinstance(targets, collections.Iterable):
        # If target is a single object, convert it to list
        directories = [targets]
    else:
        directories = targets

    for directory in directories:
        # If path is empty or a single slash,
        # may corrupt filesystem permissions
        if len(directory) > 1:
            Execute(('chown', '-R', format("{kafka_user}:{user_group}"), directory),
                    sudo=True)
        else:
            Logger.warning("Permissions for the folder \"%s\" were not updated due to "
                           "empty path passed: " % directory)
def check_stack_feature(stack_feature, stack_version):
    """
    Check whether ``stack_feature`` is supported by ``stack_version``.

    The feature-to-version mapping is read from the cluster-env
    ``stack_features`` property. A feature is supported when the version is
    at least its ``min_version`` (if present) and strictly below its
    ``max_version`` (if present).

    :param stack_feature: Feature name to check if it is supported by the stack. For example: "rolling_upgrade"
    :param stack_version: Version of the stack
    :return: True when the feature is supported; False when it is not, when
             it is unknown, or when the stack name or version is missing
    :raises Fail: when cluster-env defines no stack features
    """
    from resource_management.libraries.functions.default import default
    from resource_management.libraries.functions.version import compare_versions

    stack_name = default("/hostLevelParams/stack_name", None)
    if stack_name is None:
        Logger.warning("Cannot find the stack name in the command. Stack features cannot be loaded")
        return False

    stack_features_config = default("/configurations/cluster-env/stack_features", None)

    if not stack_version:
        Logger.debug("Cannot determine if feature %s is supported since did not provide a stack version." % stack_feature)
        return False

    if not stack_features_config:
        raise Fail("Stack features not defined by stack")

    features_by_stack = json.loads(stack_features_config)
    if stack_name not in features_by_stack:
        Logger.warning("Cannot find stack features for the stack named {0}".format(stack_name))
        return False

    for feature in features_by_stack[stack_name]["stack_features"]:
        if feature["name"] != stack_feature:
            continue

        # inclusive lower bound
        if "min_version" in feature:
            if compare_versions(stack_version, feature["min_version"], format=True) < 0:
                return False

        # exclusive upper bound
        if "max_version" in feature:
            if compare_versions(stack_version, feature["max_version"], format=True) >= 0:
                return False

        return True

    return False
def bootstrap_standby_namenode(params, use_path=False):
    """
    Run ``hdfs namenode -bootstrapStandby`` until it succeeds, up to 50
    times, and create the bootstrapped mark directories on success.

    Outside the INITIAL_START phase nothing is run when
    is_namenode_bootstrapped(params) is already true.

    :param params: object providing namenode_bootstrapped_mark_dirs,
                   hadoop_bin_dir, command_phase and hdfs_user
    :param use_path: when True the hdfs command is prefixed with
                     params.hadoop_bin_dir
    :return: True when bootstrapped (exit code 0 or 5, or already marked),
             False otherwise
    """
    mark_dirs = params.namenode_bootstrapped_mark_dirs
    # The format() templates below reference bin_path by name, so this local
    # must keep its name.
    bin_path = ""
    if use_path:
        bin_path = os.path.join(params.hadoop_bin_dir, '')

    bootstrapped = False
    # exit codes that count as success, with the message logged for each
    success_messages = {
        0: "Standby namenode bootstrapped successfully",
        5: "Standby namenode already bootstrapped",
    }

    try:
        max_attempts = 50
        bootstrap_cmd = format(
            "{bin_path}hdfs namenode -bootstrapStandby -nonInteractive")
        # Blueprint based deployments start both NameNodes in parallel and the
        # first bootstrap attempt may occasionally fail. Depending on how it
        # failed, a second attempt may wrongly conclude that bootstrap already
        # succeeded (e.g. because the folder exists). Using -force avoids that,
        # but only during the initial start.
        if params.command_phase == "INITIAL_START":
            # force bootstrap in INITIAL_START phase
            bootstrap_cmd = format(
                "{bin_path}hdfs namenode -bootstrapStandby -nonInteractive -force"
            )
        elif is_namenode_bootstrapped(params):
            # Once out of INITIAL_START, bootstrap only if it did not succeed
            # during cluster deployment.
            return True

        Logger.info("Boostrapping standby namenode: %s" % (bootstrap_cmd))
        for attempt in range(1, max_attempts + 1):
            Logger.info('Try %d out of %d' % (attempt, max_attempts))
            code, out = shell.call(bootstrap_cmd,
                                   logoutput=False,
                                   user=params.hdfs_user)
            if code in success_messages:
                Logger.info(success_messages[code])
                bootstrapped = True
                break
            Logger.warning(
                'Bootstrap standby namenode failed with %d error code. Will retry'
                % (code))
    except Exception as ex:
        Logger.error(
            'Bootstrap standby namenode threw an exception. Reason %s' %
            (str(ex)))

    if bootstrapped:
        for mark_dir in mark_dirs:
            Directory(mark_dir, create_parents=True)

    return bootstrapped
def is_topology_active(self):
    """
    Check the ``storm list`` output for a running indexing topology.

    :return: True when the second column of the matching line is ACTIVE or
             REBALANCING; False otherwise, including when no line matches
    """
    cmd_retrieve = "storm list | grep 'indexing'"
    proc = subprocess.Popen(cmd_retrieve,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            shell=True)
    stdout, stderr = proc.communicate()
    Logger.info("Retrieval response is: %s" % stdout)
    Logger.warning("Error response is: %s" % stderr)

    columns = stdout.split()
    if len(columns) < 2:
        Logger.warning("Indexing topology is not running")
        return False

    # The first column is the topology name (indexing), the second its status.
    status = columns[1]
    return status in ('ACTIVE', 'REBALANCING')
def install(self, env):
    """
    Install the packages, create the service user and directories, deploy
    the shell scripts and run the JKG, Toree and log4j setup commands.
    """
    import jkg_toree_params as params
    self.install_packages(env)

    # Create user and group if they don't exist
    helpers.create_linux_user(params.user, params.group)

    # Create directories used by the service and service user
    service_dirs = [
        params.home_dir,
        params.jkg_pid_dir,
        params.log_dir,
        params.spark_config_dir,
    ]
    Directory(service_dirs,
              mode=0o755,
              create_parents=True,
              owner=params.user,
              group=params.group,
              recursive_ownership=True)

    if os.path.exists(params.py_venv_pathprefix):
        Logger.warning((
            "Virtualenv path prefix {0} to be used for JNBG service might already exist."
            "This is unexpected if the service or service component is being installed on the node for the first time."
            "It could indicate remnants from a prior installation."
        ).format(params.py_venv_pathprefix))

    # Setup bash scripts for execution: service scripts get 0750, user
    # scripts 0755.
    for script_mode, script_names in ((0o750, params.sh_scripts),
                                      (0o755, params.sh_scripts_user)):
        for sh_script in script_names:
            File(params.sh_scripts_dir + os.sep + sh_script,
                 content=StaticFile(sh_script),
                 mode=script_mode)

    # Run install commands for JKG defined in params
    for jkg_command in params.jkg_commands:
        Execute(jkg_command, logoutput=True)

    # Run install commands for Toree defined in params
    for toree_command in params.toree_commands:
        Execute(toree_command, logoutput=True)

    # Run setup commands for log4j
    for log4j_command in params.log4j_setup_commands:
        Execute(log4j_command, logoutput=True)
def _call_command(command, logoutput=False, cwd=None, env=None, wait_for_finish=True, timeout=None, user=None, pid_file_name=None, poll_after=None):
    """
    Start ``command`` with stderr merged into a stdout pipe and optionally
    wait for it to finish.

    :param command: argument list handed to subprocess.Popen (shell=False)
    :param logoutput: log captured output and return code after completion
    :param cwd: working directory for the child
    :param env: environment for the child
    :param wait_for_finish: when False the process is left running; see
                            poll_after
    :param timeout: when truthy, a timer invoking on_timeout is started
    :param user: currently unused
    :param pid_file_name: when given (and not waiting), the child's pid is
                          written to this file
    :param poll_after: when given (and not waiting), sleep this many seconds
                       and check that the process is still running
    :return: (code, out, err); (0, None, None) when the process is left running
    """
    # TODO implement user
    Logger.info("Executing %s" % (command))
    proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
                            cwd=cwd, env=env, shell=False)
    code = None
    # Must be bound on every path: it used to be assigned only inside the
    # "not wait_for_finish" branch, so a waiting call with logoutput=False
    # failed with UnboundLocalError at the logging check below.
    logAnyway = False

    if not wait_for_finish:
        Logger.debug("No need to wait for the process to exit. Will leave the process running ...")
        code = 0
        if pid_file_name:
            Logger.debug("Writing the process id %s to file %s" % (str(proc.pid), pid_file_name))
            pidfile = open(pid_file_name, 'w')
            pidfile.write(str(proc.pid))
            pidfile.close()
            Logger.info("Wrote the process id to file %s" % pid_file_name)

        ## wait poll_after seconds and poll
        if poll_after:
            time.sleep(poll_after)
            if proc.poll() is None:
                return code, None, None  # if still running then return
            else:
                logAnyway = True  # assume failure and log
                Logger.warning("Process is not up after the polling interval " + str(poll_after) + " seconds.")
        else:
            return code, None, None

    if timeout:
        q = Queue()
        t = threading.Timer(timeout, on_timeout, [proc, q])
        t.start()

    out, err = proc.communicate()
    code = proc.returncode

    if logoutput or logAnyway:
        if out:
            Logger.info("Out: " + str(out))
        if err:
            Logger.info("Err: " + str(err))
        if code:
            Logger.info("Ret Code: " + str(code))

    return code, out, err
def unlink_all_configs(self, env):
    """
    Undo the symlink work performed in link_config for every configured
    package directory.

    Only acts on a downgrade where both versions are given, the version
    being downgraded from supports config versioning and the version being
    downgraded to does not. In every other case a warning is logged and
    nothing is unlinked.
    """
    stack_name = default('/hostLevelParams/stack_name', "").upper()
    downgrade_to_version = default('/commandParams/version', None)
    downgrade_from_version = default('/commandParams/downgrade_from_version', None)
    upgrade_direction = default("/commandParams/upgrade_direction", Direction.UPGRADE)

    # downgrade only
    if upgrade_direction != Direction.DOWNGRADE:
        Logger.warning("Unlinking configurations should only be performed on a downgrade.")
        return

    if downgrade_to_version is None or downgrade_from_version is None:
        Logger.warning("Both 'commandParams/version' and 'commandParams/downgrade_from_version' must be specified to unlink configs on downgrade.")
        return

    Logger.info("Unlinking all configs when downgrading from {0} {1} to {2}".format(
        stack_name, downgrade_from_version, downgrade_to_version))

    # normalize the versions
    downgrade_to_version = format_stack_version(downgrade_to_version)
    downgrade_from_version = format_stack_version(downgrade_from_version)

    # the target version must not support config versioning
    if downgrade_to_version and check_stack_feature(StackFeature.CONFIG_VERSIONING, downgrade_to_version):
        Logger.warning("Unlinking configurations should not be performed when downgrading {0} {1} to {2}".format(
            stack_name, downgrade_from_version, downgrade_to_version))
        return

    # the source version must support config versioning
    if not downgrade_from_version or not check_stack_feature(StackFeature.CONFIG_VERSIONING, downgrade_from_version):
        Logger.warning("Unlinking configurations should not be performed when downgrading {0} {1} to {2}".format(
            stack_name, downgrade_from_version, downgrade_to_version))
        return

    # iterate through all directory conf mappings and undo the symlinks
    for _package, directory_mappings in conf_select.get_package_dirs().iteritems():
        for directory_mapping in directory_mappings:
            self._unlink_config(directory_mapping['conf_dir'])
def set_dir_ownership(targets):
    """
    Recursively chown the given directories to the Kafka user and group.

    :param targets: a single path, or an iterable of paths. Paths of length
                    0 or 1 (empty or a single slash) are skipped with a
                    warning.
    """
    # kept from the original; presumably required for format() to resolve
    # kafka_user / user_group -- TODO confirm
    import params

    # A string is itself iterable, so it has to be recognised before the
    # Iterable test; otherwise a single path would be walked character by
    # character and never chowned. (str, type(u'')) covers str and unicode.
    string_types = (str, type(u''))
    is_single_target = (isinstance(targets, string_types)
                        or not isinstance(targets, collections.Iterable))
    # If target is a single object, convert it to list
    directories = [targets] if is_single_target else targets

    for directory in directories:
        # If path is empty or a single slash,
        # may corrupt filesystem permissions
        if len(directory) > 1:
            Execute(('chown', '-R', format("{kafka_user}:{user_group}"),
                     directory),
                    sudo=True)
        else:
            Logger.warning(
                "Permissions for the folder \"%s\" were not updated due to "
                "empty path passed: " % directory)
def __init__(self, path):
    """
    Read owner uid, group gid and permission bits of ``path`` via a sudo
    ``stat`` call and store them as st_uid, st_gid and st_mode.

    Up to RETRY_COUNT attempts are made; when none yields three fields the
    values come from os.stat instead.
    """
    # Sometimes (on heavy load) the stat call returns an empty output with a
    # zero return code, so the call is retried.
    for _ in range(0, self.RETRY_COUNT):
        out = shell.checked_call(["stat", "-c", "%u %g %a", path], sudo=True)[1]
        values = out.split(' ')
        if len(values) == 3:
            uid_str, gid_str, mode_str = values
            self.st_uid = int(uid_str)
            self.st_gid = int(gid_str)
            # the mode is printed in octal
            self.st_mode = int(mode_str, 8)
            break
    else:
        # no attempt produced parseable output
        Logger.warning("Can not parse a sudo stat call output: \"{0}\"".format(out))
        stat_val = os.stat(path)
        self.st_uid = stat_val.st_uid
        self.st_gid = stat_val.st_gid
        self.st_mode = stat_val.st_mode & 0o7777
def unlink_all_configs(self, env):
    """
    Undo the symlink work performed in link_config for every package
    directory in conf_select.PACKAGE_DIRS.

    Only acts on an HDP downgrade from 2.3 or later to a version below 2.3
    with both versions given. In every other case a warning is logged and
    nothing is unlinked.
    """
    stack_name = default('/hostLevelParams/stack_name', "").upper()
    downgrade_to_version = default('/commandParams/version', None)
    downgrade_from_version = default('/commandParams/downgrade_from_version', None)
    upgrade_direction = default("/commandParams/upgrade_direction", Direction.UPGRADE)

    # downgrade only
    if upgrade_direction != Direction.DOWNGRADE:
        Logger.warning("Unlinking configurations should only be performed on a downgrade.")
        return

    # HDP only
    if stack_name != "HDP":
        Logger.warning("Unlinking configurations should only be performed on the HDP stack.")
        return

    if downgrade_to_version is None or downgrade_from_version is None:
        Logger.warning("Both 'commandParams/version' and 'commandParams/downgrade_from_version' must be specified to unlink configs on downgrade.")
        return

    Logger.info("Unlinking all configs when downgrading from HDP 2.3 to 2.2")

    # normalize the versions
    stack_23 = format_hdp_stack_version("2.3")
    downgrade_to_version = format_hdp_stack_version(downgrade_to_version)
    downgrade_from_version = format_hdp_stack_version(downgrade_from_version)

    # downgrade-to-version must be 2.2 (less than 2.3)
    target_is_23_or_later = compare_versions(downgrade_to_version, stack_23) >= 0
    if target_is_23_or_later:
        Logger.warning("Unlinking configurations should only be performed when downgrading to HDP 2.2")
        return

    # downgrade-from-version must be 2.3+
    source_is_before_23 = compare_versions(downgrade_from_version, stack_23) < 0
    if source_is_before_23:
        Logger.warning("Unlinking configurations should only be performed when downgrading from HDP 2.3 or later")
        return

    # iterate through all directory conf mappings and undo the symlinks
    for _package, directory_mappings in conf_select.PACKAGE_DIRS.iteritems():
        for directory_mapping in directory_mappings:
            self._unlink_config(directory_mapping['conf_dir'])
def get_package_from_available(self, name, available_packages_in_repos):
    """
    Resolve a package name containing the ${stack_version} placeholder to an
    actual package name from the given list of available packages.

    Names without the placeholder are returned unchanged. Otherwise the
    placeholder is replaced by a pattern of digits and the OS-specific
    delimiter, and the first available package matching it is returned.

    :param name: package name, possibly containing the placeholder
    :param available_packages_in_repos: iterable of candidate package names
    :return: the resolved package name; None (after a warning) if nothing matches
    """
    if STACK_VERSION_PLACEHOLDER not in name:
        return name

    if OSCheck.is_ubuntu_family():
        delimiter = '-'
    else:
        delimiter = '_'

    version_pattern = r'(\d|{0})+'.format(delimiter)
    package_regex = name.replace(STACK_VERSION_PLACEHOLDER, version_pattern) + "$"

    for candidate in available_packages_in_repos:
        if re.match(package_regex, candidate):
            return candidate

    Logger.warning("No package found for {0}({1})".format(name, package_regex))
def create_repo_files(template, command_repository):
    """
    Creates repositories in a consistent manner for all types.

    :param template: repository template passed through to each Repository resource
    :param command_repository: a CommandRepository instance
    :type command_repository CommandRepository
    :return: a dictionary with repo ID => repo file name mapping; empty when
             the command repository lists no items
    :raises Fail: if the command repository has no version id, or an item has no repo id
    """
    if command_repository.version_id is None:
        raise Fail("The command repository was not parsed correctly")

    if len(command_repository.items) == 0:
        Logger.warning(
            "Repository for {0}/{1} has no repositories.  Ambari may not be managing this version."
            .format(command_repository.stack_name, command_repository.version_string))
        return {}

    repo_files = {}
    # False for the first managed repository so the file is created anew;
    # flipped to True afterwards so later entries are appended.
    append_to_file = False

    for repo in command_repository.items:
        if repo.repo_id is None:
            raise Fail("Repository with url {0} has no id".format(repo.base_url))

        if not repo.ambari_managed:
            Logger.warning(
                "Repository for {0}/{1}/{2} is not managed by Ambari".format(
                    command_repository.stack_name,
                    command_repository.version_string,
                    repo.repo_id))
            continue

        Repository(repo.repo_id,
                   action="create",
                   base_url=repo.base_url,
                   mirror_list=repo.mirrors_list,
                   repo_file_name=command_repository.repo_filename,
                   repo_template=template,
                   components=repo.ubuntu_components,
                   append_to_file=append_to_file)
        append_to_file = True
        repo_files[repo.repo_id] = command_repository.repo_filename

    return repo_files
def create_core_site_xml(conf_dir):
    """
    Write core-site.xml into ``conf_dir``.

    Nothing is done unless ``params.stack_supports_ranger_kerberos`` is set.
    With a NameNode present the file is built from the cluster's core-site
    configuration; if viewFS mount table content exists it is written to a
    separate file that core-site.xml includes. Without a NameNode the file is
    built from ``params.core_site_property``.

    :param conf_dir: directory that receives core-site.xml
    """
    import params

    if not params.stack_supports_ranger_kerberos:
        return

    if not params.has_namenode:
        Logger.warning(
            'HDFS service not installed. Creating core-site.xml file.')
        XmlConfig("core-site.xml",
                  conf_dir=conf_dir,
                  configurations=params.core_site_property,
                  configuration_attributes={},
                  owner=params.unix_user,
                  group=params.unix_group,
                  mode=0o644)
        return

    # if there is the viewFS mount table content, create separate xml config
    # and include it in the core-site, else just create core-site
    has_mount_table = params.mount_table_content

    core_site_args = dict(
        conf_dir=conf_dir,
        configurations=params.config['configurations']['core-site'],
        configuration_attributes=params.config['configurationAttributes']['core-site'],
        owner=params.unix_user,
        group=params.unix_group,
        mode=0o644,
    )

    if has_mount_table:
        inclusion_file = os.path.join(conf_dir, params.xml_inclusion_file_name)
        XmlConfig("core-site.xml", xml_include_file=inclusion_file, **core_site_args)
        File(inclusion_file,
             owner=params.unix_user,
             group=params.unix_group,
             content=params.mount_table_content,
             mode=0o644)
    else:
        XmlConfig("core-site.xml", **core_site_args)
def create_repo_files(template, command_repository):
    """
    Creates repositories in a consistent manner for all types.

    All Ambari-managed repositories of the command are written to a single
    file named ``ambari-<stack name>-<version id>``.

    :param template: repository template passed through to each Repository resource
    :param command_repository: a CommandRepository instance
    :return: None
    :raises Fail: if the command repository has no version id, or a repository has no repo id
    """
    if command_repository.version_id is None:
        raise Fail("The command repository was not parsed correctly")

    if len(command_repository.repositories) == 0:
        Logger.warning(
            "Repository for {0}/{1} has no repositories.  Ambari may not be managing this version."
            .format(command_repository.stack_name, command_repository.version_string))
        return

    # add the stack name to the file name just to make it a little easier to debug
    # version_id is the primary id of the repo_version table in the database
    stack_part = command_repository.stack_name.lower()
    file_name = "ambari-{0}-{1}".format(stack_part, command_repository.version_id)

    # False for the first managed repository so the file is created anew;
    # flipped to True afterwards so later entries are appended.
    append_to_file = False

    for repo in command_repository.repositories:
        if repo.repo_id is None:
            raise Fail("Repository with url {0} has no id".format(repo.base_url))

        if not repo.ambari_managed:
            Logger.warning(
                "Repository for {0}/{1}/{2} is not managed by Ambari".format(
                    command_repository.stack_name,
                    command_repository.version_string,
                    repo.repo_id))
            continue

        Repository(repo.repo_id,
                   action="create",
                   base_url=repo.base_url,
                   mirror_list=repo.mirrors_list,
                   repo_file_name=file_name,
                   repo_template=template,
                   components=repo.ubuntu_components,
                   append_to_file=append_to_file)
        append_to_file = True
def service_check(self, env):
    """
    Run the Ambari Metrics service check against every collector host.

    The per-host check is executed in parallel; the check passes as soon as
    one host in ``params.ams_collector_hosts`` reports SUCCESS. Results of
    hosts that did not succeed are logged as warnings.

    :param env: execution environment that receives the params module
    :raises Fail: if no collector host reported SUCCESS
    """
    import params

    Logger.info("Ambari Metrics service check was started.")
    env.set_params(params)

    results = execute_in_parallel(self.service_check_for_single_host,
                                  params.ams_collector_hosts, params)

    for collector_host in params.ams_collector_hosts:
        if collector_host not in results:
            continue
        outcome = results[collector_host]
        if outcome.status == SUCCESS:
            Logger.info("Ambari Metrics service check passed on host " + collector_host)
            return
        Logger.warning(outcome.result)

    raise Fail("All metrics collectors are unavailable.")
def execute_java_home_available_check(self, config):
    """
    Check that the configured Java home contains a java executable.

    :param config: command configuration; ``config['commandParams']['java_home']`` is read
    :return: dict with ``exit_code`` (0 if ``<java_home>/bin/java`` or, on
             Windows, ``java.exe`` is a file; 1 otherwise) and a ``message``
    """
    Logger.info("Java home check started.")
    java_home = config['commandParams']['java_home']
    Logger.info("Java home to check: " + java_home)

    executable_name = "java.exe" if OSCheck.is_windows_family() else "java"
    executable_path = os.path.join(java_home, "bin", executable_name)

    if os.path.isfile(executable_path):
        Logger.info("Java home exists!")
        structured_output = {"exit_code": 0, "message": "Java home exists!"}
    else:
        Logger.warning("Java home doesn't exist!")
        structured_output = {"exit_code": 1, "message": "Java home doesn't exist!"}

    Logger.info("Java home check completed.")
    return structured_output
def should_install_lzo():
    """
    Tell whether LZO should be installed.

    :return: True only if ``io.compression.codecs`` in core-site mentions
             ``com.hadoop.compression.lzo`` (case-insensitively) and the GPL
             license, required for LZO, is accepted. A warning is logged when
             LZO is enabled but the license is not accepted.
    """
    # The returned config is not used below; the call is kept as-is.
    config = Script.get_config()

    codecs = default("/configurations/core-site/io.compression.codecs", None)
    if codecs is None or "com.hadoop.compression.lzo" not in codecs.lower():
        return False

    if is_gpl_license_accepted():
        return True

    Logger.warning(INSTALLING_LZO_WITHOUT_GPL)
    return False
def start(self, env, upgrade_type=None):
    """
    Configure the component, make sure the Elasticsearch index templates are
    installed, then start the indexing topology.

    Template installation is best-effort: any exception is logged as a warning
    and the topology is started regardless.

    :param env: execution environment
    :param upgrade_type: not used by this method
    """
    from params import params
    env.set_params(params)
    self.configure(env)
    indexing = IndexingCommands(params)

    # Install elasticsearch templates
    try:
        templates_installed = indexing.is_elasticsearch_template_installed()
        if not templates_installed:
            self.elasticsearch_template_install(env)
            indexing.set_elasticsearch_template_installed()
    except Exception as err:
        template = ("WARNING: Elasticsearch index templates could not be installed. "
                    "Is Elasticsearch running? Will reattempt install on next start. error={0}")
        Logger.warning(template.format(err))

    indexing.start_indexing_topology(env)
def restart_enrichment_topology(self, env):
    """
    Stop the enrichment topology, wait for it to disappear, and start it again.

    The topology state is polled up to three more times, 40 seconds apart.
    If it is still active afterwards the start is skipped and a warning is
    logged.

    :param env: execution environment, passed to ``is_topology_active``
    """
    Logger.info('Restarting the enrichment topologies')
    self.stop_enrichment_topology()

    # Wait for old topology to be cleaned up by Storm, before starting again.
    still_active = self.is_topology_active(env)
    for _ in range(3):
        if not still_active:
            break
        Logger.info('Existing topology still active. Will wait and retry')
        time.sleep(40)
        still_active = self.is_topology_active(env)

    if still_active:
        Logger.warning('Retries exhausted. Existing topology not cleaned up.  Aborting topology start.')
        return

    self.start_enrichment_topology()
    Logger.info('Done restarting the enrichment topology')
def restart_enrichment_topology(self, env):
    """
    Stop the enrichment topology, wait for it to disappear, and start it again.

    The topology state is polled up to three more times, 40 seconds apart.
    If it is still active afterwards the start is skipped and a warning is
    logged.

    :param env: execution environment, passed to the stop, status and start calls
    """
    Logger.info('Restarting the enrichment topologies')
    self.stop_enrichment_topology(env)

    # Wait for old topology to be cleaned up by Storm, before starting again.
    still_active = self.is_topology_active(env)
    for _ in range(3):
        if not still_active:
            break
        Logger.info('Existing topology still active. Will wait and retry')
        time.sleep(40)
        still_active = self.is_topology_active(env)

    if still_active:
        Logger.warning('Retries exhausted. Existing topology not cleaned up.  Aborting topology start.')
        return

    self.start_enrichment_topology(env)
    Logger.info('Done restarting the enrichment topology')
def service_check(self, env):
    """
    Check Elasticsearch health through the ``_cluster/health`` endpoint.

    After a five second pause, the endpoint is queried on
    ``params.hostname``:``params.elasticSearchHttpPort`` with
    ``wait_for_status=green`` and a 120s timeout. The process exits with
    status 0 when the reported status is "green" and with status 1 otherwise.

    :param env: execution environment that receives the params module
    """
    import params
    env.set_params(params)
    time.sleep(5)

    health_url = "http://{0}:{1}/_cluster/health?wait_for_status=green&timeout=120s".format(
        params.hostname, params.elasticSearchHttpPort)

    fd = urllib2.urlopen(health_url)
    try:
        content = fd.read()
    finally:
        # Close the response even when reading it fails.
        fd.close()

    result = json.loads(content)
    status = result["status"] == u"green"

    if not status:
        Logger.warning("Elasticsearch service check failed")
        sys.exit(1)
    else:
        Logger.info("Elasticsearch service check successful")
        sys.exit(0)
def get_stack_root(stack_name, stack_root_json):
    """
    Get the stack-specific install root directory from the raw, JSON-escaped properties.

    :param stack_name: name of the stack to look up
    :param stack_root_json: JSON object (as a string) mapping stack names to
                            their root directories, or None
    :return: the root mapped to ``stack_name``; ``/usr/<lower-cased stack name>``
             when ``stack_root_json`` is None or has no entry for the stack
             (the latter case also logs a warning)
    """
    fallback_root = "/usr/{0}".format(stack_name.lower())

    if stack_root_json is None:
        return fallback_root

    stack_roots = json.loads(stack_root_json)

    if stack_name not in stack_roots:
        Logger.warning("Cannot determine stack root for stack named {0}".format(stack_name))
        return fallback_root

    return stack_roots[stack_name]
def bootstrap_standby_namenode(params):
    """
    Run ``hdfs namenode -bootstrapStandby -nonInteractive`` until it succeeds.

    The command is attempted up to 50 times as ``params.hdfs_user``. Exit code
    0 (bootstrapped) and exit code 5 (already bootstrapped) both count as
    success; any other code is logged and retried. An exception ends the
    attempts and is logged as an error.

    :param params: object providing ``hdfs_user``
    :return: True on success, False if every attempt failed or an exception was raised
    """
    try:
        iterations = 50
        bootstrap_cmd = "hdfs namenode -bootstrapStandby -nonInteractive"
        Logger.info("Boostrapping standby namenode: %s" % (bootstrap_cmd))

        for attempt in range(1, iterations + 1):
            Logger.info('Try %d out of %d' % (attempt, iterations))
            code, _ = shell.call(bootstrap_cmd, logoutput=False, user=params.hdfs_user)

            if code == 0:
                Logger.info("Standby namenode bootstrapped successfully")
                return True
            if code == 5:
                Logger.info("Standby namenode already bootstrapped")
                return True

            Logger.warning('Bootstrap standby namenode failed with %d error code. Will retry' % (code))
    except Exception as ex:
        Logger.error('Bootstrap standby namenode threw an exception. Reason %s' % (str(ex)))

    return False
def __dump_db(self, command, type, is_db_here):
    """
    Back up the Hive Metastore database, or tell the operator how to do it.

    When the database is on this host the dump command is executed (after
    creating the dump directory). Otherwise warnings are logged asking for a
    manual dump with the same command on the database host.

    :param command: dump command; prefixed here with ``mkdir -p`` of the dump directory
    :param type: label interpolated into the dump file name
    :param is_db_here: whether the database is local to this host
    """
    # Local names are kept because the format() templates below reference
    # dump_dir, dump_file, type and command by name.
    dump_dir = "/etc/hive/dbdump"
    dump_file = format("{dump_dir}/hive-{stack_version_formatted}-{type}-dump.sql")
    command = format("mkdir -p {dump_dir}; " + command)

    if not is_db_here:
        Logger.warning("MANUAL DB DUMP REQUIRED!!")
        Logger.warning(format("Hive Metastore is using an external {hive_metastore_db_type} database, the connection url is {hive_jdbc_connection_url}."))
        Logger.warning("Please log in to that host, and create a db backup manually by executing the following command:")
        Logger.warning(format("\"{command}\""))
        return

    Execute(command, user = "******")
    Logger.info(format("Hive Metastore database backup created at {dump_file}"))
def doRetries(hdfs_site, security_enabled, run_user):
    """
    Perform one attempt at finding the active NameNode.

    Each call increments ``doRetries.attempt`` and queries the NameNode HA
    states. The states are returned if an active NameNode exists, or if this
    was attempt number ``times`` (with a warning); otherwise Fail is raised.
    ``times`` and ``name_service`` come from the enclosing scope.

    :return: tuple ``(active_namenodes, standby_namenodes, unknown_namenodes)``
    :raises Fail: if no active NameNode was found and attempts remain
    """
    doRetries.attempt += 1
    is_last_attempt = doRetries.attempt == times

    active, standby, unknown = get_namenode_states_noretries(
        hdfs_site, security_enabled, run_user, is_last_attempt,
        name_service=name_service)

    Logger.info(
        "NameNode HA states: active_namenodes = {0}, standby_namenodes = {1}, unknown_namenodes = {2}"
        .format(active, standby, unknown))

    if active:
        return active, standby, unknown

    if is_last_attempt:
        Logger.warning(
            "No active NameNode was found after {0} retries. Will return current NameNode HA states"
            .format(times))
        return active, standby, unknown

    raise Fail('No active NameNode was found.')
def initialize_ha_zookeeper(params):
    """
    Run ``hdfs zkfc -formatZK -nonInteractive`` until it succeeds.

    The command is attempted up to 10 times as ``params.hdfs_user``. Exit code
    0 (initialized) and exit code 2 (already initialized) both count as
    success; any other code is logged and retried. An exception ends the
    attempts and is logged as an error.

    :param params: object providing ``hdfs_user``
    :return: True on success, False if every attempt failed or an exception was raised
    """
    try:
        iterations = 10
        formatZK_cmd = "hdfs zkfc -formatZK -nonInteractive"
        Logger.info("Initialize HA state in ZooKeeper: %s" % (formatZK_cmd))

        for attempt in range(1, iterations + 1):
            Logger.info('Try %d out of %d' % (attempt, iterations))
            code, _ = shell.call(formatZK_cmd, logoutput=False, user=params.hdfs_user)

            if code == 0:
                Logger.info("HA state initialized in ZooKeeper successfully")
                return True
            if code == 2:
                Logger.info("HA state already initialized in ZooKeeper")
                return True

            Logger.warning('HA state initialization in ZooKeeper failed with %d error code. Will retry' % (code))
    except Exception as ex:
        Logger.error('HA state initialization in ZooKeeper threw an exception. Reason %s' % (str(ex)))

    return False
def setup_config():
    """
    Create core-site.xml in the Hadoop configuration directory when applicable.

    A warning is logged if ``params.hadoop_conf_dir`` is missing, None, or not
    an existing path. core-site.xml is written only when the directory exists
    and either a NameNode is present or the unformatted stack version contains
    'Gluster'.
    """
    import params

    stackversion = params.stack_version_unformatted

    hadoop_conf_dir = getattr(params, "hadoop_conf_dir", None)
    is_hadoop_conf_dir_present = (
        hadoop_conf_dir is not None and os.path.exists(hadoop_conf_dir))

    if not is_hadoop_conf_dir_present:
        Logger.warning("Parameter hadoop_conf_dir is missing or directory does not exist. This is expected if this host does not have any Hadoop components.")
        return

    if not (params.has_namenode or stackversion.find('Gluster') >= 0):
        return

    # create core-site only if the hadoop config directory exists
    XmlConfig("core-site.xml",
              conf_dir=params.hadoop_conf_dir,
              configurations=params.config['configurations']['core-site'],
              configuration_attributes=params.config['configuration_attributes']['core-site'],
              owner=params.hdfs_user,
              group=params.user_group,
              only_if=format("ls {hadoop_conf_dir}"))
def restart_indexing_topology(self, env):
    """
    Stop the indexing topology, wait for it to disappear, and start it again.

    The topology state is polled up to three more times, 10 seconds apart.
    Once it is no longer active, the method waits a further 30 seconds for
    the Storm kill to complete and starts the topology. If the topology is
    still active after the retries, the start is skipped with a warning.

    :param env: execution environment, passed to the stop, status and start calls
    """
    Logger.info('Restarting the indexing topologies')
    self.stop_indexing_topology(env)

    # Wait for old topology to be cleaned up by Storm, before starting again.
    retries = 0
    topology_active = self.is_topology_active(env)
    while topology_active and retries < 3:
        Logger.info('Existing topology still active. Will wait and retry')
        time.sleep(10)
        # Refresh the flag so the decision below uses the latest state, not
        # the state sampled before the wait.
        topology_active = self.is_topology_active(env)
        retries += 1

    if not topology_active:
        Logger.info('Waiting for storm kill to complete')
        time.sleep(30)
        self.start_indexing_topology(env)
        Logger.info('Done restarting the indexing topologies')
    else:
        Logger.warning('Retries exhausted. Existing topology not cleaned up.  Aborting topology start.')
def get_stack_root():
    """
    Get the stack-specific install root directory.

    The stack name comes from ``Script.get_stack_name()`` and the JSON mapping
    of stack names to roots from ``/configurations/cluster-env/stack_root``.

    :return: the root mapped to the stack name; ``/usr/<lower-cased stack name>``
             when the mapping is absent or has no entry for the stack (the
             latter case also logs a warning)
    """
    from resource_management.libraries.functions.default import default

    stack_name = Script.get_stack_name()
    raw_stack_roots = default("/configurations/cluster-env/stack_root", None)

    if raw_stack_roots is None:
        return "/usr/{0}".format(stack_name.lower())

    stack_roots = json.loads(raw_stack_roots)

    if stack_name in stack_roots:
        return stack_roots[stack_name]

    Logger.warning("Cannot determine stack root for stack named {0}".format(stack_name))
    return "/usr/{0}".format(stack_name.lower())
def start(self, env, upgrade_type=None):
    """
    Start NiFi and, when configured, register the NiFi Registry client.

    Copies the toolkit scripts, configures the component, sets up Ranger for
    NiFi, then launches ``nifi.sh start`` as the NiFi user with its output
    appended to the node log file. If the pid file is not present yet, the
    method waits five seconds. Registry client registration is best-effort:
    a failure is logged as a warning and does not fail the start.

    :param env: execution environment
    :param upgrade_type: forwarded to the toolkit script copy
    """
    import params
    import status_params

    nifi_toolkit_util_common.copy_toolkit_scripts(
        params.toolkit_files_dir, params.toolkit_tmp_dir, params.nifi_user,
        params.nifi_group, upgrade_type, service=nifi_toolkit_util_common.NIFI)

    self.configure(env, is_starting = True)
    # NOTE(review): the caller's upgrade_type is not forwarded here; None is
    # always passed - confirm this is intended.
    setup_ranger_nifi(upgrade_type=None)

    Execute ('export JAVA_HOME='+params.jdk64_home+';'+params.bin_dir+'/nifi.sh start >> ' + params.nifi_node_log_file, user=params.nifi_user)

    # If nifi pid file not created yet, wait a bit
    if not os.path.isfile(status_params.nifi_pid_dir+'/nifi.pid'):
        Execute ('sleep 5')

    if params.nifi_registry_url and params.stack_support_nifi_auto_client_registration and not params.force_skip_registry_registration:
        Logger.info("Trying to register NIFI Registry. This can take up to several minutes. Please, wait...")
        try:
            nifi_cli.create_or_update_reg_client(params.nifi_registry_host, params.nifi_registry_url)
        except Exception as err:
            # Exception (not a bare except) so SystemExit and
            # KeyboardInterrupt still propagate; the reason is reported too.
            Logger.warning("Unable to create or update registry client. Please review NiFi to setup the registry manually if necessary. Reason: {0}".format(err))
def restart_indexing_topology(self, env):
    """
    Stop the indexing topology, wait for it to disappear, and start it again.

    The topology state is polled up to three more times, 10 seconds apart.
    Once it is no longer active, the method waits a further 30 seconds for
    the Storm kill to complete and starts the topology. If the topology is
    still active after the retries, the start is skipped with a warning.

    :param env: execution environment, passed to ``is_topology_active``
    """
    Logger.info('Restarting the indexing topologies')
    self.stop_indexing_topology()

    # Wait for old topology to be cleaned up by Storm, before starting again.
    retries = 0
    topology_active = self.is_topology_active(env)
    while topology_active and retries < 3:
        Logger.info('Existing topology still active. Will wait and retry')
        time.sleep(10)
        # Refresh the flag so the decision below uses the latest state, not
        # the state sampled before the wait.
        topology_active = self.is_topology_active(env)
        retries += 1

    if not topology_active:
        Logger.info('Waiting for storm kill to complete')
        time.sleep(30)
        self.start_indexing_topology()
        Logger.info('Done restarting the indexing topologies')
    else:
        Logger.warning('Retries exhausted. Existing topology not cleaned up.  Aborting topology start.')
def setup_config():
    """
    Create core-site.xml when applicable and set up the logfeeder configuration.

    A warning is logged if ``params.hadoop_conf_dir`` is missing, None, or not
    an existing path. core-site.xml is written only when the directory exists
    and a NameNode is present, the unformatted stack version contains
    'Gluster', or the filesystem type is 'HCFS'. The logfeeder configuration
    directory is always created; the logsearch configuration file is rendered
    into it when ``params.logsearch_config_file_exists``, otherwise a warning
    is logged.
    """
    import params

    stackversion = params.stack_version_unformatted
    Logger.info("FS Type: {0}".format(params.dfs_type))

    hadoop_conf_dir = getattr(params, "hadoop_conf_dir", None)
    is_hadoop_conf_dir_present = (
        hadoop_conf_dir is not None and os.path.exists(hadoop_conf_dir))

    if not is_hadoop_conf_dir_present:
        Logger.warning(
            "Parameter hadoop_conf_dir is missing or directory does not exist. This is expected if this host does not have any Hadoop components."
        )
    elif (params.has_namenode
          or stackversion.find('Gluster') >= 0
          or params.dfs_type == 'HCFS'):
        # create core-site only if the hadoop config directory exists
        XmlConfig(
            "core-site.xml",
            conf_dir=params.hadoop_conf_dir,
            configurations=params.config['configurations']['core-site'],
            configuration_attributes=params.config['configurationAttributes']['core-site'],
            owner=params.hdfs_user,
            group=params.user_group,
            only_if=format("ls {hadoop_conf_dir}"))

    Directory(params.logsearch_logfeeder_conf,
              mode=0o755,
              cd_access='a',
              create_parents=True)

    if not params.logsearch_config_file_exists:
        Logger.warning('No logsearch configuration exists at ' +
                       params.logsearch_config_file_path)
        return

    File(format("{logsearch_logfeeder_conf}/" + params.logsearch_config_file_name),
         content=Template(params.logsearch_config_file_path,
                          extra_imports=[default]))
def check_installed_metrics_hadoop_sink_version(
        hadoop_sink_package_name="ambari-metrics-hadoop-sink",
        checked_version="2.7.0.0",
        less_valid=True,
        equal_valid=False):
    """
    Validate the installed metrics hadoop sink package version.

    The installed version (with anything after the first "-" dropped) is
    compared to ``checked_version``. If the installed version cannot be
    determined, a warning is logged and the check is skipped.

    :param hadoop_sink_package_name: package to inspect; the default name is
           replaced by "ambari-metrics-assembly" on the Ubuntu family
    :param checked_version: version to compare against
    :param less_valid: if True the comparison result must be -1, otherwise it must be 1
    :param equal_valid: if True a comparison result of 0 is accepted as well
    :raises Fail: if the installed version does not satisfy the requirement
    """
    # The default package name is different for ubuntu and debian, so if the
    # default one is used change the name
    if (hadoop_sink_package_name == "ambari-metrics-hadoop-sink"
            and OSCheck.is_ubuntu_family()):
        hadoop_sink_package_name = "ambari-metrics-assembly"

    installed_version = ManagerFactory.get().get_installed_package_version(
        hadoop_sink_package_name)

    if not installed_version:
        Logger.warning(
            "Couldn't determine %s package version, skipping the sink version check"
            % hadoop_sink_package_name)
        return

    if "-" in installed_version:
        installed_version = installed_version.split("-")[0]

    comparison = compare_versions(installed_version, checked_version)
    accepted_as_equal = equal_valid and comparison == 0

    if not accepted_as_equal:
        if less_valid:
            # installed version should be less than the checked version
            if comparison != -1:
                raise Fail(
                    "%s installed package version is %s. It should be less than %s due to"
                    " incompatibility. Please downgrade the package or upgrade the stack and try again."
                    % (hadoop_sink_package_name, installed_version, checked_version))
        elif comparison != 1:
            raise Fail(
                "%s installed package version is %s. It should be greater than or equal to %s due to"
                " incompatibility. Please upgrade the package or downgrade the stack and try again."
                % (hadoop_sink_package_name, installed_version, checked_version))

    Logger.info("ambari-metrics-hadoop-sink package version is OK")
def copy_tarballs_to_hdfs(source, dest, stack_select_component_name,
                          component_user, file_owner, group_owner):
    """
    Copy a local tarball to a destination folder through ``_copy_files``.

    The destination file is ``dest`` joined with the base name of ``source``,
    with the literal "{{ stack_version_formatted }}" replaced by the stack
    version. When security is enabled a kinit command is executed first as
    ``component_user`` and also handed to ``_copy_files``.

    :param source: path of the local tarball
    :param dest: destination folder
    :param stack_select_component_name: not used by this function
    :param component_user: user that executes the kinit command
    :param file_owner: owner passed to ``_copy_files``
    :param group_owner: group owner passed to ``_copy_files``
    :return: 1 if ``source`` does not exist, otherwise whatever ``_copy_files`` returns
    """
    # NOTE(review): ``stack_version`` and ``params`` are not defined inside
    # this function; presumably they are module-level names - confirm.
    if not os.path.exists(source):
        Logger.warning("Could not find file: %s" % str(source))
        return 1

    target_path = os.path.join(dest, os.path.basename(source))
    target_path = target_path.replace("{{ stack_version_formatted }}", stack_version)

    if params.security_enabled:
        kinit_if_needed = format(
            "{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};")
    else:
        kinit_if_needed = ""

    if kinit_if_needed:
        Execute(kinit_if_needed, user=component_user, path='/bin')

    return _copy_files([(source, target_path)], file_owner, group_owner,
                       kinit_if_needed)
def setup_config():
    """
    Create core-site.xml in the Hadoop configuration directory when applicable.

    A warning is logged if ``params.hadoop_conf_dir`` is missing, None, or not
    an existing path. core-site.xml is written only when the directory exists
    and a NameNode is present, the unformatted stack version contains
    'Gluster', or the filesystem type is 'HCFS'.
    """
    import params

    stackversion = params.stack_version_unformatted
    Logger.info("FS Type: {0}".format(params.dfs_type))

    hadoop_conf_dir = getattr(params, "hadoop_conf_dir", None)
    is_hadoop_conf_dir_present = (
        hadoop_conf_dir is not None and os.path.exists(hadoop_conf_dir))

    if not is_hadoop_conf_dir_present:
        Logger.warning("Parameter hadoop_conf_dir is missing or directory does not exist. This is expected if this host does not have any Hadoop components.")
        return

    if not (params.has_namenode
            or stackversion.find('Gluster') >= 0
            or params.dfs_type == 'HCFS'):
        return

    # create core-site only if the hadoop config directory exists
    XmlConfig("core-site.xml",
              conf_dir=params.hadoop_conf_dir,
              configurations=params.config['configurations']['core-site'],
              configuration_attributes=params.config['configuration_attributes']['core-site'],
              owner=params.hdfs_user,
              group=params.user_group,
              only_if=format("ls {hadoop_conf_dir}"))