def should_install_falcon_atlas_hook():
  config = Script.get_config()
  stack_version_unformatted = config['hostLevelParams']['stack_version']
  stack_version_formatted = format_stack_version(stack_version_unformatted)
  if check_stack_feature(StackFeature.FALCON_ATLAS_SUPPORT_2_3, stack_version_formatted) \
      or check_stack_feature(StackFeature.FALCON_ATLAS_SUPPORT, stack_version_formatted):
    return _has_applicable_local_component(config, ['FALCON_SERVER'])
  return False
def pre_upgrade_restart(self, env, upgrade_type=None): import params env.set_params(params) if params.version and check_stack_feature(StackFeature.ROLLING_UPGRADE, params.version): Logger.info("Executing Spark2 Client Stack Upgrade pre-restart") conf_select.select(params.stack_name, "spark", params.version) stack_select.select("spark2-client", params.version)
def pre_upgrade_restart(self, env, upgrade_type=None): import params env.set_params(params) if params.version and check_stack_feature(StackFeature.ROLLING_UPGRADE, params.version): Logger.info("Executing Spark2 Job History Server Stack Upgrade pre-restart") conf_select.select(params.stack_name, "spark2", params.version) stack_select.select("spark2-historyserver", params.version) # Spark 1.3.1.2.3, and higher, which was included in HDP 2.3, does not have a dependency on Tez, so it does not # need to copy the tarball, otherwise, copy it. if params.version and check_stack_feature(StackFeature.TEZ_FOR_SPARK, params.version): resource_created = copy_to_hdfs( "tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped) if resource_created: params.HdfsResource(None, action="execute")
def get_hadoop_dir(target, force_latest_on_upgrade=False):
  """
  Return the hadoop shared directory in the following override order
  1. Use default for 2.1 and lower
  2. If 2.2 and higher, use <stack-root>/current/hadoop-client/{target}
  3. If 2.2 and higher AND for an upgrade, use <stack-root>/<version>/hadoop/{target}.
  However, if the upgrade has not yet invoked <stack-selector-tool>, return the current
  version of the component.
  :target: the target directory
  :force_latest_on_upgrade: if True, then this will return the "current" directory
  without the stack version built into the path, such as <stack-root>/current/hadoop-client
  """
  stack_root = Script.get_stack_root()
  stack_version = Script.get_stack_version()

  if not target in HADOOP_DIR_DEFAULTS:
    raise Fail("Target {0} not defined".format(target))

  hadoop_dir = HADOOP_DIR_DEFAULTS[target]

  formatted_stack_version = format_stack_version(stack_version)
  if formatted_stack_version and check_stack_feature(StackFeature.ROLLING_UPGRADE, formatted_stack_version):
    # home uses a different template
    if target == "home":
      hadoop_dir = HADOOP_HOME_DIR_TEMPLATE.format(stack_root, "current", "hadoop-client")
    else:
      hadoop_dir = HADOOP_DIR_TEMPLATE.format(stack_root, "current", "hadoop-client", target)

    # if we are not forcing "current" for HDP 2.2, then attempt to determine
    # if the exact version needs to be returned in the directory
    if not force_latest_on_upgrade:
      stack_info = _get_upgrade_stack()

      if stack_info is not None:
        stack_version = stack_info[1]

        # determine if <stack-selector-tool> has been run and if not, then use the current
        # hdp version until this component is upgraded
        current_stack_version = get_role_component_current_stack_version()
        if current_stack_version is not None and stack_version != current_stack_version:
          stack_version = current_stack_version

        if target == "home":
          # home uses a different template
          hadoop_dir = HADOOP_HOME_DIR_TEMPLATE.format(stack_root, stack_version, "hadoop")
        else:
          hadoop_dir = HADOOP_DIR_TEMPLATE.format(stack_root, stack_version, "hadoop", target)

  return hadoop_dir
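# A minimal usage sketch (not part of the library code above): how get_hadoop_dir()
# is typically consumed by params scripts such as the YARN params block further below.
# The returned paths depend on the installed stack version and <stack-selector-tool> state.
hadoop_bin_dir = get_hadoop_dir("bin")
hadoop_libexec_dir = get_hadoop_dir("libexec")
# During an upgrade, callers may force the "current" symlink instead of a concrete version path.
hadoop_home = get_hadoop_dir("home", force_latest_on_upgrade=True)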
def should_expose_component_version(self, command_name):
  """
  Analyzes config and given command to determine if stack version should be written
  to structured out. Currently only HDP stack versions >= 2.2 are supported.
  :param command_name: command name
  :return: True or False
  """
  from resource_management.libraries.functions.default import default
  stack_version_unformatted = str(default("/hostLevelParams/stack_version", ""))
  stack_version_formatted = format_stack_version(stack_version_unformatted)
  if stack_version_formatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version_formatted):
    if command_name.lower() == "status":
      request_version = default("/commandParams/request_version", None)
      if request_version is not None:
        return True
    else:
      # Populate version only on base commands
      return command_name.lower() == "start" or command_name.lower() == "install" or command_name.lower() == "restart"
  return False
def get_lzo_packages(stack_version_unformatted):
  lzo_packages = []
  script_instance = Script.get_instance()
  if OSCheck.is_suse_family() and int(OSCheck.get_os_major_version()) >= 12:
    lzo_packages += ["liblzo2-2", "hadoop-lzo-native"]
  elif OSCheck.is_redhat_family() or OSCheck.is_suse_family():
    lzo_packages += ["lzo", "hadoop-lzo-native"]
  elif OSCheck.is_ubuntu_family():
    lzo_packages += ["liblzo2-2"]

  if stack_version_unformatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version_unformatted):
    if OSCheck.is_ubuntu_family():
      lzo_packages += [script_instance.format_package_name("hadooplzo-${stack_version}"),
                       script_instance.format_package_name("hadooplzo-${stack_version}-native")]
    else:
      lzo_packages += [script_instance.format_package_name("hadooplzo_${stack_version}"),
                       script_instance.format_package_name("hadooplzo_${stack_version}-native")]
  else:
    lzo_packages += ["hadoop-lzo"]

  return lzo_packages
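# A minimal usage sketch, assuming the Package resource and retry parameters used elsewhere
# in these scripts: install whichever LZO packages get_lzo_packages() resolves for the
# current OS/stack. `stack_version_unformatted` and the params attributes are assumptions here.
for lzo_pkg in get_lzo_packages(stack_version_unformatted):
  Package(lzo_pkg,
          retry_on_repo_unavailability=params.agent_stack_retry_on_unavailability,
          retry_count=params.agent_stack_retry_count)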
def get_config_dir_during_stack_upgrade(self, env, base_dir, conf_select_name):
  """
  Because this gets called during a Rolling Upgrade, the new configs have already been saved,
  so we must be careful to only call configure() on the directory with the new version.
  If valid, returns the config directory to save configs to, otherwise, return None
  """
  import params
  env.set_params(params)

  required_attributes = ["stack_name", "stack_root", "version"]
  for attribute in required_attributes:
    if not hasattr(params, attribute):
      raise Fail("Failed in function 'stack_upgrade_save_new_config' because params was missing variable %s." % attribute)

  Logger.info("stack_upgrade_save_new_config(): Checking if can write new client configs to new config version folder.")

  if check_stack_feature(StackFeature.CONFIG_VERSIONING, params.version):
    # Even though hdp-select has not yet been called, write new configs to the new config directory.
    config_path = os.path.join(params.stack_root, params.version, conf_select_name, "conf")
    return os.path.realpath(config_path)
  return None
def get_hadoop_dir_for_stack_version(target, stack_version):
  """
  Return the hadoop shared directory for the provided stack version. This is necessary
  when folder paths of downgrade-source stack-version are needed after <stack-selector-tool>.
  :target: the target directory
  :stack_version: stack version to get hadoop dir for
  """
  stack_root = Script.get_stack_root()
  if not target in HADOOP_DIR_DEFAULTS:
    raise Fail("Target {0} not defined".format(target))

  hadoop_dir = HADOOP_DIR_DEFAULTS[target]

  formatted_stack_version = format_stack_version(stack_version)
  if formatted_stack_version and check_stack_feature(StackFeature.ROLLING_UPGRADE, formatted_stack_version):
    # home uses a different template
    if target == "home":
      hadoop_dir = HADOOP_HOME_DIR_TEMPLATE.format(stack_root, stack_version, "hadoop")
    else:
      hadoop_dir = HADOOP_DIR_TEMPLATE.format(stack_root, stack_version, "hadoop", target)

  return hadoop_dir
stack_name = status_params.stack_name
current_version = default("/hostLevelParams/current_version", None)
component_directory = status_params.component_directory

# New Cluster Stack Version that is defined during the RESTART of a Rolling Upgrade
version = default("/commandParams/version", None)

# default parameters
zk_home = "/usr"
zk_bin = "/usr/lib/zookeeper/bin"
zk_cli_shell = "/usr/lib/zookeeper/bin/zkCli.sh"
config_dir = "/etc/zookeeper/conf"
zk_smoke_out = os.path.join(tmp_dir, "zkSmoke.out")

# hadoop parameters for stacks that support rolling_upgrade
if stack_version_formatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version_formatted):
  zk_home = format("{stack_root}/current/{component_directory}")
  zk_bin = format("{stack_root}/current/{component_directory}/bin")
  zk_cli_shell = format("{stack_root}/current/{component_directory}/bin/zkCli.sh")
  config_dir = status_params.config_dir

zk_user = config['configurations']['zookeeper-env']['zk_user']
hostname = config['hostname']
user_group = config['configurations']['cluster-env']['user_group']
zk_env_sh_template = config['configurations']['zookeeper-env']['content']

zk_log_dir = config['configurations']['zookeeper-env']['zk_log_dir']
zk_data_dir = config['configurations']['zoo.cfg']['dataDir']
zk_pid_dir = status_params.zk_pid_dir
zk_pid_file = status_params.zk_pid_file
def copy_atlas_hive_hook_to_dfs_share_lib(upgrade_type=None, upgrade_direction=None):
  """
  If the Atlas Hive Hook directory is present, Atlas is installed, and this is the first Oozie Server,
  then copy the entire contents of that directory to the Oozie Sharelib in DFS, e.g.,
  /usr/$stack/$current_version/atlas/hook/hive/ -> hdfs:///user/oozie/share/lib/lib_$timestamp/hive

  :param upgrade_type: If in the middle of a stack upgrade, the type as UPGRADE_TYPE_ROLLING or UPGRADE_TYPE_NON_ROLLING
  :param upgrade_direction: If in the middle of a stack upgrade, the direction as Direction.UPGRADE or Direction.DOWNGRADE.
  """
  import params

  # Calculate the effective version since this code can also be called during EU/RU in the upgrade direction.
  effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(params.version)
  if not check_stack_feature(StackFeature.ATLAS_HOOK_SUPPORT, effective_version):
    return

  # Important that oozie_server_hostnames is sorted by name so that this only runs on a single Oozie server.
  if not (len(params.oozie_server_hostnames) > 0 and params.hostname == params.oozie_server_hostnames[0]):
    Logger.debug("Will not attempt to copy Atlas Hive hook to DFS since this is not the first Oozie Server "
                 "sorted by hostname.")
    return

  if not has_atlas_in_cluster():
    Logger.debug("Will not attempt to copy Atlas Hive hook to DFS since Atlas is not installed on the cluster.")
    return

  if upgrade_type is not None and upgrade_direction == Direction.DOWNGRADE:
    Logger.debug("Will not attempt to copy Atlas Hive hook to DFS since in the middle of Rolling/Express upgrade "
                 "and performing a Downgrade.")
    return

  current_version = get_current_version()
  atlas_hive_hook_dir = format("{stack_root}/{current_version}/atlas/hook/hive/")
  if not os.path.exists(atlas_hive_hook_dir):
    Logger.error(format("ERROR. Atlas is installed in cluster but this Oozie server doesn't "
                        "contain directory {atlas_hive_hook_dir}"))
    return

  atlas_hive_hook_impl_dir = os.path.join(atlas_hive_hook_dir, "atlas-hive-plugin-impl")

  num_files = len([name for name in os.listdir(atlas_hive_hook_impl_dir)
                   if os.path.exists(os.path.join(atlas_hive_hook_impl_dir, name))])
  Logger.info("Found %d files/directories inside Atlas Hive hook impl directory %s"
              % (num_files, atlas_hive_hook_impl_dir))

  # This can return over 100 files, so take the first 5 lines after "Available ShareLib"
  # Use -oozie http(s):localhost:{oozie_server_admin_port}/oozie as oozie-env does not export OOZIE_URL
  command = format(r'source {conf_dir}/oozie-env.sh ; oozie admin -oozie {oozie_base_url} -shareliblist hive | grep "\[Available ShareLib\]" -A 5')

  # Capture the command output so the Hive sharelib path can be parsed below
  # (assumes checked_call from resource_management.core.shell is available in this module).
  return_code, out = checked_call(command,
                                  user=params.oozie_user,
                                  tries=10,
                                  try_sleep=5,
                                  logoutput=True)

  hive_sharelib_dir = __parse_sharelib_from_output(out)

  if hive_sharelib_dir is None:
    raise Fail("Could not parse Hive sharelib from output.")

  Logger.info(format("Parsed Hive sharelib = {hive_sharelib_dir} and will attempt to copy/replace "
                     "{num_files} files to it from {atlas_hive_hook_impl_dir}"))

  params.HdfsResource(hive_sharelib_dir,
                      type="directory",
                      action="create_on_execute",
                      source=atlas_hive_hook_impl_dir,
                      user=params.hdfs_user,
                      owner=params.oozie_user,
                      group=params.hdfs_user,
                      mode=0755,
                      recursive_chown=True,
                      recursive_chmod=True,
                      replace_existing_files=True)

  Logger.info("Copying Atlas Hive hook properties file to Oozie Sharelib in DFS.")
  atlas_hook_filepath_source = os.path.join(params.hive_conf_dir, params.atlas_hook_filename)
  atlas_hook_file_path_dest_in_dfs = os.path.join(hive_sharelib_dir, params.atlas_hook_filename)
  params.HdfsResource(atlas_hook_file_path_dest_in_dfs,
                      type="file",
                      source=atlas_hook_filepath_source,
                      action="create_on_execute",
                      owner=params.oozie_user,
                      group=params.hdfs_user,
                      mode=0755,
                      replace_existing_files=True)
  params.HdfsResource(None, action="execute")

  # Update the sharelib after making any changes
  # Use -oozie http(s):localhost:{oozie_server_admin_port}/oozie as oozie-env does not export OOZIE_URL
  Execute(format("source {conf_dir}/oozie-env.sh ; oozie admin -oozie {oozie_base_url} -sharelibupdate"),
          user=params.oozie_user,
          tries=5,
          try_sleep=5,
          logoutput=True,
  )
def oozie(is_server=False):
  import params

  if is_server:
    params.HdfsResource(params.oozie_hdfs_user_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.oozie_user,
                        mode=params.oozie_hdfs_user_mode)
    params.HdfsResource(None, action="execute")

  Directory(params.conf_dir,
            create_parents=True,
            owner=params.oozie_user,
            group=params.user_group)

  XmlConfig("oozie-site.xml",
            conf_dir=params.conf_dir,
            configurations=params.oozie_site,
            configuration_attributes=params.config['configuration_attributes']['oozie-site'],
            owner=params.oozie_user,
            group=params.user_group,
            mode=0664)

  File(format("{conf_dir}/oozie-env.sh"),
       owner=params.oozie_user,
       content=InlineTemplate(params.oozie_env_sh_template),
       group=params.user_group,
  )

  # On some OSes this folder may not exist, so create it before placing files there
  Directory(params.limits_conf_dir,
            create_parents=True,
            owner='root',
            group='root')

  File(os.path.join(params.limits_conf_dir, 'oozie.conf'),
       owner='root',
       group='root',
       mode=0644,
       content=Template("oozie.conf.j2"))

  if (params.log4j_props != None):
    File(format("{params.conf_dir}/oozie-log4j.properties"),
         mode=0644,
         group=params.user_group,
         owner=params.oozie_user,
         content=InlineTemplate(params.log4j_props))
  elif (os.path.exists(format("{params.conf_dir}/oozie-log4j.properties"))):
    File(format("{params.conf_dir}/oozie-log4j.properties"),
         mode=0644,
         group=params.user_group,
         owner=params.oozie_user)

  if params.stack_version_formatted and check_stack_feature(StackFeature.OOZIE_ADMIN_USER, params.stack_version_formatted):
    File(format("{params.conf_dir}/adminusers.txt"),
         mode=0644,
         group=params.user_group,
         owner=params.oozie_user,
         content=Template('adminusers.txt.j2', oozie_admin_users=params.oozie_admin_users))
  else:
    File(format("{params.conf_dir}/adminusers.txt"),
         owner=params.oozie_user,
         group=params.user_group)

  if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \
     params.jdbc_driver_name == "com.microsoft.sqlserver.jdbc.SQLServerDriver" or \
     params.jdbc_driver_name == "org.postgresql.Driver" or \
     params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver":
    File(format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"),
         content=DownloadSource(format("{jdk_location}{check_db_connection_jar_name}")),
    )
  pass

  oozie_ownership()

  if is_server:
    oozie_server_specific()
# server configurations
config = Script.get_config()
tmp_dir = Script.get_tmp_dir()

stack_name = default("/hostLevelParams/stack_name", None)
stack_root = Script.get_stack_root()
tarball_map = default("/configurations/cluster-env/tarball_map", None)

# This is expected to be of the form #.#.#.#
stack_version_unformatted = config['hostLevelParams']['stack_version']
stack_version_formatted_major = format_stack_version(stack_version_unformatted)
stack_version_formatted = functions.get_stack_version('hadoop-yarn-resourcemanager')

stack_supports_ru = stack_version_formatted_major and check_stack_feature(
  StackFeature.ROLLING_UPGRADE, stack_version_formatted_major)
stack_supports_timeline_state_store = stack_version_formatted_major and check_stack_feature(
  StackFeature.TIMELINE_STATE_STORE, stack_version_formatted_major)

# New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade.
# It cannot be used during the initial Cluster Install because the version is not yet known.
version = default("/commandParams/version", None)

hostname = config['hostname']

# hadoop default parameters
hadoop_libexec_dir = stack_select.get_hadoop_dir("libexec")
hadoop_bin = stack_select.get_hadoop_dir("sbin")
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
hadoop_yarn_home = '/usr/lib/hadoop-yarn'
# server configurations
config = Script.get_config()
exec_tmp_dir = Script.get_tmp_dir()
stack_root = Script.get_stack_root()

# Needed since this is an Atlas Hook service.
cluster_name = config['clusterName']

java_version = expect("/hostLevelParams/java_version", int)

zk_root = default('/configurations/application-properties/atlas.server.ha.zookeeper.zkroot', '/apache_atlas')

# New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade
version = default("/commandParams/version", None)

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)

stack_supports_zk_security = check_stack_feature(StackFeature.SECURE_ZOOKEEPER, version_for_stack_feature_checks)

atlas_kafka_group_id = default('/configurations/application-properties/atlas.kafka.hook.group.id', None)

if security_enabled:
  _hostname_lowercase = config['hostname'].lower()
  _atlas_principal_name = config['configurations']['application-properties']['atlas.authentication.principal']
  atlas_jaas_principal = _atlas_principal_name.replace('_HOST', _hostname_lowercase)
  atlas_keytab_path = config['configurations']['application-properties']['atlas.authentication.keytab']
java_home = config['hostLevelParams']['java_home']
stack_name = status_params.stack_name
stack_root = Script.get_stack_root()

version_for_stack_feature_checks = get_stack_feature_version(config)

sysprep_skip_copy_tarballs_hdfs = get_sysprep_skip_copy_tarballs_hdfs()

# New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade
version = default("/commandParams/version", None)

spark_conf = '/etc/spark/conf'
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")

if check_stack_feature(StackFeature.ROLLING_UPGRADE, version_for_stack_feature_checks):
  hadoop_home = stack_select.get_hadoop_dir("home")
  spark_conf = format("{stack_root}/current/{component_directory}/conf")
  spark_log_dir = config['configurations']['spark-env']['spark_log_dir']
  spark_daemon_memory = config['configurations']['spark-env']['spark_daemon_memory']
  spark_pid_dir = status_params.spark_pid_dir
  spark_home = format("{stack_root}/current/{component_directory}")

spark_thrift_server_conf_file = spark_conf + "/spark-thrift-sparkconf.conf"

hdfs_user = config['configurations']['hadoop-env']['hdfs_user']
hdfs_principal_name = config['configurations']['hadoop-env']['hdfs_principal_name']
hdfs_user_keytab = config['configurations']['hadoop-env']['hdfs_user_keytab']
stack_version_unformatted = config['clusterLevelParams']['stack_version']
stack_version_formatted = format_stack_version(stack_version_unformatted)
major_stack_version = get_major_version(stack_version_formatted)

upgrade_marker_file = format("{tmp_dir}/rangeradmin_ru.inprogress")

xml_configurations_supported = config['configurations']['ranger-env']['xml_configurations_supported']

create_db_dbuser = config['configurations']['ranger-env']['create_db_dbuser']

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)

stack_supports_rolling_upgrade = check_stack_feature(StackFeature.ROLLING_UPGRADE, version_for_stack_feature_checks)
stack_supports_config_versioning = check_stack_feature(StackFeature.CONFIG_VERSIONING, version_for_stack_feature_checks)
stack_supports_usersync_non_root = check_stack_feature(StackFeature.RANGER_USERSYNC_NON_ROOT, version_for_stack_feature_checks)
stack_supports_ranger_tagsync = check_stack_feature(StackFeature.RANGER_TAGSYNC_COMPONENT, version_for_stack_feature_checks)
stack_supports_ranger_audit_db = check_stack_feature(StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks)
stack_supports_ranger_log4j = check_stack_feature(StackFeature.RANGER_LOG4J_SUPPORT, version_for_stack_feature_checks)
stack_supports_ranger_kerberos = check_stack_feature(StackFeature.RANGER_KERBEROS_SUPPORT, version_for_stack_feature_checks)
stack_supports_usersync_passwd = check_stack_feature(StackFeature.RANGER_USERSYNC_PASSWORD_JCEKS, version_for_stack_feature_checks)
falcon_server_hosts = default("/clusterHostInfo/falcon_server_hosts", []) ranger_admin_hosts = default("/clusterHostInfo/ranger_admin_hosts", []) zeppelin_master_hosts = default("/clusterHostInfo/zeppelin_master_hosts", []) # get the correct version to use for checking stack features version_for_stack_feature_checks = get_stack_feature_version(config) has_namenode = not len(namenode_host) == 0 has_ganglia_server = not len(ganglia_server_hosts) == 0 has_tez = 'tez-site' in config['configurations'] has_hbase_masters = not len(hbase_master_hosts) == 0 has_oozie_server = not len(oozie_servers) == 0 has_falcon_server_hosts = not len(falcon_server_hosts) == 0 has_ranger_admin = not len(ranger_admin_hosts) == 0 has_zeppelin_master = not len(zeppelin_master_hosts) == 0 stack_supports_zk_security = check_stack_feature(StackFeature.SECURE_ZOOKEEPER, version_for_stack_feature_checks) # HDFS High Availability properties dfs_ha_enabled = False dfs_ha_nameservices = default('/configurations/hdfs-site/dfs.internal.nameservices', None) if dfs_ha_nameservices is None: dfs_ha_nameservices = default('/configurations/hdfs-site/dfs.nameservices', None) dfs_ha_namenode_ids = default(format("/configurations/hdfs-site/dfs.ha.namenodes.{dfs_ha_nameservices}"), None) if dfs_ha_namenode_ids: dfs_ha_namemodes_ids_list = dfs_ha_namenode_ids.split(",") dfs_ha_namenode_ids_array_len = len(dfs_ha_namemodes_ids_list) if dfs_ha_namenode_ids_array_len > 1: dfs_ha_enabled = True if has_namenode or dfs_type == 'HCFS':
def setup_ranger_hdfs(upgrade_type=None):
  import params

  if params.enable_ranger_hdfs:

    stack_version = None

    if upgrade_type is not None:
      stack_version = params.version

    if params.retryAble:
      Logger.info("HDFS: Setup ranger: command retry enabled thus retrying if ranger admin is down !")
    else:
      Logger.info("HDFS: Setup ranger: command retry not enabled thus skipping if ranger admin is down !")

    if params.xml_configurations_supported:
      from resource_management.libraries.functions.adh_setup_ranger_plugin_xml import setup_ranger_plugin
      api_version = None
      if params.stack_supports_ranger_kerberos:
        api_version = 'v2'

      setup_ranger_plugin('hadoop-client', 'hdfs', params.previous_jdbc_jar,
                          params.downloaded_custom_connector, params.driver_curl_source,
                          params.driver_curl_target, params.java_home,
                          params.repo_name, params.hdfs_ranger_plugin_repo,
                          params.ranger_env, params.ranger_plugin_properties,
                          params.policy_user, params.policymgr_mgr_url,
                          params.enable_ranger_hdfs,
                          conf_dict=params.hadoop_conf_dir,
                          component_user=params.hdfs_user,
                          component_group=params.user_group,
                          cache_service_list=['hdfs'],
                          plugin_audit_properties=params.config['configurations']['ranger-hdfs-audit'],
                          plugin_audit_attributes=params.config['configuration_attributes']['ranger-hdfs-audit'],
                          plugin_security_properties=params.config['configurations']['ranger-hdfs-security'],
                          plugin_security_attributes=params.config['configuration_attributes']['ranger-hdfs-security'],
                          plugin_policymgr_ssl_properties=params.config['configurations']['ranger-hdfs-policymgr-ssl'],
                          plugin_policymgr_ssl_attributes=params.config['configuration_attributes']['ranger-hdfs-policymgr-ssl'],
                          component_list=['hadoop-client'],
                          audit_db_is_enabled=params.xa_audit_db_is_enabled,
                          credential_file=params.credential_file,
                          xa_audit_db_password=params.xa_audit_db_password,
                          ssl_truststore_password=params.ssl_truststore_password,
                          ssl_keystore_password=params.ssl_keystore_password,
                          api_version=api_version,
                          stack_version_override=stack_version,
                          skip_if_rangeradmin_down=not params.retryAble,
                          is_security_enabled=params.security_enabled,
                          is_stack_supports_ranger_kerberos=params.stack_supports_ranger_kerberos,
                          component_user_principal=params.nn_principal_name if params.security_enabled else None,
                          component_user_keytab=params.nn_keytab if params.security_enabled else None)
    else:
      from resource_management.libraries.functions.adh_setup_ranger_plugin import setup_ranger_plugin

      setup_ranger_plugin('hadoop-client', 'hdfs', params.previous_jdbc_jar,
                          params.downloaded_custom_connector, params.driver_curl_source,
                          params.driver_curl_target, params.java_home,
                          params.repo_name, params.hdfs_ranger_plugin_repo,
                          params.ranger_env, params.ranger_plugin_properties,
                          params.policy_user, params.policymgr_mgr_url,
                          params.enable_ranger_hdfs,
                          conf_dict=params.hadoop_conf_dir,
                          component_user=params.hdfs_user,
                          component_group=params.user_group,
                          cache_service_list=['hdfs'],
                          plugin_audit_properties=params.config['configurations']['ranger-hdfs-audit'],
                          plugin_audit_attributes=params.config['configuration_attributes']['ranger-hdfs-audit'],
                          plugin_security_properties=params.config['configurations']['ranger-hdfs-security'],
                          plugin_security_attributes=params.config['configuration_attributes']['ranger-hdfs-security'],
                          plugin_policymgr_ssl_properties=params.config['configurations']['ranger-hdfs-policymgr-ssl'],
                          plugin_policymgr_ssl_attributes=params.config['configuration_attributes']['ranger-hdfs-policymgr-ssl'],
                          component_list=['hadoop-client'],
                          audit_db_is_enabled=params.xa_audit_db_is_enabled,
                          credential_file=params.credential_file,
                          xa_audit_db_password=params.xa_audit_db_password,
                          ssl_truststore_password=params.ssl_truststore_password,
                          ssl_keystore_password=params.ssl_keystore_password,
                          stack_version_override=stack_version,
                          skip_if_rangeradmin_down=not params.retryAble)

    if stack_version and params.upgrade_direction == Direction.UPGRADE:
      # when upgrading to a stack with REMOVE_RANGER_HDFS_PLUGIN_ENV, this env file must be removed
      if check_stack_feature(StackFeature.REMOVE_RANGER_HDFS_PLUGIN_ENV, stack_version):
        source_file = os.path.join(params.hadoop_conf_dir, 'set-hdfs-plugin-env.sh')
        target_file = source_file + ".bak"
        Execute(("mv", source_file, target_file),
                sudo=True,
                only_if=format("test -f {source_file}"))
  else:
    Logger.info('Ranger Hdfs plugin is not enabled')
stack_name = status_params.stack_name
upgrade_direction = default("/commandParams/upgrade_direction", Direction.UPGRADE)
version = default("/commandParams/version", None)

agent_stack_retry_on_unavailability = config['hostLevelParams']['agent_stack_retry_on_unavailability']
agent_stack_retry_count = expect("/hostLevelParams/agent_stack_retry_count", int)

storm_component_home_dir = status_params.storm_component_home_dir
conf_dir = status_params.conf_dir

stack_version_unformatted = status_params.stack_version_unformatted
stack_version_formatted = status_params.stack_version_formatted
stack_supports_ru = stack_version_formatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version_formatted)
stack_supports_storm_kerberos = stack_version_formatted and check_stack_feature(StackFeature.STORM_KERBEROS, stack_version_formatted)
stack_supports_storm_ams = stack_version_formatted and check_stack_feature(StackFeature.STORM_AMS, stack_version_formatted)
stack_supports_ranger_kerberos = stack_version_formatted and check_stack_feature(StackFeature.RANGER_KERBEROS_SUPPORT, stack_version_formatted)

# default hadoop params
rest_lib_dir = "/usr/lib/storm/contrib/storm-rest"
storm_bin_dir = "/usr/bin"
storm_lib_dir = "/usr/lib/storm/lib/"

# hadoop parameters for 2.2+
if stack_supports_ru:
  rest_lib_dir = format("{storm_component_home_dir}/contrib/storm-rest")
major_stack_version = get_major_version(stack_version_formatted_major)

# New Cluster Stack Version that is defined during the RESTART of a Rolling Upgrade.
# It cannot be used during the initial Cluster Install because the version is not yet known.
version = default("/commandParams/version", None)

# When downgrading the 'version' is pointing to the downgrade-target version
# downgrade_from_version provides the source-version the downgrade is happening from
downgrade_from_version = upgrade_summary.get_downgrade_from_version("HIVE")

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)

# Upgrade direction
upgrade_direction = default("/commandParams/upgrade_direction", None)
stack_supports_ranger_kerberos = check_stack_feature(StackFeature.RANGER_KERBEROS_SUPPORT, version_for_stack_feature_checks)
stack_supports_ranger_audit_db = check_stack_feature(StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks)
stack_supports_ranger_hive_jdbc_url_change = check_stack_feature(StackFeature.RANGER_HIVE_PLUGIN_JDBC_URL, version_for_stack_feature_checks)
stack_supports_atlas_hook_for_hive_interactive = check_stack_feature(StackFeature.HIVE_INTERACTIVE_ATLAS_HOOK_REQUIRED, version_for_stack_feature_checks)
stack_supports_hive_interactive_ga = check_stack_feature(StackFeature.HIVE_INTERACTIVE_GA_SUPPORT, version_for_stack_feature_checks)

# component ROLE directory (like hive-metastore or hive-server2-hive2)
component_directory = status_params.component_directory
component_directory_interactive = status_params.component_directory_interactive

hadoop_home = format('{stack_root}/current/hadoop-client')
kafka_home = '/usr/lib/kafka'
kafka_bin = kafka_home + '/bin/kafka'
conf_dir = "/etc/kafka/conf"
limits_conf_dir = "/etc/security/limits.d"

# Used while upgrading the stack in a kerberized cluster and running kafka-acls.sh
zookeeper_connect = default("/configurations/kafka-broker/zookeeper.connect", None)

kafka_user_nofile_limit = config['configurations']['kafka-env']['kafka_user_nofile_limit']
kafka_user_nproc_limit = config['configurations']['kafka-env']['kafka_user_nproc_limit']

# parameters for 2.2+
if stack_version_formatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version_formatted):
  kafka_home = os.path.join(stack_root, "current", "kafka-broker")
  kafka_bin = os.path.join(kafka_home, "bin", "kafka")
  conf_dir = os.path.join(kafka_home, "config")

kafka_user = config['configurations']['kafka-env']['kafka_user']
kafka_log_dir = config['configurations']['kafka-env']['kafka_log_dir']
kafka_pid_dir = status_params.kafka_pid_dir
kafka_pid_file = kafka_pid_dir + "/kafka.pid"

# This is hardcoded on the kafka bash process lifecycle on which we have no control over
kafka_managed_pid_dir = "/var/run/kafka"
kafka_managed_log_dir = "/var/log/kafka"

user_group = config['configurations']['cluster-env']['user_group']
java64_home = config['hostLevelParams']['java_home']
kafka_env_sh_template = config['configurations']['kafka-env']['content']
kafka_hosts = config['clusterHostInfo']['kafka_broker_hosts']
tmp_dir = Script.get_tmp_dir()
stack_name = status_params.stack_name
upgrade_direction = default("/commandParams/upgrade_direction", None)
version = default("/commandParams/version", None)
# E.g., 2.3.2.0
version_formatted = format_stack_version(version)

# E.g., 2.3
stack_version_unformatted = config['hostLevelParams']['stack_version']
stack_version_formatted = format_stack_version(stack_version_unformatted)

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)

stack_supports_ranger_kerberos = check_stack_feature(StackFeature.RANGER_KERBEROS_SUPPORT, version_for_stack_feature_checks)
stack_supports_ranger_audit_db = check_stack_feature(StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks)
stack_supports_core_site_for_ranger_plugin = check_stack_feature(StackFeature.CORE_SITE_FOR_RANGER_PLUGINS_SUPPORT, version_for_stack_feature_checks)

# This is the version whose state is CURRENT. During an RU, this is the source version.
# DO NOT format it since we need the build number too.
upgrade_from_version = upgrade_summary.get_source_version()

source_stack = default("/commandParams/source_stack", None)
if source_stack is None:
  source_stack = upgrade_summary.get_source_stack("KNOX")
source_stack_name = get_stack_name(source_stack)

if source_stack_name is not None and source_stack_name != stack_name:
def webhcat():
  import params

  Directory(params.templeton_pid_dir,
            owner=params.webhcat_user,
            mode=0755,
            group=params.user_group,
            create_parents=True)

  Directory(params.templeton_log_dir,
            owner=params.webhcat_user,
            mode=0755,
            group=params.user_group,
            create_parents=True)

  Directory(params.config_dir,
            create_parents=True,
            owner=params.webhcat_user,
            group=params.user_group,
            cd_access="a")

  if params.security_enabled:
    kinit_if_needed = format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};")
  else:
    kinit_if_needed = ""

  if kinit_if_needed:
    Execute(kinit_if_needed,
            user=params.webhcat_user,
            path='/bin')

  # Replace _HOST with hostname in relevant principal-related properties
  webhcat_site = params.config['configurations']['webhcat-site'].copy()
  for prop_name in ['templeton.hive.properties', 'templeton.kerberos.principal']:
    if prop_name in webhcat_site:
      webhcat_site[prop_name] = webhcat_site[prop_name].replace("_HOST", params.hostname)

  XmlConfig("webhcat-site.xml",
            conf_dir=params.config_dir,
            configurations=webhcat_site,
            configuration_attributes=params.config['configuration_attributes']['webhcat-site'],
            owner=params.webhcat_user,
            group=params.user_group,
  )

  # if we're in an upgrade of a secure cluster, make sure hive-site and yarn-site are created
  if params.stack_version_formatted_major and check_stack_feature(StackFeature.CONFIG_VERSIONING, params.stack_version_formatted_major) and \
     params.version and params.stack_root:
    XmlConfig("hive-site.xml",
              conf_dir=params.config_dir,
              configurations=params.config['configurations']['hive-site'],
              configuration_attributes=params.config['configuration_attributes']['hive-site'],
              owner=params.hive_user,
              group=params.user_group,
    )

    XmlConfig("yarn-site.xml",
              conf_dir=params.hadoop_conf_dir,
              configurations=params.config['configurations']['yarn-site'],
              configuration_attributes=params.config['configuration_attributes']['yarn-site'],
              owner=params.yarn_user,
              group=params.user_group,
    )

  File(format("{config_dir}/webhcat-env.sh"),
       owner=params.webhcat_user,
       group=params.user_group,
       content=InlineTemplate(params.webhcat_env_sh_template))

  Directory(params.webhcat_conf_dir,
            cd_access='a',
            create_parents=True)

  log4j_webhcat_filename = 'webhcat-log4j.properties'
  if (params.log4j_webhcat_props != None):
    File(format("{config_dir}/{log4j_webhcat_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.webhcat_user,
         content=params.log4j_webhcat_props)
  elif (os.path.exists(format("{config_dir}/{log4j_webhcat_filename}.template"))):
    File(format("{config_dir}/{log4j_webhcat_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.webhcat_user,
         content=StaticFile(format("{config_dir}/{log4j_webhcat_filename}.template")))

  # Generate atlas-application.properties.xml file
  if params.enable_atlas_hook:
    script_path = os.path.realpath(__file__).split('/services')[0] + '/hooks/before-INSTALL/scripts/atlas'
    sys.path.append(script_path)
    from setup_atlas_hook import has_atlas_in_cluster, setup_atlas_hook, setup_atlas_jar_symlinks
    atlas_hook_filepath = os.path.join(params.hive_config_dir, params.atlas_hook_filename)
    setup_atlas_hook(SERVICE.HIVE, params.hive_atlas_application_properties, atlas_hook_filepath, params.hive_user, params.user_group)
    setup_atlas_jar_symlinks("hive", params.hcat_lib)
def hbase(name=None):
  import params

  # ensure that matching LZO libraries are installed for HBase
  lzo_utils.install_lzo_if_needed()

  Directory(params.etc_prefix_dir,
            mode=0755)

  Directory(params.hbase_conf_dir,
            owner=params.hbase_user,
            group=params.user_group,
            create_parents=True)

  Directory(params.java_io_tmpdir,
            create_parents=True,
            mode=0777)

  # If a file location is specified in ioengine parameter,
  # ensure that directory exists. Otherwise create the
  # directory with permissions assigned to hbase:hadoop.
  ioengine_input = params.ioengine_param
  if ioengine_input != None:
    if ioengine_input.startswith("file:/"):
      ioengine_fullpath = ioengine_input[5:]
      ioengine_dir = os.path.dirname(ioengine_fullpath)
      Directory(ioengine_dir,
                owner=params.hbase_user,
                group=params.user_group,
                create_parents=True,
                mode=0755)

  parent_dir = os.path.dirname(params.tmp_dir)
  # In case we have several placeholders in the path
  while ("${" in parent_dir):
    parent_dir = os.path.dirname(parent_dir)
  if parent_dir != os.path.abspath(os.sep):
    Directory(parent_dir,
              create_parents=True,
              cd_access="a",
    )
    Execute(("chmod", "1777", parent_dir), sudo=True)

  XmlConfig("hbase-site.xml",
            conf_dir=params.hbase_conf_dir,
            configurations=params.config['configurations']['hbase-site'],
            configuration_attributes=params.config['configuration_attributes']['hbase-site'],
            owner=params.hbase_user,
            group=params.user_group)

  if check_stack_feature(StackFeature.PHOENIX_CORE_HDFS_SITE_REQUIRED, params.version_for_stack_feature_checks):
    XmlConfig("core-site.xml",
              conf_dir=params.hbase_conf_dir,
              configurations=params.config['configurations']['core-site'],
              configuration_attributes=params.config['configuration_attributes']['core-site'],
              owner=params.hbase_user,
              group=params.user_group)
    if 'hdfs-site' in params.config['configurations']:
      XmlConfig("hdfs-site.xml",
                conf_dir=params.hbase_conf_dir,
                configurations=params.config['configurations']['hdfs-site'],
                configuration_attributes=params.config['configuration_attributes']['hdfs-site'],
                owner=params.hbase_user,
                group=params.user_group)
  else:
    File(format("{params.hbase_conf_dir}/hdfs-site.xml"),
         action="delete")
    File(format("{params.hbase_conf_dir}/core-site.xml"),
         action="delete")

  if 'hbase-policy' in params.config['configurations']:
    XmlConfig("hbase-policy.xml",
              conf_dir=params.hbase_conf_dir,
              configurations=params.config['configurations']['hbase-policy'],
              configuration_attributes=params.config['configuration_attributes']['hbase-policy'],
              owner=params.hbase_user,
              group=params.user_group)
  else:
    # Manually overriding ownership of file installed by hadoop package
    File(format("{params.hbase_conf_dir}/hbase-policy.xml"),
         owner=params.hbase_user,
         group=params.user_group)

  File(format("{hbase_conf_dir}/hbase-env.sh"),
       owner=params.hbase_user,
       content=InlineTemplate(params.hbase_env_sh_template),
       group=params.user_group,
  )

  # On some OSes this folder may not exist, so create it before placing files there
  Directory(params.limits_conf_dir,
            create_parents=True,
            owner='root',
            group='root')

  File(os.path.join(params.limits_conf_dir, 'hbase.conf'),
       owner='root',
       group='root',
       mode=0644,
       content=Template("hbase.conf.j2"))

  hbase_TemplateConfig(params.metric_prop_file_name,
                       tag='GANGLIA-MASTER' if name == 'master' else 'GANGLIA-RS')

  hbase_TemplateConfig('regionservers')

  if params.security_enabled:
    hbase_TemplateConfig(format("hbase_{name}_jaas.conf"))

  if name != "client":
    Directory(params.pid_dir,
              owner=params.hbase_user,
              create_parents=True,
              cd_access="a",
              mode=0755,
    )

    Directory(params.log_dir,
              owner=params.hbase_user,
              create_parents=True,
              cd_access="a",
              mode=0755,
    )

  if (params.log4j_props != None):
    File(format("{params.hbase_conf_dir}/log4j.properties"),
         mode=0644,
         group=params.user_group,
         owner=params.hbase_user,
         content=InlineTemplate(params.log4j_props))
  elif (os.path.exists(format("{params.hbase_conf_dir}/log4j.properties"))):
    File(format("{params.hbase_conf_dir}/log4j.properties"),
         mode=0644,
         group=params.user_group,
         owner=params.hbase_user)

  if name == "master":
    if not params.hbase_hdfs_root_dir_protocol or params.hbase_hdfs_root_dir_protocol == urlparse(params.default_fs).scheme:
      params.HdfsResource(params.hbase_hdfs_root_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.hbase_user)
    params.HdfsResource(params.hbase_staging_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.hbase_user,
                        mode=0711)
    if params.create_hbase_home_directory:
      params.HdfsResource(params.hbase_home_directory,
                          type="directory",
                          action="create_on_execute",
                          owner=params.hbase_user,
                          mode=0755)
    params.HdfsResource(None, action="execute")

  if params.phoenix_enabled:
    Package(params.phoenix_package,
            retry_on_repo_unavailability=params.agent_stack_retry_on_unavailability,
            retry_count=params.agent_stack_retry_count)
def hive_interactive(name=None):
  import params

  MB_TO_BYTES = 1048576

  # if warehouse directory is in DFS
  if not params.whs_dir_protocol or params.whs_dir_protocol == urlparse(params.default_fs).scheme:
    # Create Hive Metastore Warehouse Dir
    params.HdfsResource(params.hive_apps_whs_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.hive_user,
                        group=params.user_group,
                        mode=params.hive_apps_whs_mode)
  else:
    Logger.info(format("Not creating warehouse directory '{hive_apps_whs_dir}', as the location is not in DFS."))

  # Create Hive User Dir
  params.HdfsResource(params.hive_hdfs_user_dir,
                      type="directory",
                      action="create_on_execute",
                      owner=params.hive_user,
                      mode=params.hive_hdfs_user_mode)

  # list of properties that should be excluded from the config
  # this approach is a compromise against adding a dedicated config
  # type for hive_server_interactive or needed config groups on a
  # per component basis
  exclude_list = ['hive.enforce.bucketing', 'hive.enforce.sorting']

  # List of configs to be excluded from hive2 client, but present in Hive2 server.
  exclude_list_for_hive2_client = ['javax.jdo.option.ConnectionPassword',
                                   'hadoop.security.credential.provider.path']

  # Copy Tarballs in HDFS.
  if params.stack_version_formatted_major and check_stack_feature(StackFeature.ROLLING_UPGRADE, params.stack_version_formatted_major):
    resource_created = copy_to_hdfs("tez_hive2",
                                    params.user_group,
                                    params.hdfs_user,
                                    file_mode=params.tarballs_mode,
                                    skip=params.sysprep_skip_copy_tarballs_hdfs)
    if resource_created:
      params.HdfsResource(None, action="execute")

  Directory(params.hive_interactive_etc_dir_prefix,
            mode=0755)

  Logger.info("Directories to fill with configs: %s" % str(params.hive_conf_dirs_list))
  for conf_dir in params.hive_conf_dirs_list:
    fill_conf_dir(conf_dir)

  '''
  As hive2/hive-site.xml only contains the new + the changed props compared to hive/hive-site.xml,
  we need to merge hive/hive-site.xml and hive2/hive-site.xml and store it in hive2/hive-site.xml.
  '''
  merged_hive_interactive_site = {}
  merged_hive_interactive_site.update(params.config['configurations']['hive-site'])
  merged_hive_interactive_site.update(params.config['configurations']['hive-interactive-site'])
  for item in exclude_list:
    if item in merged_hive_interactive_site.keys():
      del merged_hive_interactive_site[item]

  '''
  Config 'hive.llap.io.memory.size' calculated value in stack_advisor is in MB as of now. We need to
  convert it to bytes before we write it down to config file.
  '''
  if 'hive.llap.io.memory.size' in merged_hive_interactive_site.keys():
    hive_llap_io_mem_size_in_mb = merged_hive_interactive_site.get("hive.llap.io.memory.size")
    hive_llap_io_mem_size_in_bytes = long(hive_llap_io_mem_size_in_mb) * MB_TO_BYTES
    merged_hive_interactive_site['hive.llap.io.memory.size'] = hive_llap_io_mem_size_in_bytes
    Logger.info("Converted 'hive.llap.io.memory.size' value from '{0} MB' to '{1} Bytes' before writing "
                "it to config file.".format(hive_llap_io_mem_size_in_mb, hive_llap_io_mem_size_in_bytes))

  '''
  Hive2 doesn't have support for Atlas, we need to remove the Hook 'org.apache.atlas.hive.hook.HiveHook',
  which would have come in config 'hive.exec.post.hooks' during the site merge logic, if Atlas is installed.
  '''
  # Generate atlas-application.properties.xml file
  if params.enable_atlas_hook and params.stack_supports_atlas_hook_for_hive_interactive:
    Logger.info("Setup for Atlas Hive2 Hook started.")

    atlas_hook_filepath = os.path.join(params.hive_server_interactive_conf_dir, params.atlas_hook_filename)
    setup_atlas_hook(SERVICE.HIVE, params.hive_atlas_application_properties, atlas_hook_filepath, params.hive_user, params.user_group)

    Logger.info("Setup for Atlas Hive2 Hook done.")
  else:
    # Required for HDP 2.5 stacks
    Logger.info("Skipping setup for Atlas Hook, as it is disabled/ not supported.")
    remove_atlas_hook_if_exists(merged_hive_interactive_site)

  '''
  As tez_hive2/tez-site.xml only contains the new + the changed props compared to tez/tez-site.xml,
  we need to merge tez/tez-site.xml and tez_hive2/tez-site.xml and store it in tez_hive2/tez-site.xml.
  '''
  merged_tez_interactive_site = {}
  if 'tez-site' in params.config['configurations']:
    merged_tez_interactive_site.update(params.config['configurations']['tez-site'])
    Logger.info("Retrieved 'tez/tez-site' for merging with 'tez_hive2/tez-interactive-site'.")
  else:
    Logger.error("Tez's 'tez-site' couldn't be retrieved from passed-in configurations.")

  merged_tez_interactive_site.update(params.config['configurations']['tez-interactive-site'])
  XmlConfig("tez-site.xml",
            conf_dir=params.tez_interactive_config_dir,
            configurations=merged_tez_interactive_site,
            configuration_attributes=params.config['configurationAttributes']['tez-interactive-site'],
            owner=params.tez_interactive_user,
            group=params.user_group,
            mode=0664)

  '''
  Merge properties from hiveserver2-interactive-site into hiveserver2-site
  '''
  merged_hiveserver2_interactive_site = {}
  if 'hiveserver2-site' in params.config['configurations']:
    merged_hiveserver2_interactive_site.update(params.config['configurations']['hiveserver2-site'])
    Logger.info("Retrieved 'hiveserver2-site' for merging with 'hiveserver2-interactive-site'.")
  else:
    Logger.error("'hiveserver2-site' couldn't be retrieved from passed-in configurations.")
  merged_hiveserver2_interactive_site.update(params.config['configurations']['hiveserver2-interactive-site'])

  # Create config files under /etc/hive2/conf and /etc/hive2/conf/conf.server:
  #   hive-site.xml
  #   hive-env.sh
  #   llap-daemon-log4j2.properties
  #   llap-cli-log4j2.properties
  #   hive-log4j2.properties
  #   hive-exec-log4j2.properties
  #   beeline-log4j2.properties
  hive2_conf_dirs_list = params.hive_conf_dirs_list
  hive2_client_conf_path = format("{stack_root}/current/{component_directory}/conf")

  # Making a copy of 'merged_hive_interactive_site' in 'merged_hive_interactive_site_copy', and deleting the
  # 'javax.jdo.option.ConnectionPassword' config from there, as the Hive2 client shouldn't have that config.
  merged_hive_interactive_site_copy = merged_hive_interactive_site.copy()
  for item in exclude_list_for_hive2_client:
    if item in merged_hive_interactive_site.keys():
      del merged_hive_interactive_site_copy[item]

  for conf_dir in hive2_conf_dirs_list:
    mode_identified = 0644 if conf_dir == hive2_client_conf_path else 0600
    if conf_dir == hive2_client_conf_path:
      XmlConfig("hive-site.xml",
                conf_dir=conf_dir,
                configurations=merged_hive_interactive_site_copy,
                configuration_attributes=params.config['configurationAttributes']['hive-interactive-site'],
                owner=params.hive_user,
                group=params.user_group,
                mode=0644)
    else:
      merged_hive_interactive_site = update_credential_provider_path(merged_hive_interactive_site,
                                                                     'hive-site',
                                                                     os.path.join(conf_dir, 'hive-site.jceks'),
                                                                     params.hive_user,
                                                                     params.user_group)
      XmlConfig("hive-site.xml",
                conf_dir=conf_dir,
                configurations=merged_hive_interactive_site,
                configuration_attributes=params.config['configurationAttributes']['hive-interactive-site'],
                owner=params.hive_user,
                group=params.user_group,
                mode=0600)

    XmlConfig("hiveserver2-site.xml",
              conf_dir=conf_dir,
              configurations=merged_hiveserver2_interactive_site,
              configuration_attributes=params.config['configurationAttributes']['hiveserver2-interactive-site'],
              owner=params.hive_user,
              group=params.user_group,
              mode=mode_identified)

    hive_server_interactive_conf_dir = conf_dir

    File(format("{hive_server_interactive_conf_dir}/hive-env.sh"),
         owner=params.hive_user,
         group=params.user_group,
         mode=mode_identified,
         content=InlineTemplate(params.hive_interactive_env_sh_template))

    llap_daemon_log4j_filename = 'llap-daemon-log4j2.properties'
    File(format("{hive_server_interactive_conf_dir}/{llap_daemon_log4j_filename}"),
         mode=mode_identified,
         group=params.user_group,
         owner=params.hive_user,
         content=InlineTemplate(params.llap_daemon_log4j))

    llap_cli_log4j2_filename = 'llap-cli-log4j2.properties'
    File(format("{hive_server_interactive_conf_dir}/{llap_cli_log4j2_filename}"),
         mode=mode_identified,
         group=params.user_group,
         owner=params.hive_user,
         content=InlineTemplate(params.llap_cli_log4j2))

    hive_log4j2_filename = 'hive-log4j2.properties'
    File(format("{hive_server_interactive_conf_dir}/{hive_log4j2_filename}"),
         mode=mode_identified,
         group=params.user_group,
         owner=params.hive_user,
         content=InlineTemplate(params.hive_log4j2))

    hive_exec_log4j2_filename = 'hive-exec-log4j2.properties'
    File(format("{hive_server_interactive_conf_dir}/{hive_exec_log4j2_filename}"),
         mode=mode_identified,
         group=params.user_group,
         owner=params.hive_user,
         content=InlineTemplate(params.hive_exec_log4j2))

    beeline_log4j2_filename = 'beeline-log4j2.properties'
    File(format("{hive_server_interactive_conf_dir}/{beeline_log4j2_filename}"),
         mode=mode_identified,
         group=params.user_group,
         owner=params.hive_user,
         content=InlineTemplate(params.beeline_log4j2))

    File(os.path.join(hive_server_interactive_conf_dir, "hadoop-metrics2-hiveserver2.properties"),
         owner=params.hive_user,
         group=params.user_group,
         mode=mode_identified,
         content=Template("hadoop-metrics2-hiveserver2.properties.j2"))

    File(format("{hive_server_interactive_conf_dir}/hadoop-metrics2-llapdaemon.properties"),
         owner=params.hive_user,
         group=params.user_group,
         mode=mode_identified,
         content=Template("hadoop-metrics2-llapdaemon.j2"))

    File(format("{hive_server_interactive_conf_dir}/hadoop-metrics2-llaptaskscheduler.properties"),
         owner=params.hive_user,
         group=params.user_group,
         mode=mode_identified,
         content=Template("hadoop-metrics2-llaptaskscheduler.j2"))

  # On some OSes this folder may not exist, so create it before placing files there
  Directory(params.limits_conf_dir,
            create_parents=True,
            owner='root',
            group='root')

  File(os.path.join(params.limits_conf_dir, 'hive.conf'),
       owner='root',
       group='root',
       mode=0644,
       content=Template("hive.conf.j2"))

  if not os.path.exists(params.target_hive_interactive):
    jdbc_connector(params.target_hive_interactive, params.hive_intaractive_previous_jdbc_jar)

  File(format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"),
       content=DownloadSource(format("{jdk_location}/{check_db_connection_jar_name}")),
       mode=0644)

  File(params.start_hiveserver2_interactive_path,
       mode=0755,
       content=Template(format('{start_hiveserver2_interactive_script}')))

  Directory(params.hive_pid_dir,
            create_parents=True,
            cd_access='a',
            owner=params.hive_user,
            group=params.user_group,
            mode=0755)

  Directory(params.hive_log_dir,
            create_parents=True,
            cd_access='a',
            owner=params.hive_user,
            group=params.user_group,
            mode=0755)

  Directory(params.hive_interactive_var_lib,
            create_parents=True,
            cd_access='a',
            owner=params.hive_user,
            group=params.user_group,
            mode=0755)
def service(action=None, name=None, user=None, options="", create_pid_dir=False, create_log_dir=False): """ :param action: Either "start" or "stop" :param name: Component name, e.g., "namenode", "datanode", "secondarynamenode", "zkfc" :param user: User to run the command as :param options: Additional options to pass to command as a string :param create_pid_dir: Create PID directory :param create_log_dir: Crate log file directory """ import params options = options if options else "" pid_dir = format("{hadoop_pid_dir_prefix}/{user}") pid_file = format("{pid_dir}/hadoop-{user}-{name}.pid") hadoop_env_exports = { 'HADOOP_LIBEXEC_DIR': params.hadoop_libexec_dir } log_dir = format("{hdfs_log_dir_prefix}/{user}") # NFS GATEWAY is always started by root using jsvc due to rpcbind bugs # on Linux such as CentOS6.2. https://bugzilla.redhat.com/show_bug.cgi?id=731542 if name == "nfs3" : pid_file = format("{pid_dir}/hadoop_privileged_nfs3.pid") custom_export = { 'HADOOP_PRIVILEGED_NFS_USER': params.hdfs_user, 'HADOOP_PRIVILEGED_NFS_PID_DIR': pid_dir, 'HADOOP_PRIVILEGED_NFS_LOG_DIR': log_dir } hadoop_env_exports.update(custom_export) process_id_exists_command = as_sudo(["test", "-f", pid_file]) + " && " + as_sudo(["pgrep", "-F", pid_file]) # on STOP directories shouldn't be created # since during stop still old dirs are used (which were created during previous start) if action != "stop": if name == "nfs3": Directory(params.hadoop_pid_dir_prefix, mode=0755, owner=params.root_user, group=params.root_group ) else: Directory(params.hadoop_pid_dir_prefix, mode=0755, owner=params.hdfs_user, group=params.user_group ) if create_pid_dir: Directory(pid_dir, owner=user, group=params.user_group, create_parents = True) if create_log_dir: if name == "nfs3": Directory(log_dir, mode=0775, owner=params.root_user, group=params.user_group) else: Directory(log_dir, owner=user, group=params.user_group, create_parents = True) if params.security_enabled and name == "datanode": ## The directory where pid files are stored in the secure data environment. hadoop_secure_dn_pid_dir = format("{hadoop_pid_dir_prefix}/{hdfs_user}") hadoop_secure_dn_pid_file = format("{hadoop_secure_dn_pid_dir}/hadoop_secure_dn.pid") # At datanode_non_root stack version and further, we may start datanode as a non-root even in secure cluster if not (params.stack_version_formatted and check_stack_feature(StackFeature.DATANODE_NON_ROOT, params.stack_version_formatted)) or params.secure_dn_ports_are_in_use: user = "******" pid_file = format( "{hadoop_pid_dir_prefix}/{hdfs_user}/hadoop-{hdfs_user}-{name}.pid") if action == 'stop' and (params.stack_version_formatted and check_stack_feature(StackFeature.DATANODE_NON_ROOT, params.stack_version_formatted)) and \ os.path.isfile(hadoop_secure_dn_pid_file): # We need special handling for this case to handle the situation # when we configure non-root secure DN and then restart it # to handle new configs. 
Otherwise we will not be able to stop # a running instance user = "******" try: check_process_status(hadoop_secure_dn_pid_file) custom_export = { 'HADOOP_SECURE_DN_USER': params.hdfs_user } hadoop_env_exports.update(custom_export) except ComponentIsNotRunning: pass hadoop_daemon = format("{hadoop_bin}/hadoop-daemon.sh") if user == "root": cmd = [hadoop_daemon, "--config", params.hadoop_conf_dir, action, name] if options: cmd += [options, ] daemon_cmd = as_sudo(cmd) else: cmd = format("{ulimit_cmd} {hadoop_daemon} --config {hadoop_conf_dir} {action} {name}") if options: cmd += " " + options daemon_cmd = as_user(cmd, user) if action == "start": # remove pid file from dead process File(pid_file, action="delete", not_if=process_id_exists_command) try: Execute(daemon_cmd, not_if=process_id_exists_command, environment=hadoop_env_exports) except: show_logs(log_dir, user) raise elif action == "stop": try: Execute(daemon_cmd, only_if=process_id_exists_command, environment=hadoop_env_exports) except: show_logs(log_dir, user) raise File(pid_file, action="delete")
def upgrade_schema(self, env):
  """
  Executes the schema upgrade binary. This is its own function because it could
  be called as a standalone task from the upgrade pack, but is safe to run it for each
  metastore instance. The schema upgrade on an already upgraded metastore is a NOOP.

  The metastore schema upgrade requires a database driver library for most
  databases. During an upgrade, it's possible that the library is not present,
  so this will also attempt to copy/download the appropriate driver.

  This function will also ensure that configurations are written out to disk before running
  since the new configs will most likely not yet exist on an upgrade.

  Should not be invoked for a DOWNGRADE; Metastore only supports schema upgrades.
  """
  Logger.info("Upgrading Hive Metastore Schema")
  import status_params
  import params
  env.set_params(params)

  # ensure that configurations are written out before trying to upgrade the schema
  # since the schematool needs configs and doesn't know how to use the hive conf override
  self.configure(env)

  if params.security_enabled:
    cached_kinit_executor(status_params.kinit_path_local,
                          status_params.hive_user,
                          params.hive_metastore_keytab_path,
                          params.hive_metastore_principal,
                          status_params.hostname,
                          status_params.tmp_dir)

  # ensure that the JDBC driver is present for the schema tool; if it's not
  # present, then download it first
  if params.hive_jdbc_driver in params.hive_jdbc_drivers_list:
    target_directory = format("{stack_root}/{version}/hive/lib")

    # download it if it does not exist
    if not os.path.exists(params.source_jdbc_file):
      jdbc_connector(params.hive_jdbc_target, params.hive_previous_jdbc_jar)

    target_directory_and_filename = os.path.join(target_directory, os.path.basename(params.source_jdbc_file))

    if params.sqla_db_used:
      target_native_libs_directory = format("{target_directory}/native/lib64")

      Execute(format("yes | {sudo} cp {jars_in_hive_lib} {target_directory}"))

      Directory(target_native_libs_directory, create_parents=True)

      Execute(format("yes | {sudo} cp {libs_in_hive_lib} {target_native_libs_directory}"))

      Execute(format("{sudo} chown -R {hive_user}:{user_group} {hive_lib}/*"))
    else:
      # copy the JDBC driver from the older metastore location to the new location only
      # if it does not already exist
      if not os.path.exists(target_directory_and_filename):
        Execute(('cp', params.source_jdbc_file, target_directory),
                path=["/bin", "/usr/bin/"],
                sudo=True)

    File(target_directory_and_filename, mode=0644)

  # build the schema tool command
  binary = format("{hive_schematool_ver_bin}/schematool")

  # the conf.server directory changed locations between stack versions
  # since the configurations have not been written out yet during an upgrade
  # we need to choose the original legacy location
  schematool_hive_server_conf_dir = params.hive_server_conf_dir
  if not (check_stack_feature(StackFeature.CONFIG_VERSIONING, params.version_for_stack_feature_checks)):
    schematool_hive_server_conf_dir = LEGACY_HIVE_SERVER_CONF

  env_dict = {
    'HIVE_CONF_DIR': schematool_hive_server_conf_dir
  }

  command = format("{binary} -dbType {hive_metastore_db_type} -upgradeSchema")
  Execute(command,
          user=params.hive_user,
          tries=1,
          environment=env_dict,
          logoutput=True)
from ambari_commons.os_check import OSCheck from resource_management.libraries.script.script import Script from resource_management.libraries.functions import get_kinit_path from resource_management.libraries.functions.get_not_managed_resources import get_not_managed_resources from resource_management.libraries.resources.hdfs_resource import HdfsResource from resource_management.libraries.functions.stack_features import check_stack_feature from resource_management.libraries.functions.stack_features import get_stack_feature_version from resource_management.libraries.functions import StackFeature from ambari_commons.constants import AMBARI_SUDO_BINARY config = Script.get_config() tmp_dir = Script.get_tmp_dir() artifact_dir = tmp_dir + "/AMBARI-artifacts" version_for_stack_feature_checks = get_stack_feature_version(config) stack_supports_hadoop_custom_extensions = check_stack_feature( StackFeature.HADOOP_CUSTOM_EXTENSIONS, version_for_stack_feature_checks) sudo = AMBARI_SUDO_BINARY # Global flag enabling or disabling the sysprep feature host_sys_prepped = default("/hostLevelParams/host_sys_prepped", False) # Whether to skip copying fast-hdfs-resource.jar to /var/lib/ambari-agent/lib/ # This is required if tarballs are going to be copied to HDFS, so set to False sysprep_skip_copy_fast_jar_hdfs = host_sys_prepped and default( "/configurations/cluster-env/sysprep_skip_copy_fast_jar_hdfs", False) # Whether to skip setting up the unlimited key JCE policy sysprep_skip_setup_jce = host_sys_prepped and default( "/configurations/cluster-env/sysprep_skip_setup_jce", False)
config = Script.get_config() exec_tmp_dir = Script.get_tmp_dir() sudo = AMBARI_SUDO_BINARY stack_name = default("/hostLevelParams/stack_name", None) retryAble = default("/commandParams/command_retry_enabled", False) version = default("/commandParams/version", None) stack_version_unformatted = str(config['hostLevelParams']['stack_version']) stack_version = format_stack_version(stack_version_unformatted) stack_root = status_params.stack_root # get the correct version to use for checking stack features version_for_stack_feature_checks = get_stack_feature_version(config) stack_supports_ranger_audit_db = check_stack_feature( StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks) component_directory = status_params.component_directory #hadoop params hadoop_bin_dir = stack_select.get_hadoop_dir("bin") hadoop_conf_dir = conf_select.get_hadoop_conf_dir() daemon_script = format( '/usr/iop/current/{component_directory}/bin/hbase-daemon.sh') region_mover = format( '/usr/iop/current/{component_directory}/bin/region_mover.rb') region_drainer = format( '/usr/iop/current/{component_directory}/bin/draining_servers.rb') hbase_cmd = format('/usr/iop/current/{component_directory}/bin/hbase') limits_conf_dir = "/etc/security/limits.d"
def hive_service(name, action='start', upgrade_type=None): import params import status_params if name == 'metastore': pid_file = status_params.hive_metastore_pid cmd = format( "{start_metastore_path} {hive_log_dir}/hive.out {hive_log_dir}/hive.err {pid_file} {hive_server_conf_dir} {hive_log_dir}" ) elif name == 'hiveserver2': pid_file = status_params.hive_pid cmd = format( "{start_hiveserver2_path} {hive_log_dir}/hive-server2.out {hive_log_dir}/hive-server2.err {pid_file} {hive_server_conf_dir} {hive_log_dir}" ) if params.security_enabled and check_stack_feature( StackFeature.HIVE_SERVER2_KERBERIZED_ENV, params.version_for_stack_feature_checks): hive_kinit_cmd = format( "{kinit_path_local} -kt {hive_server2_keytab} {hive_principal}; " ) Execute(hive_kinit_cmd, user=params.hive_user) pid = get_user_call_output.get_user_call_output(format("cat {pid_file}"), user=params.hive_user, is_checked_call=False)[1] process_id_exists_command = format( "ls {pid_file} >/dev/null 2>&1 && ps -p {pid} >/dev/null 2>&1") if action == 'start': if name == 'hiveserver2': check_fs_root(params.hive_server_conf_dir, params.execute_path) daemon_cmd = cmd hadoop_home = params.hadoop_home hive_bin = "hive" # upgrading hiveserver2 (rolling_restart) means that there is an existing, # de-registering hiveserver2; the pid will still exist, but the new # hiveserver is spinning up on a new port, so the pid will be re-written if upgrade_type == UPGRADE_TYPE_ROLLING: process_id_exists_command = None if params.version and params.stack_root: hadoop_home = format("{stack_root}/{version}/hadoop") hive_bin = os.path.join(params.hive_bin, hive_bin) Execute(daemon_cmd, user=params.hive_user, environment={ 'HADOOP_HOME': hadoop_home, 'JAVA_HOME': params.java64_home, 'HIVE_BIN': hive_bin }, path=params.execute_path, not_if=process_id_exists_command) if params.hive_jdbc_driver == "com.mysql.jdbc.Driver" or \ params.hive_jdbc_driver == "org.postgresql.Driver" or \ params.hive_jdbc_driver == "oracle.jdbc.driver.OracleDriver": validation_called = False if params.hive_jdbc_target is not None: validation_called = True validate_connection(params.hive_jdbc_target, params.hive_lib) if params.hive2_jdbc_target is not None: validation_called = True validate_connection(params.hive2_jdbc_target, params.hive_server2_hive2_lib) if not validation_called: emessage = "ERROR! DB connection check should be executed at least one time!" Logger.error(emessage) elif action == 'stop': daemon_kill_cmd = format("{sudo} kill {pid}") daemon_hard_kill_cmd = format("{sudo} kill -9 {pid}") Execute(daemon_kill_cmd, not_if=format("! ({process_id_exists_command})")) wait_time = 5 Execute( daemon_hard_kill_cmd, not_if=format( "! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )" ), ignore_failures=True) try: # check if stopped the process, else fail the task Execute( format("! ({process_id_exists_command})"), tries=20, try_sleep=3, ) except: show_logs(params.hive_log_dir, params.hive_user) raise File(pid_file, action="delete")
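The stop branch above escalates from a plain kill to kill -9 and then polls until the pid is gone before failing the task. A standalone sketch of that escalation, assuming plain os/signal/time calls in place of the guarded Execute resources; names and timings are illustrative.

import os
import signal
import time

def process_alive(pid):
    try:
        os.kill(pid, 0)                           # signal 0: existence check only
        return True
    except OSError:
        return False

def stop_hive_process(pid, wait_time=5, tries=20, try_sleep=3):
    if process_alive(pid):
        os.kill(pid, signal.SIGTERM)              # polite kill first
        time.sleep(wait_time)
    if process_alive(pid):
        os.kill(pid, signal.SIGKILL)              # hard kill if it is still there
    for _ in range(tries):                        # verify, otherwise fail the task
        if not process_alive(pid):
            return
        time.sleep(try_sleep)
    raise RuntimeError("process %d did not stop" % pid)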
major_stack_version = get_major_version(stack_version_formatted) # e.g. 2.3.0.0-2130 full_stack_version = default("/commandParams/version", None) spark_client_version = get_stack_version('spark-client') hbase_master_hosts = default("/clusterHostInfo/hbase_master_hosts", []) livy_hosts = default("/clusterHostInfo/livy_server_hosts", []) livy2_hosts = default("/clusterHostInfo/livy2_server_hosts", []) livy_livyserver_host = None livy_livyserver_port = None livy2_livyserver_host = None livy2_livyserver_port = None if stack_version_formatted and check_stack_feature(StackFeature.SPARK_LIVY, stack_version_formatted) and \ len(livy_hosts) > 0: livy_livyserver_host = str(livy_hosts[0]) livy_livyserver_port = config['configurations']['livy-conf']['livy.server.port'] if stack_version_formatted and check_stack_feature(StackFeature.SPARK_LIVY2, stack_version_formatted) and \ len(livy2_hosts) > 0: livy2_livyserver_host = str(livy2_hosts[0]) livy2_livyserver_port = config['configurations']['livy2-conf']['livy.server.port'] hdfs_user = config['configurations']['hadoop-env']['hdfs_user'] security_enabled = config['configurations']['cluster-env']['security_enabled'] hdfs_user_keytab = config['configurations']['hadoop-env']['hdfs_user_keytab'] kinit_path_local = get_kinit_path(default('/configurations/kerberos-env/executable_search_paths', None)) hadoop_bin_dir = stack_select.get_hadoop_dir("bin") hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
# server configurations config = Script.get_config() stack_root = Script.get_stack_root() tmp_dir = Script.get_tmp_dir() stack_name = status_params.stack_name upgrade_direction = default("/commandParams/upgrade_direction", None) version = default("/commandParams/version", None) # E.g., 2.3.2.0 version_formatted = format_stack_version(version) # E.g., 2.3 stack_version_unformatted = config['hostLevelParams']['stack_version'] stack_version_formatted = format_stack_version(stack_version_unformatted) stack_supports_ranger_kerberos = stack_version_formatted and check_stack_feature( StackFeature.RANGER_KERBEROS_SUPPORT, stack_version_formatted) # This is the version whose state is CURRENT. During an RU, this is the source version. # DO NOT format it since we need the build number too. upgrade_from_version = default("/hostLevelParams/current_version", None) # server configurations # Default value used in HDP 2.3.0.0 and earlier. knox_data_dir = '/var/lib/knox/data' # Important, it has to be strictly greater than 2.3.0.0!!! Logger.info(format("Stack version to use is {version_formatted}")) if version_formatted and check_stack_feature( StackFeature.KNOX_VERSIONED_DATA_DIR, version_formatted): # This is the current version. In the case of a Rolling Upgrade, it will be the newer version. # In the case of a Downgrade, it will be the version downgrading to.
# there is a stack upgrade which has not yet been finalized; it's currently suspended upgrade_suspended = default("roleParams/upgrade_suspended", False) # New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade version = default("/commandParams/version", None) # The desired role is only available during a Non-Rolling Upgrade in HA. # The server calculates which of the two NameNodes will be the active, and the other the standby since they # are started using different commands. desired_namenode_role = default("/commandParams/desired_namenode_role", None) # get the correct version to use for checking stack features version_for_stack_feature_checks = get_stack_feature_version(config) stack_supports_ranger_kerberos = check_stack_feature( StackFeature.RANGER_KERBEROS_SUPPORT, version_for_stack_feature_checks) stack_supports_ranger_audit_db = check_stack_feature( StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks) stack_supports_zk_security = check_stack_feature( StackFeature.SECURE_ZOOKEEPER, version_for_stack_feature_checks) security_enabled = config['configurations']['cluster-env']['security_enabled'] hdfs_user = status_params.hdfs_user root_user = "******" hadoop_pid_dir_prefix = status_params.hadoop_pid_dir_prefix namenode_pid_file = status_params.namenode_pid_file zkfc_pid_file = status_params.zkfc_pid_file datanode_pid_file = status_params.datanode_pid_file # Some datanode settings dfs_dn_addr = default('/configurations/hdfs-site/dfs.datanode.address', None)
def oozie_server_specific(): import params no_op_test = as_user(format( "ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1" ), user=params.oozie_user) File(params.pid_file, action="delete", not_if=no_op_test) oozie_server_directories = [ format("{oozie_home}/{oozie_tmp_dir}"), params.oozie_pid_dir, params.oozie_log_dir, params.oozie_tmp_dir, params.oozie_data_dir, params.oozie_lib_dir, params.oozie_webapps_dir, params.oozie_webapps_conf_dir, params.oozie_server_dir ] Directory( oozie_server_directories, owner=params.oozie_user, group=params.user_group, mode=0755, create_parents=True, cd_access="a", ) Directory( params.oozie_libext_dir, create_parents=True, ) hashcode_file = format("{oozie_home}/.hashcode") skip_recreate_sharelib = format( "test -f {hashcode_file} && test -d {oozie_home}/share") untar_sharelib = ('tar', '-xvf', format('{oozie_home}/oozie-sharelib.tar.gz'), '-C', params.oozie_home) Execute( untar_sharelib, # time-expensive not_if=format("{no_op_test} || {skip_recreate_sharelib}"), sudo=True, ) configure_cmds = [] configure_cmds.append(('cp', params.ext_js_path, params.oozie_libext_dir)) configure_cmds.append(('chown', format('{oozie_user}:{user_group}'), format('{oozie_libext_dir}/{ext_js_file}'))) Execute( configure_cmds, not_if=no_op_test, sudo=True, ) Directory( params.oozie_webapps_conf_dir, owner=params.oozie_user, group=params.user_group, recursive_ownership=True, recursion_follow_links=True, ) # download the database JAR download_database_library_if_needed() #falcon el extension if params.has_falcon_host: Execute(format( '{sudo} cp {falcon_home}/oozie/ext/falcon-oozie-el-extension-*.jar {oozie_libext_dir}' ), not_if=no_op_test) Execute(format( '{sudo} chown {oozie_user}:{user_group} {oozie_libext_dir}/falcon-oozie-el-extension-*.jar' ), not_if=no_op_test) if params.lzo_enabled and len(params.all_lzo_packages) > 0: Package(params.all_lzo_packages, retry_on_repo_unavailability=params. agent_stack_retry_on_unavailability, retry_count=params.agent_stack_retry_count) Execute( format( '{sudo} cp {hadoop_lib_home}/hadoop-lzo*.jar {oozie_lib_dir}'), not_if=no_op_test, ) prepare_war(params) File( hashcode_file, mode=0644, ) if params.stack_version_formatted and check_stack_feature( StackFeature.OOZIE_CREATE_HIVE_TEZ_CONFIGS, params.stack_version_formatted): # Create hive-site and tez-site configs for oozie Directory(params.hive_conf_dir, create_parents=True, owner=params.oozie_user, group=params.user_group) if 'hive-site' in params.config['configurations']: XmlConfig( "hive-site.xml", conf_dir=params.hive_conf_dir, configurations=params.config['configurations']['hive-site'], configuration_attributes=params. config['configuration_attributes']['hive-site'], owner=params.oozie_user, group=params.user_group, mode=0644) if 'tez-site' in params.config['configurations']: XmlConfig( "tez-site.xml", conf_dir=params.hive_conf_dir, configurations=params.config['configurations']['tez-site'], configuration_attributes=params. config['configuration_attributes']['tez-site'], owner=params.oozie_user, group=params.user_group, mode=0664) # If Atlas is also installed, need to generate Atlas Hive hook (hive-atlas-application.properties file) in directory # {stack_root}/{current_version}/atlas/hook/hive/ # Because this is a .properties file instead of an xml file, it will not be read automatically by Oozie. # However, should still save the file on this host so that can upload it to the Oozie Sharelib in DFS. 
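The sharelib extraction above is guarded so the time-expensive untar only runs when the server is down and the hashcode marker or share directory is missing. A simplified standalone equivalent, with placeholder paths and the tarfile module standing in for the sudo tar command:

import os
import tarfile

def extract_sharelib(oozie_home, server_running):
    hashcode_file = os.path.join(oozie_home, ".hashcode")
    share_dir = os.path.join(oozie_home, "share")
    if server_running:
        return                                    # equivalent of the no_op_test guard
    if os.path.isfile(hashcode_file) and os.path.isdir(share_dir):
        return                                    # equivalent of skip_recreate_sharelib
    archive = os.path.join(oozie_home, "oozie-sharelib.tar.gz")
    with tarfile.open(archive) as tar:            # the expensive step the guards protect
        tar.extractall(oozie_home)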
if has_atlas_in_cluster(): atlas_hook_filepath = os.path.join(params.hive_conf_dir, params.atlas_hook_filename) Logger.info( "Has atlas in cluster, will save Atlas Hive hook into location %s" % str(atlas_hook_filepath)) setup_atlas_hook(SERVICE.HIVE, params.hive_atlas_application_properties, atlas_hook_filepath, params.oozie_user, params.user_group) Directory( params.oozie_server_dir, owner=params.oozie_user, group=params.user_group, recursive_ownership=True, )
tmp_dir = Script.get_tmp_dir() stack_name = status_params.stack_name stack_root = Script.get_stack_root() stack_version_unformatted = config['hostLevelParams']['stack_version'] stack_version_formatted = format_stack_version(stack_version_unformatted) host_sys_prepped = default("/hostLevelParams/host_sys_prepped", False) # New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade version = default("/commandParams/version", None) spark_conf = '/etc/spark2/conf' hadoop_conf_dir = conf_select.get_hadoop_conf_dir() hadoop_bin_dir = stack_select.get_hadoop_dir("bin") if stack_version_formatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version_formatted): hadoop_home = stack_select.get_hadoop_dir("home") spark_conf = format("{stack_root}/current/{component_directory}/conf") spark_log_dir = config['configurations']['spark2-env']['spark_log_dir'] spark_pid_dir = status_params.spark_pid_dir spark_home = format("{stack_root}/current/{component_directory}") spark_thrift_server_conf_file = spark_conf + "/spark-thrift-sparkconf.conf" java_home = config['hostLevelParams']['java_home'] hdfs_user = config['configurations']['hadoop-env']['hdfs_user'] hdfs_principal_name = config['configurations']['hadoop-env']['hdfs_principal_name'] hdfs_user_keytab = config['configurations']['hadoop-env']['hdfs_user_keytab'] user_group = config['configurations']['cluster-env']['user_group'] spark_user = status_params.spark_user
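A small hypothetical helper showing the two conf-dir outcomes of the rolling-upgrade check above; the stack root and component directory in the usage comments are example values only.

def resolve_spark_conf(stack_root, component_directory, supports_rolling_upgrade):
    if supports_rolling_upgrade:
        return "{0}/current/{1}/conf".format(stack_root, component_directory)
    return "/etc/spark2/conf"

# resolve_spark_conf("/usr/hdp", "spark2-client", True)
#   -> "/usr/hdp/current/spark2-client/conf"
# resolve_spark_conf("/usr/hdp", "spark2-client", False)
#   -> "/etc/spark2/conf"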
hdfs_tmp_dir = config['configurations']['hadoop-env']['hdfs_tmp_dir'] # hadoop default parameters mapreduce_libs_path = "/usr/lib/hadoop-mapreduce/*" hadoop_libexec_dir = stack_select.get_hadoop_dir("libexec") hadoop_bin = stack_select.get_hadoop_dir("sbin") hadoop_bin_dir = stack_select.get_hadoop_dir("bin") hadoop_home = stack_select.get_hadoop_dir("home") hadoop_secure_dn_user = hdfs_user hadoop_conf_dir = conf_select.get_hadoop_conf_dir() hadoop_conf_secure_dir = os.path.join(hadoop_conf_dir, "secure") hadoop_lib_home = stack_select.get_hadoop_dir("lib") # hadoop parameters for stacks that support rolling_upgrade if stack_version_formatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version_formatted): mapreduce_libs_path = format("{stack_root}/current/hadoop-mapreduce-client/*") if not security_enabled: hadoop_secure_dn_user = '******' else: dfs_dn_port = utils.get_port(dfs_dn_addr) dfs_dn_http_port = utils.get_port(dfs_dn_http_addr) dfs_dn_https_port = utils.get_port(dfs_dn_https_addr) # We try to avoid inability to start datanode as a plain user due to usage of root-owned ports if dfs_http_policy == "HTTPS_ONLY": secure_dn_ports_are_in_use = utils.is_secure_port(dfs_dn_port) or utils.is_secure_port(dfs_dn_https_port) elif dfs_http_policy == "HTTP_AND_HTTPS": secure_dn_ports_are_in_use = utils.is_secure_port(dfs_dn_port) or utils.is_secure_port(dfs_dn_http_port) or utils.is_secure_port(dfs_dn_https_port) else: # params.dfs_http_policy == "HTTP_ONLY" or not defined: secure_dn_ports_are_in_use = utils.is_secure_port(dfs_dn_port) or utils.is_secure_port(dfs_dn_http_port)
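The port checks above boil down to whether any of the configured DataNode ports is privileged, which is what forces the DataNode to start as root in the classic secure setup. A standalone sketch, assuming the is_secure_port helper treats ports below 1024 as secure (the usual convention); the ports in the usage comment are examples.

def is_secure_port(port):
    return port is not None and int(port) < 1024

def secure_dn_ports_in_use(dfs_http_policy, dn_port, dn_http_port, dn_https_port):
    if dfs_http_policy == "HTTPS_ONLY":
        return is_secure_port(dn_port) or is_secure_port(dn_https_port)
    elif dfs_http_policy == "HTTP_AND_HTTPS":
        return (is_secure_port(dn_port) or is_secure_port(dn_http_port)
                or is_secure_port(dn_https_port))
    else:  # HTTP_ONLY or not defined
        return is_secure_port(dn_port) or is_secure_port(dn_http_port)

# The classic secure DataNode layout uses ports 1019/1022, so this returns True:
# secure_dn_ports_in_use("HTTP_ONLY", 1019, 1022, None)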
def _valid(stack_name, package, ver): return (ver and check_stack_feature(StackFeature.CONFIG_VERSIONING, ver))
def get_hadoop_conf_dir(force_latest_on_upgrade=False): """ Gets the shared hadoop conf directory using: 1. Start with /etc/hadoop/conf 2. When the stack is greater than HDP-2.2, use <stack-root>/current/hadoop-client/conf 3. Only when doing a RU and HDP-2.3 or higher, use the value as computed by <conf-selector-tool>. This is in the form <stack-root>/VERSION/hadoop/conf to make sure the configs are written in the correct place. However, if the component itself has not yet been upgraded, it should use the hadoop configs from the prior version. This will perform an <stack-selector-tool> status to determine which version to use. :param force_latest_on_upgrade: if True, then force the returned path to always be that of the upgrade target version, even if <stack-selector-tool> has not been called. This is primarily used by hooks like before-ANY to ensure that hadoop environment configurations are written to the correct location since they are written out before the <stack-selector-tool>/<conf-selector-tool> would have been called. """ hadoop_conf_dir = "/etc/hadoop/conf" stack_name = None stack_root = Script.get_stack_root() stack_version = Script.get_stack_version() version = None allow_setting_conf_select_symlink = False if not Script.in_stack_upgrade(): # During normal operation, the HDP stack must be 2.3 or higher if stack_version and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version): hadoop_conf_dir = os.path.join(stack_root, "current", "hadoop-client", "conf") if stack_version and check_stack_feature(StackFeature.CONFIG_VERSIONING, stack_version): hadoop_conf_dir = os.path.join(stack_root, "current", "hadoop-client", "conf") stack_name = default("/hostLevelParams/stack_name", None) version = default("/commandParams/version", None) if stack_name and version: version = str(version) allow_setting_conf_select_symlink = True else: # During an upgrade/downgrade, which can be a Rolling or Express Upgrade, need to calculate it based on the version ''' Whenever upgrading to HDP 2.2, or downgrading back to 2.2, need to use /etc/hadoop/conf Whenever upgrading to HDP 2.3, or downgrading back to 2.3, need to use a versioned hadoop conf dir Type__|_Source_|_Target_|_Direction_____________|_Comment_____________________________________________________________ Normal| | 2.2 | | Use /etc/hadoop/conf Normal| | 2.3 | | Use /etc/hadoop/conf, which should be a symlink to <stack-root>/current/hadoop-client/conf EU | 2.1 | 2.3 | Upgrade | Use versioned <stack-root>/current/hadoop-client/conf | | | No Downgrade Allowed | Invalid EU/RU | 2.2 | 2.2.* | Any | Use <stack-root>/current/hadoop-client/conf EU/RU | 2.2 | 2.3 | Upgrade | Use <stack-root>/$version/hadoop/conf, which should be a symlink destination | | | Downgrade | Use <stack-root>/current/hadoop-client/conf EU/RU | 2.3 | 2.3.* | Any | Use <stack-root>/$version/hadoop/conf, which should be a symlink destination ''' # The "stack_version" is the desired stack, e.g., 2.2 or 2.3 # In an RU, it is always the desired stack, and doesn't change even during the Downgrade! # In an RU Downgrade from HDP 2.3 to 2.2, the first thing we do is # rm /etc/[component]/conf and then mv /etc/[component]/conf.backup /etc/[component]/conf if stack_version and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version): hadoop_conf_dir = os.path.join(stack_root, "current", "hadoop-client", "conf") # This contains the "version", including the build number, that is actually used during a stack upgrade and # is the version upgrading/downgrading to. 
stack_info = stack_select._get_upgrade_stack() if stack_info is not None: stack_name = stack_info[0] version = stack_info[1] else: raise Fail("Unable to get parameter 'version'") Logger.info("In the middle of a stack upgrade/downgrade for Stack {0} and destination version {1}, determining which hadoop conf dir to use.".format(stack_name, version)) # This is the version either upgrading or downgrading to. if version and check_stack_feature(StackFeature.CONFIG_VERSIONING, version): # Determine if <stack-selector-tool> has been run and if not, then use the current # hdp version until this component is upgraded. if not force_latest_on_upgrade: current_stack_version = stack_select.get_role_component_current_stack_version() if current_stack_version is not None and version != current_stack_version: version = current_stack_version stack_selector_name = stack_tools.get_stack_tool_name(stack_tools.STACK_SELECTOR_NAME) Logger.info("{0} has not yet been called to update the symlink for this component, " "keep using version {1}".format(stack_selector_name, current_stack_version)) # Only change the hadoop_conf_dir path, don't <conf-selector-tool> this older version hadoop_conf_dir = os.path.join(stack_root, version, "hadoop", "conf") Logger.info("Hadoop conf dir: {0}".format(hadoop_conf_dir)) allow_setting_conf_select_symlink = True if allow_setting_conf_select_symlink: # If not in the middle of an upgrade and on HDP 2.3 or higher, or if # upgrading stack to version 2.3.0.0 or higher (which may be upgrade or downgrade), then consider setting the # symlink for /etc/hadoop/conf. # If a host does not have any HDFS or YARN components (e.g., only ZK), then it will not contain /etc/hadoop/conf # Therefore, any calls to <conf-selector-tool> will fail. # For that reason, if the hadoop conf directory exists, then make sure it is set. if os.path.exists(hadoop_conf_dir): conf_selector_name = stack_tools.get_stack_tool_name(stack_tools.CONF_SELECTOR_NAME) Logger.info("The hadoop conf dir {0} exists, will call {1} on it for version {2}".format( hadoop_conf_dir, conf_selector_name, version)) select(stack_name, "hadoop", version) Logger.info("Using hadoop conf dir: {0}".format(hadoop_conf_dir)) return hadoop_conf_dir
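Condensing the upgrade branch above into a small hypothetical helper; it collapses the stack-feature checks and the selector-tool status lookup into plain arguments, so it is a sketch of the flow rather than the real resolution, and the version strings in the usage comment are made up. Outside an upgrade the path is simply <stack_root>/current/hadoop-client/conf.

import os

def upgrade_conf_dir(stack_root, target_version, current_version):
    # until <stack-selector-tool> moves the symlinks, keep using the version
    # that is still active on this host
    version = target_version
    if current_version is not None and current_version != target_version:
        version = current_version
    return os.path.join(stack_root, version, "hadoop", "conf")

# upgrade_conf_dir("/usr/hdp", "2.3.2.0-2950", "2.2.9.0-3393")
#   -> "/usr/hdp/2.2.9.0-3393/hadoop/conf"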
def setup_spark(env, type, upgrade_type=None, action=None, config_dir=None): """ :param env: Python environment :param type: Spark component type :param upgrade_type: If in a stack upgrade, either UPGRADE_TYPE_ROLLING or UPGRADE_TYPE_NON_ROLLING :param action: Action to perform, such as generate configs :param config_dir: Optional config directory to write configs to. """ import params if config_dir is None: config_dir = params.spark_conf Directory([params.spark_pid_dir, params.spark_log_dir], owner=params.spark_user, group=params.user_group, mode=0775, create_parents=True) if type == 'server' and action == 'config': params.HdfsResource(params.spark_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.spark_user, mode=0775) params.HdfsResource(None, action="execute") PropertiesFile( os.path.join(config_dir, "spark-defaults.conf"), properties=params.config['configurations']['spark-defaults'], key_value_delimiter=" ", owner=params.spark_user, group=params.spark_group, mode=0644) # create spark-env.sh in etc/conf dir File( os.path.join(config_dir, 'spark-env.sh'), owner=params.spark_user, group=params.spark_group, content=InlineTemplate(params.spark_env_sh), mode=0644, ) #create log4j.properties in etc/conf dir File( os.path.join(config_dir, 'log4j.properties'), owner=params.spark_user, group=params.spark_group, content=params.spark_log4j_properties, mode=0644, ) #create metrics.properties in etc/conf dir File(os.path.join(config_dir, 'metrics.properties'), owner=params.spark_user, group=params.spark_group, content=InlineTemplate(params.spark_metrics_properties), mode=0644) Directory( params.spark_logs_dir, owner=params.spark_user, group=params.spark_group, mode=0755, ) if params.is_hive_installed: XmlConfig("hive-site.xml", conf_dir=config_dir, configurations=params.spark_hive_properties, owner=params.spark_user, group=params.spark_group, mode=0644) if params.has_spark_thriftserver: PropertiesFile(params.spark_thrift_server_conf_file, properties=params.config['configurations'] ['spark-thrift-sparkconf'], owner=params.hive_user, group=params.user_group, key_value_delimiter=" ", mode=0644) effective_version = params.version if upgrade_type is not None else params.stack_version_formatted if effective_version: effective_version = format_stack_version(effective_version) if effective_version and check_stack_feature( StackFeature.SPARK_JAVA_OPTS_SUPPORT, effective_version): File(os.path.join(params.spark_conf, 'java-opts'), owner=params.spark_user, group=params.spark_group, content=InlineTemplate(params.spark_javaopts_properties), mode=0644) else: File(os.path.join(params.spark_conf, 'java-opts'), action="delete") if params.spark_thrift_fairscheduler_content and effective_version and check_stack_feature( StackFeature.SPARK_16PLUS, effective_version): # create spark-thrift-fairscheduler.xml File(os.path.join(config_dir, "spark-thrift-fairscheduler.xml"), owner=params.spark_user, group=params.spark_group, mode=0755, content=InlineTemplate(params.spark_thrift_fairscheduler_content))
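The spark-defaults.conf written above is a plain "key value" file, which is why the PropertiesFile uses a single space as the key/value delimiter. A tiny standalone rendering sketch with example properties:

def render_spark_defaults(properties):
    return "\n".join("{0} {1}".format(k, v) for k, v in sorted(properties.items()))

print(render_spark_defaults({
    "spark.eventLog.enabled": "true",
    "spark.eventLog.dir": "hdfs:///spark2-history/",
    "spark.history.fs.logDirectory": "hdfs:///spark2-history/",
}))
# spark.eventLog.dir hdfs:///spark2-history/
# spark.eventLog.enabled true
# spark.history.fs.logDirectory hdfs:///spark2-history/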
def prepare_libext_directory(): """ Performs the following actions on libext: - creates <stack-root>/current/oozie/libext and recursively - set 777 permissions on it and its parents. - downloads JDBC driver JAR if needed - copies Falcon JAR for the Oozie WAR if needed """ import params # some stack versions don't need the lzo compression libraries target_version_needs_compression_libraries = params.version and check_stack_feature( StackFeature.LZO, params.version) # ensure the directory exists Directory(params.oozie_libext_dir, mode=0777) # get all hadooplzo* JAR files # <stack-selector-tool> set hadoop-client has not run yet, therefore we cannot use # <stack-root>/current/hadoop-client ; we must use params.version directly # however, this only works when upgrading beyond 2.2.0.0; don't do this # for downgrade to 2.2.0.0 since hadoop-lzo will not be present # This can also be called during a Downgrade. # When a version is Installed, it is responsible for downloading the hadoop-lzo packages # if lzo is enabled. if params.lzo_enabled and ( params.upgrade_direction == Direction.UPGRADE or target_version_needs_compression_libraries): hadoop_lzo_pattern = 'hadoop-lzo*.jar' hadoop_client_new_lib_dir = format( "{stack_root}/{version}/hadoop/lib") files = glob.iglob( os.path.join(hadoop_client_new_lib_dir, hadoop_lzo_pattern)) if not files: raise Fail("There are no files at {0} matching {1}".format( hadoop_client_new_lib_dir, hadoop_lzo_pattern)) # copy files into libext files_copied = False for file in files: if os.path.isfile(file): Logger.info("Copying {0} to {1}".format( str(file), params.oozie_libext_dir)) shutil.copy2(file, params.oozie_libext_dir) files_copied = True if not files_copied: raise Fail("There are no files at {0} matching {1}".format( hadoop_client_new_lib_dir, hadoop_lzo_pattern)) # something like <stack-root>/current/oozie-server/libext/ext-2.2.zip oozie_ext_zip_target_path = os.path.join(params.oozie_libext_dir, params.ext_js_file) # Copy ext ZIP to libext dir # Default to /usr/share/$TARGETSTACK-oozie/ext-2.2.zip as the first path source_ext_zip_paths = oozie.get_oozie_ext_zip_source_paths( upgrade_type, params) found_at_least_one_oozie_ext_file = False # Copy the first oozie ext-2.2.zip file that is found. # This uses a list to handle the cases when migrating from some versions of BigInsights to HDP. if source_ext_zip_paths is not None: for source_ext_zip_path in source_ext_zip_paths: if os.path.isfile(source_ext_zip_path): found_at_least_one_oozie_ext_file = True Logger.info("Copying {0} to {1}".format( source_ext_zip_path, params.oozie_libext_dir)) Execute( ("cp", source_ext_zip_path, params.oozie_libext_dir), sudo=True) Execute(("chown", format("{oozie_user}:{user_group}"), oozie_ext_zip_target_path), sudo=True) File(oozie_ext_zip_target_path, mode=0644) break if not found_at_least_one_oozie_ext_file: raise Fail( "Unable to find any Oozie source extension files from the following paths {0}" .format(source_ext_zip_paths)) # Redownload jdbc driver to a new current location oozie.download_database_library_if_needed() # get the upgrade version in the event that it's needed upgrade_stack = stack_select._get_upgrade_stack() if upgrade_stack is None or len( upgrade_stack) < 2 or upgrade_stack[1] is None: raise Fail( "Unable to determine the stack that is being upgraded to or downgraded to." 
) stack_version = upgrade_stack[1] # copy the Falcon JAR if needed; falcon has not upgraded yet, so we must # use the versioned falcon directory if params.has_falcon_host: versioned_falcon_jar_directory = "{0}/{1}/falcon/oozie/ext/falcon-oozie-el-extension-*.jar".format( params.stack_root, stack_version) Logger.info("Copying {0} to {1}".format( versioned_falcon_jar_directory, params.oozie_libext_dir)) Execute( format( '{sudo} cp {versioned_falcon_jar_directory} {oozie_libext_dir}' )) Execute( format( '{sudo} chown {oozie_user}:{user_group} {oozie_libext_dir}/falcon-oozie-el-extension-*.jar' ))
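Note that glob.iglob() returns an iterator, which is always truthy, so the first "if not files:" guard in the hadoop-lzo lookup above cannot fire by itself; the files_copied flag is what actually catches the empty case. A list-based sketch keeps the emptiness check direct; the paths and helper name here are illustrative.

import glob
import os
import shutil

def copy_lzo_jars(hadoop_lib_dir, libext_dir, pattern="hadoop-lzo*.jar"):
    files = glob.glob(os.path.join(hadoop_lib_dir, pattern))   # a real list, so emptiness is testable
    if not files:
        raise RuntimeError("There are no files at %s matching %s" % (hadoop_lib_dir, pattern))
    for path in files:
        if os.path.isfile(path):
            shutil.copy2(path, libext_dir)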
architecture = get_architecture()

stack_name = status_params.stack_name
stack_root = Script.get_stack_root()
tarball_map = default("/configurations/cluster-env/tarball_map", None)

config_path = os.path.join(stack_root, "current/hadoop-client/conf")
config_dir = os.path.realpath(config_path)

# This is expected to be of the form #.#.#.#
stack_version_unformatted = config['hostLevelParams']['stack_version']
stack_version_formatted_major = format_stack_version(stack_version_unformatted)
stack_version_formatted = functions.get_stack_version('hadoop-yarn-resourcemanager')

stack_supports_ru = stack_version_formatted_major and check_stack_feature(
    StackFeature.ROLLING_UPGRADE, stack_version_formatted_major)
stack_supports_timeline_state_store = stack_version_formatted_major and check_stack_feature(
    StackFeature.TIMELINE_STATE_STORE, stack_version_formatted_major)

# New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade.
# It cannot be used during the initial Cluster Install because the version is not yet known.
version = default("/commandParams/version", None)

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)

stack_supports_ranger_kerberos = check_stack_feature(
    StackFeature.RANGER_KERBEROS_SUPPORT, version_for_stack_feature_checks)
stack_supports_ranger_audit_db = check_stack_feature(
    StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks)
def spark_service(name, upgrade_type=None, action=None): import params if action == 'start': effective_version = params.version if upgrade_type is not None else params.stack_version_formatted if effective_version: effective_version = format_stack_version(effective_version) if effective_version and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version): # create & copy spark2-hdp-yarn-archive.tar.gz to hdfs source_dir=params.spark_home+"/jars" tmp_archive_file="/tmp/spark2/spark2-hdp-yarn-archive.tar.gz" make_tarfile(tmp_archive_file, source_dir) copy_to_hdfs("spark2", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped) # create spark history directory params.HdfsResource(params.spark_history_dir, type="directory", action="create_on_execute", owner=params.spark_user, group=params.user_group, mode=0777, recursive_chmod=True ) params.HdfsResource(None, action="execute") if params.security_enabled: spark_kinit_cmd = format("{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ") Execute(spark_kinit_cmd, user=params.spark_user) # Spark 1.3.1.2.3, and higher, which was included in HDP 2.3, does not have a dependency on Tez, so it does not # need to copy the tarball, otherwise, copy it. if params.stack_version_formatted and check_stack_feature(StackFeature.TEZ_FOR_SPARK, params.stack_version_formatted): resource_created = copy_to_hdfs("tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped) if resource_created: params.HdfsResource(None, action="execute") if name == 'jobhistoryserver': historyserver_no_op_test = format( 'ls {spark_history_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_history_server_pid_file}` >/dev/null 2>&1') try: Execute(format('{spark_history_server_start}'), user=params.spark_user, environment={'JAVA_HOME': params.java_home}, not_if=historyserver_no_op_test) except: show_logs(params.spark_log_dir, user=params.spark_user) raise elif name == 'sparkthriftserver': if params.security_enabled: hive_principal = params.hive_kerberos_principal.replace('_HOST', socket.getfqdn().lower()) hive_kinit_cmd = format("{kinit_path_local} -kt {hive_kerberos_keytab} {hive_principal}; ") Execute(hive_kinit_cmd, user=params.hive_user) thriftserver_no_op_test = format( 'ls {spark_thrift_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_thrift_server_pid_file}` >/dev/null 2>&1') try: Execute(format('{spark_thrift_server_start} --properties-file {spark_thrift_server_conf_file} {spark_thrift_cmd_opts_properties}'), user=params.hive_user, environment={'JAVA_HOME': params.java_home}, not_if=thriftserver_no_op_test ) except: show_logs(params.spark_log_dir, user=params.hive_user) raise elif action == 'stop': if name == 'jobhistoryserver': try: Execute(format('{spark_history_server_stop}'), user=params.spark_user, environment={'JAVA_HOME': params.java_home} ) except: show_logs(params.spark_log_dir, user=params.spark_user) raise File(params.spark_history_server_pid_file, action="delete" ) elif name == 'sparkthriftserver': try: Execute(format('{spark_thrift_server_stop}'), user=params.hive_user, environment={'JAVA_HOME': params.java_home} ) except: show_logs(params.spark_log_dir, user=params.hive_user) raise File(params.spark_thrift_server_pid_file, action="delete" )
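make_tarfile() is called above to pack the Spark jars before copy_to_hdfs pushes the archive; its body is not shown in this snippet, so the following is only a plausible standalone version built on the tarfile module, with placeholder paths in the usage comment.

import os
import tarfile

def make_tarfile(output_filename, source_dir):
    parent_dir = os.path.dirname(output_filename)
    if parent_dir and not os.path.isdir(parent_dir):
        os.makedirs(parent_dir)                       # e.g. /tmp/spark2
    with tarfile.open(output_filename, "w:gz") as tar:
        for name in os.listdir(source_dir):
            # keep the jars at the top level of the archive, where YARN expects them
            tar.add(os.path.join(source_dir, name), arcname=name)

# make_tarfile("/tmp/spark2/spark2-hdp-yarn-archive.tar.gz",
#              "/usr/hdp/current/spark2-client/jars")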
stack_name = status_params.stack_name upgrade_direction = default("/commandParams/upgrade_direction", None) version = default("/commandParams/version", None) agent_stack_retry_on_unavailability = config['hostLevelParams'][ 'agent_stack_retry_on_unavailability'] agent_stack_retry_count = expect("/hostLevelParams/agent_stack_retry_count", int) storm_component_home_dir = status_params.storm_component_home_dir conf_dir = status_params.conf_dir stack_version_unformatted = status_params.stack_version_unformatted stack_version_formatted = status_params.stack_version_formatted stack_supports_ru = stack_version_formatted and check_stack_feature( StackFeature.ROLLING_UPGRADE, stack_version_formatted) stack_supports_storm_kerberos = stack_version_formatted and check_stack_feature( StackFeature.STORM_KERBEROS, stack_version_formatted) stack_supports_storm_ams = stack_version_formatted and check_stack_feature( StackFeature.STORM_AMS, stack_version_formatted) stack_supports_core_site_for_ranger_plugin = check_stack_feature( StackFeature.CORE_SITE_FOR_RANGER_PLUGINS_SUPPORT, stack_version_formatted) # get the correct version to use for checking stack features version_for_stack_feature_checks = get_stack_feature_version(config) stack_supports_ranger_kerberos = check_stack_feature( StackFeature.RANGER_KERBEROS_SUPPORT, version_for_stack_feature_checks) stack_supports_ranger_audit_db = check_stack_feature( StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks)