def should_install_falcon_atlas_hook():
  config = Script.get_config()
  stack_version_unformatted = config['hostLevelParams']['stack_version']
  stack_version_formatted = format_stack_version(stack_version_unformatted)
  if check_stack_feature(StackFeature.FALCON_ATLAS_SUPPORT_2_3, stack_version_formatted) \
      or check_stack_feature(StackFeature.FALCON_ATLAS_SUPPORT, stack_version_formatted):
    return _has_applicable_local_component(config, ['FALCON_SERVER'])
  return False
def install_windows_msi(url_base, save_dir, save_files, hadoop_user, hadoop_password, stack_version):
  global _working_dir
  _working_dir = save_dir
  save_dir = os.path.abspath(save_dir)
  msi_save_dir = save_dir
  # system-wide lock to prevent simultaneous installations (when the first task failed on timeout)
  install_lock = SystemWideLock("Global\\hdp_msi_lock")
  try:
    # try to acquire lock
    if not install_lock.lock():
      Logger.info("Some other task is currently installing hdp.msi; waiting up to 10 minutes for it to finish")
      if not install_lock.lock(600000):
        raise Fail("Timeout on acquiring lock")
    if _validate_msi_install():
      Logger.info("hdp.msi is already installed")
      return
    stack_version_formatted = format_stack_version(stack_version)
    hdp_22_specific_props = ''
    if stack_version_formatted != "" and compare_versions(stack_version_formatted, '2.2') >= 0:
      hdp_22_specific_props = hdp_22.format(data_dir=data_dir)

    # MSIs cannot be larger than 2GB. HDPWIN 2.3 needed to be split in order to accommodate this limitation
    msi_file = ''
    for save_file in save_files:
      if save_file.lower().endswith(".msi"):
        msi_file = save_file
      file_url = urlparse.urljoin(url_base, save_file)
      try:
        download_file(file_url, os.path.join(msi_save_dir, save_file))
      except:
        raise Fail("Failed to download {url}".format(url=file_url))

    File(os.path.join(msi_save_dir, "properties.txt"),
         content=cluster_properties.format(log_dir=log_dir,
                                           data_dir=data_dir,
                                           local_host=local_host,
                                           db_flavor=db_flavor,
                                           hdp_22_specific_props=hdp_22_specific_props))

    # install msi
    msi_path = os_utils.quote_path(os.path.join(save_dir, msi_file))
    log_path = os_utils.quote_path(os.path.join(save_dir, msi_file[:-3] + "log"))
    layout_path = os_utils.quote_path(os.path.join(save_dir, "properties.txt"))
    hadoop_password_arg = os_utils.quote_path(hadoop_password)

    Execute(INSTALL_MSI_CMD.format(msi_path=msi_path,
                                   log_path=log_path,
                                   layout_path=layout_path,
                                   hadoop_user=hadoop_user,
                                   hadoop_password_arg=hadoop_password_arg))
    reload_windows_env()
    # create additional services manually due to hdp.msi limitations
    _ensure_services_created(hadoop_user, hadoop_password)
    _create_symlinks(stack_version)
    # finalizing install
    _write_marker()
    _validate_msi_install()
  finally:
    install_lock.unlock()
def get_hadoop_dir(target, force_latest_on_upgrade=False):
  """
  Return the hadoop shared directory in the following override order
  1. Use default for 2.1 and lower
  2. If 2.2 and higher, use <stack-root>/current/hadoop-client/{target}
  3. If 2.2 and higher AND for an upgrade, use <stack-root>/<version>/hadoop/{target}.
  However, if the upgrade has not yet invoked <stack-selector-tool>, return the current
  version of the component.
  :param target: the target directory
  :param force_latest_on_upgrade: if True, then this will return the "current" directory
  without the stack version built into the path, such as <stack-root>/current/hadoop-client
  """
  stack_root = Script.get_stack_root()
  stack_version = Script.get_stack_version()

  if not target in HADOOP_DIR_DEFAULTS:
    raise Fail("Target {0} not defined".format(target))

  hadoop_dir = HADOOP_DIR_DEFAULTS[target]

  formatted_stack_version = format_stack_version(stack_version)
  if formatted_stack_version and check_stack_feature(StackFeature.ROLLING_UPGRADE, formatted_stack_version):
    # home uses a different template
    if target == "home":
      hadoop_dir = HADOOP_HOME_DIR_TEMPLATE.format(stack_root, "current", "hadoop-client")
    else:
      hadoop_dir = HADOOP_DIR_TEMPLATE.format(stack_root, "current", "hadoop-client", target)

    # if we are not forcing "current" for HDP 2.2, then attempt to determine
    # if the exact version needs to be returned in the directory
    if not force_latest_on_upgrade:
      stack_info = _get_upgrade_stack()

      if stack_info is not None:
        stack_version = stack_info[1]

        # determine if <stack-selector-tool> has been run and if not, then use the current
        # hdp version until this component is upgraded
        current_stack_version = get_role_component_current_stack_version()
        if current_stack_version is not None and stack_version != current_stack_version:
          stack_version = current_stack_version

        if target == "home":
          # home uses a different template
          hadoop_dir = HADOOP_HOME_DIR_TEMPLATE.format(stack_root, stack_version, "hadoop")
        else:
          hadoop_dir = HADOOP_DIR_TEMPLATE.format(stack_root, stack_version, "hadoop", target)

  return hadoop_dir
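# Illustrative only: a hedged sketch of the paths get_hadoop_dir() is expected to resolve,
# assuming a stack root of /usr/hdp and a stack that supports ROLLING_UPGRADE. The exact
# values depend on HADOOP_DIR_DEFAULTS / HADOOP_DIR_TEMPLATE and on whether the
# <stack-selector-tool> has already been run for this component.
#
#   get_hadoop_dir("bin")   -> /usr/hdp/current/hadoop-client/bin
#   get_hadoop_dir("home")  -> /usr/hdp/current/hadoop-client
#   # during an upgrade, before the component has been switched, something like:
#   get_hadoop_dir("bin")   -> /usr/hdp/2.3.2.0-2950/hadoop/bin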
def get_stack_version():
  """
  Gets the normalized version of the stack in the form #.#.#.# if it is present
  in the configurations sent.
  :return: a normalized stack version or None
  """
  config = Script.get_config()
  if 'hostLevelParams' not in config or 'stack_version' not in config['hostLevelParams']:
    return None

  stack_version_unformatted = str(config['hostLevelParams']['stack_version'])

  if stack_version_unformatted is None or stack_version_unformatted == '':
    return None

  return format_stack_version(stack_version_unformatted)
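# Illustrative only: format_stack_version() is assumed here to normalize the raw
# hostLevelParams value into the dotted #.#.#.# form described in the docstring, e.g.
#
#   format_stack_version("2.3")  -> "2.3.0.0"   (padded to four segments; an assumption)
#
# so get_stack_version() returns None only when the parameter is missing or empty.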
def should_expose_component_version(self, command_name):
  """
  Analyzes config and given command to determine if stack version should be written
  to structured out. Currently only HDP stack versions >= 2.2 are supported.
  :param command_name: command name
  :return: True or False
  """
  from resource_management.libraries.functions.default import default
  stack_version_unformatted = str(default("/hostLevelParams/stack_version", ""))
  stack_version_formatted = format_stack_version(stack_version_unformatted)
  if stack_version_formatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version_formatted):
    if command_name.lower() == "status":
      request_version = default("/commandParams/request_version", None)
      if request_version is not None:
        return True
    else:
      # Populate version only on base commands
      return command_name.lower() == "start" or command_name.lower() == "install" or command_name.lower() == "restart"
  return False
def _create_symlinks(stack_version):
  # folders
  Execute("cmd /c mklink /d %HADOOP_NODE%\\hadoop %HADOOP_HOME%")
  Execute("cmd /c mklink /d %HADOOP_NODE%\\hive %HIVE_HOME%")
  stack_version_formatted = format_stack_version(stack_version)
  if stack_version_formatted != "" and compare_versions(stack_version_formatted, '2.2') >= 0:
    Execute("cmd /c mklink /d %HADOOP_NODE%\\knox %KNOX_HOME%")
  # file pairs (symlink_path, path_template_to_target_file), use * to replace the file version
  links_pairs = [
    ("%HADOOP_HOME%\\share\\hadoop\\tools\\lib\\hadoop-streaming.jar",
     "%HADOOP_HOME%\\share\\hadoop\\tools\\lib\\hadoop-streaming-*.jar"),
    ("%HIVE_HOME%\\hcatalog\\share\\webhcat\\svr\\lib\\hive-webhcat.jar",
     "%HIVE_HOME%\\hcatalog\\share\\webhcat\\svr\\lib\\hive-webhcat-*.jar"),
    ("%HIVE_HOME%\\lib\\zookeeper.jar",
     "%HIVE_HOME%\\lib\\zookeeper-*.jar")
  ]
  for link_pair in links_pairs:
    link, target = link_pair
    target = glob.glob(os.path.expandvars(target))[0].replace("\\\\", "\\")
    Execute('cmd /c mklink "{0}" "{1}"'.format(link, target))
def get_hadoop_dir_for_stack_version(target, stack_version):
  """
  Return the hadoop shared directory for the provided stack version. This is necessary
  when folder paths of the downgrade-source stack version are needed after <stack-selector-tool>.
  :param target: the target directory
  :param stack_version: stack version to get hadoop dir for
  """
  stack_root = Script.get_stack_root()
  if not target in HADOOP_DIR_DEFAULTS:
    raise Fail("Target {0} not defined".format(target))

  hadoop_dir = HADOOP_DIR_DEFAULTS[target]

  formatted_stack_version = format_stack_version(stack_version)
  if formatted_stack_version and check_stack_feature(StackFeature.ROLLING_UPGRADE, formatted_stack_version):
    # home uses a different template
    if target == "home":
      hadoop_dir = HADOOP_HOME_DIR_TEMPLATE.format(stack_root, stack_version, "hadoop")
    else:
      hadoop_dir = HADOOP_DIR_TEMPLATE.format(stack_root, stack_version, "hadoop", target)

  return hadoop_dir
# for use with <stack-root>/current/<component>
SERVER_ROLE_DIRECTORY_MAP = {
  'SPARK2_JOBHISTORYSERVER': 'spark2-historyserver',
  'SPARK2_CLIENT': 'spark2-client',
  'SPARK2_THRIFTSERVER': 'spark2-thriftserver'
}

component_directory = Script.get_component_from_role(SERVER_ROLE_DIRECTORY_MAP, "SPARK2_CLIENT")

config = Script.get_config()
tmp_dir = Script.get_tmp_dir()

stack_name = status_params.stack_name
stack_root = Script.get_stack_root()
stack_version_unformatted = config['hostLevelParams']['stack_version']
stack_version_formatted = format_stack_version(stack_version_unformatted)
host_sys_prepped = default("/hostLevelParams/host_sys_prepped", False)

# New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade
version = default("/commandParams/version", None)

spark_conf = '/etc/spark2/conf'
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")

if stack_version_formatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version_formatted):
  hadoop_home = stack_select.get_hadoop_dir("home")
  spark_conf = format("{stack_root}/current/{component_directory}/conf")

spark_log_dir = config['configurations']['spark2-env']['spark_log_dir']
spark_pid_dir = status_params.spark_pid_dir
spark_home = format("{stack_root}/current/{component_directory}")
def copy_atlas_hive_hook_to_dfs_share_lib(upgrade_type=None, upgrade_direction=None):
  """
  If the Atlas Hive Hook directory is present, Atlas is installed, and this is the first Oozie Server,
  then copy the entire contents of that directory to the Oozie Sharelib in DFS, e.g.,
  /usr/$stack/$current_version/atlas/hook/hive/ -> hdfs:///user/oozie/share/lib/lib_$timestamp/hive

  :param upgrade_type: If in the middle of a stack upgrade, the type as UPGRADE_TYPE_ROLLING or UPGRADE_TYPE_NON_ROLLING
  :param upgrade_direction: If in the middle of a stack upgrade, the direction as Direction.UPGRADE or Direction.DOWNGRADE.
  """
  import params

  # Calculate the effective version since this code can also be called during EU/RU in the upgrade direction.
  effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(params.version)
  if not check_stack_feature(StackFeature.ATLAS_HOOK_SUPPORT, effective_version):
    return

  # Important that oozie_server_hostnames is sorted by name so that this only runs on a single Oozie server.
  if not (len(params.oozie_server_hostnames) > 0 and params.hostname == params.oozie_server_hostnames[0]):
    Logger.debug("Will not attempt to copy Atlas Hive hook to DFS since this is not the first Oozie Server "
                 "sorted by hostname.")
    return

  if not has_atlas_in_cluster():
    Logger.debug("Will not attempt to copy Atlas Hive hook to DFS since Atlas is not installed on the cluster.")
    return

  if upgrade_type is not None and upgrade_direction == Direction.DOWNGRADE:
    Logger.debug("Will not attempt to copy Atlas Hive hook to DFS since in the middle of Rolling/Express upgrade "
                 "and performing a Downgrade.")
    return

  current_version = get_current_version()
  atlas_hive_hook_dir = format("{stack_root}/{current_version}/atlas/hook/hive/")
  if not os.path.exists(atlas_hive_hook_dir):
    Logger.error(format("ERROR. Atlas is installed in cluster but this Oozie server doesn't "
                        "contain directory {atlas_hive_hook_dir}"))
    return

  atlas_hive_hook_impl_dir = os.path.join(atlas_hive_hook_dir, "atlas-hive-plugin-impl")

  num_files = len([name for name in os.listdir(atlas_hive_hook_impl_dir) if os.path.exists(os.path.join(atlas_hive_hook_impl_dir, name))])
  Logger.info("Found %d files/directories inside Atlas Hive hook impl directory %s" % (num_files, atlas_hive_hook_impl_dir))

  # This can return over 100 files, so take the first 5 lines after "Available ShareLib"
  # Use -oozie http(s):localhost:{oozie_server_admin_port}/oozie as oozie-env does not export OOZIE_URL
  command = format(r'source {conf_dir}/oozie-env.sh ; oozie admin -oozie {oozie_base_url} -shareliblist hive | grep "\[Available ShareLib\]" -A 5')

  # The command output must be captured so that the Hive sharelib path can be parsed from it;
  # Execute() does not return output, so a call that does (e.g. shell.checked_call) is used here.
  code, out = checked_call(command, user=params.oozie_user, tries=10, try_sleep=5, logoutput=True)

  hive_sharelib_dir = __parse_sharelib_from_output(out)

  if hive_sharelib_dir is None:
    raise Fail("Could not parse Hive sharelib from output.")

  Logger.info(format("Parsed Hive sharelib = {hive_sharelib_dir} and will attempt to copy/replace {num_files} files to it from {atlas_hive_hook_impl_dir}"))

  params.HdfsResource(hive_sharelib_dir,
                      type="directory",
                      action="create_on_execute",
                      source=atlas_hive_hook_impl_dir,
                      user=params.hdfs_user,
                      owner=params.oozie_user,
                      group=params.hdfs_user,
                      mode=0755,
                      recursive_chown=True,
                      recursive_chmod=True,
                      replace_existing_files=True)

  Logger.info("Copying Atlas Hive hook properties file to Oozie Sharelib in DFS.")
  atlas_hook_filepath_source = os.path.join(params.hive_conf_dir, params.atlas_hook_filename)
  atlas_hook_file_path_dest_in_dfs = os.path.join(hive_sharelib_dir, params.atlas_hook_filename)
  params.HdfsResource(atlas_hook_file_path_dest_in_dfs,
                      type="file",
                      source=atlas_hook_filepath_source,
                      action="create_on_execute",
                      owner=params.oozie_user,
                      group=params.hdfs_user,
                      mode=0755,
                      replace_existing_files=True)
  params.HdfsResource(None, action="execute")

  # Update the sharelib after making any changes
  # Use -oozie http(s):localhost:{oozie_server_admin_port}/oozie as oozie-env does not export OOZIE_URL
  Execute(format("source {conf_dir}/oozie-env.sh ; oozie admin -oozie {oozie_base_url} -sharelibupdate"),
          user=params.oozie_user,
          tries=5,
          try_sleep=5,
          logoutput=True)
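# The helper __parse_sharelib_from_output() is referenced above but not shown here. A minimal
# sketch of what it might do, assuming the "oozie admin ... -shareliblist hive" output contains
# a line with an hdfs:// path to the hive sharelib (the exact output format is an assumption):
def __parse_sharelib_from_output(out):
  """Return the hive sharelib directory parsed from the oozie admin output, or None."""
  if out is not None:
    for line in out.splitlines():
      for word in line.split():
        if word.startswith("hdfs://"):  # hypothetical heuristic: first HDFS URI in the output
          return word
  return None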
limitations under the License.
"""

from ambari_commons.constants import AMBARI_SUDO_BINARY
from resource_management.libraries.functions.version import format_stack_version, compare_versions
from resource_management.core.system import System
from resource_management.libraries.script.script import Script
from resource_management.libraries.functions import default, format

config = Script.get_config()
tmp_dir = Script.get_tmp_dir()
sudo = AMBARI_SUDO_BINARY

stack_version_unformatted = str(config['hostLevelParams']['stack_version'])
iop_stack_version = format_stack_version(stack_version_unformatted)

# users and groups
hbase_user = config['configurations']['hbase-env']['hbase_user']
smoke_user = config['configurations']['cluster-env']['smokeuser']
gmetad_user = config['configurations']['ganglia-env']["gmetad_user"]
gmond_user = config['configurations']['ganglia-env']["gmond_user"]
tez_user = None  # config['configurations']['tez-env']["tez_user"]

user_group = config['configurations']['cluster-env']['user_group']
proxyuser_group = default("/configurations/hadoop-env/proxyuser_group", "users")

hdfs_log_dir_prefix = config['configurations']['hadoop-env']['hdfs_log_dir_prefix']
def spark_service(name, upgrade_type=None, action=None):
  import params

  if action == 'start':

    effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
    if effective_version:
      effective_version = format_stack_version(effective_version)

    if name == 'jobhistoryserver' and effective_version and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version):
      # copy spark-hdp-assembly.jar to hdfs
      copy_to_hdfs("spark", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)
      # create spark history directory
      params.HdfsResource(params.spark_history_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.spark_user,
                          group=params.user_group,
                          mode=0777,
                          recursive_chmod=True)
      params.HdfsResource(None, action="execute")

    if params.security_enabled:
      spark_kinit_cmd = format("{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ")
      Execute(spark_kinit_cmd, user=params.spark_user)

    # Spark 1.3.1.2.3, and higher, which was included in HDP 2.3, does not have a dependency on Tez,
    # so it does not need to copy the tarball; otherwise, copy it.
    if params.stack_version_formatted and check_stack_feature(StackFeature.TEZ_FOR_SPARK, params.stack_version_formatted):
      resource_created = copy_to_hdfs("tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)
      if resource_created:
        params.HdfsResource(None, action="execute")

    if name == 'jobhistoryserver':
      historyserver_no_op_test = format(
        'ls {spark_history_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_history_server_pid_file}` >/dev/null 2>&1')
      try:
        Execute(format('{spark_history_server_start}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=historyserver_no_op_test)
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise

    elif name == 'sparkthriftserver':
      if params.security_enabled:
        hive_principal = params.hive_kerberos_principal.replace('_HOST', socket.getfqdn().lower())
        hive_kinit_cmd = format("{kinit_path_local} -kt {hive_kerberos_keytab} {hive_principal}; ")
        Execute(hive_kinit_cmd, user=params.hive_user)

      thriftserver_no_op_test = format(
        'ls {spark_thrift_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_thrift_server_pid_file}` >/dev/null 2>&1')
      try:
        Execute(format('{spark_thrift_server_start} --properties-file {spark_thrift_server_conf_file} {spark_thrift_cmd_opts_properties}'),
                user=params.hive_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=thriftserver_no_op_test)
      except:
        show_logs(params.spark_log_dir, user=params.hive_user)
        raise

  elif action == 'stop':
    if name == 'jobhistoryserver':
      try:
        Execute(format('{spark_history_server_stop}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home})
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise
      File(params.spark_history_server_pid_file, action="delete")

    elif name == 'sparkthriftserver':
      try:
        Execute(format('{spark_thrift_server_stop}'),
                user=params.hive_user,
                environment={'JAVA_HOME': params.java_home})
      except:
        show_logs(params.spark_log_dir, user=params.hive_user)
        raise
      File(params.spark_thrift_server_pid_file, action="delete")
  'HCAT': 'hive-client',
  'HIVE_SERVER_INTERACTIVE': 'hive-server2-hive2'
}

# Either HIVE_METASTORE, HIVE_SERVER, WEBHCAT_SERVER, HIVE_CLIENT, HCAT, HIVE_SERVER_INTERACTIVE
role = default("/role", None)
component_directory = Script.get_component_from_role(SERVER_ROLE_DIRECTORY_MAP, "HIVE_CLIENT")
component_directory_interactive = Script.get_component_from_role(SERVER_ROLE_DIRECTORY_MAP, "HIVE_SERVER_INTERACTIVE")

config = Script.get_config()

stack_root = Script.get_stack_root()
stack_version_unformatted = config['hostLevelParams']['stack_version']
stack_version_formatted_major = format_stack_version(stack_version_unformatted)

if OSCheck.is_windows_family():
  hive_metastore_win_service_name = "metastore"
  hive_client_win_service_name = "hwi"
  hive_server_win_service_name = "hiveserver2"
  webhcat_server_win_service_name = "templeton"
else:
  hive_pid_dir = config['configurations']['hive-env']['hive_pid_dir']
  hive_pid = 'hive-server.pid'
  hive_interactive_pid = 'hive-interactive.pid'
  hive_metastore_pid = 'hive.pid'

  hcat_pid_dir = config['configurations']['hive-env']['hcat_pid_dir']  # hcat_pid_dir
  webhcat_pid_file = format('{hcat_pid_dir}/webhcat.pid')
def setup_spark(env, type, upgrade_type=None, action=None):
  import params

  # ensure that matching LZO libraries are installed for Spark
  lzo_utils.install_lzo_if_needed()

  Directory([params.spark_pid_dir, params.spark_log_dir],
            owner=params.spark_user,
            group=params.user_group,
            mode=0775,
            create_parents=True)

  if type == 'server' and action == 'config':
    params.HdfsResource(params.spark_hdfs_user_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.spark_user,
                        mode=0775)

    if not params.whs_dir_protocol or params.whs_dir_protocol == urlparse(params.default_fs).scheme:
      # Create Spark Warehouse Dir
      params.HdfsResource(params.spark_warehouse_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.spark_user,
                          mode=0777)

    params.HdfsResource(None, action="execute")

    generate_logfeeder_input_config('spark2', Template("input.config-spark2.json.j2", extra_imports=[default]))

  spark2_defaults = dict(params.config['configurations']['spark2-defaults'])

  if params.security_enabled:
    spark2_defaults.pop("history.server.spnego.kerberos.principal")
    spark2_defaults.pop("history.server.spnego.keytab.file")
    spark2_defaults['spark.history.kerberos.principal'] = spark2_defaults['spark.history.kerberos.principal'].replace('_HOST', socket.getfqdn().lower())

  PropertiesFile(format("{spark_conf}/spark-defaults.conf"),
                 properties=spark2_defaults,
                 key_value_delimiter=" ",
                 owner=params.spark_user,
                 group=params.spark_group,
                 mode=0644)

  # create spark-env.sh in etc/conf dir
  File(os.path.join(params.spark_conf, 'spark-env.sh'),
       owner=params.spark_user,
       group=params.spark_group,
       content=InlineTemplate(params.spark_env_sh),
       mode=0644)

  # create log4j.properties in etc/conf dir
  File(os.path.join(params.spark_conf, 'log4j.properties'),
       owner=params.spark_user,
       group=params.spark_group,
       content=params.spark_log4j_properties,
       mode=0644)

  # create metrics.properties in etc/conf dir
  File(os.path.join(params.spark_conf, 'metrics.properties'),
       owner=params.spark_user,
       group=params.spark_group,
       content=InlineTemplate(params.spark_metrics_properties),
       mode=0644)

  if params.is_hive_installed:
    XmlConfig("hive-site.xml",
              conf_dir=params.spark_conf,
              configurations=params.spark_hive_properties,
              owner=params.spark_user,
              group=params.spark_group,
              mode=0644)

  if params.has_spark_thriftserver:
    spark2_thrift_sparkconf = dict(params.config['configurations']['spark2-thrift-sparkconf'])

    if params.security_enabled and 'spark.yarn.principal' in spark2_thrift_sparkconf:
      spark2_thrift_sparkconf['spark.yarn.principal'] = spark2_thrift_sparkconf['spark.yarn.principal'].replace('_HOST', socket.getfqdn().lower())

    PropertiesFile(params.spark_thrift_server_conf_file,
                   properties=spark2_thrift_sparkconf,
                   owner=params.hive_user,
                   group=params.user_group,
                   key_value_delimiter=" ",
                   mode=0644)

  effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
  if effective_version:
    effective_version = format_stack_version(effective_version)

  if params.spark_thrift_fairscheduler_content and effective_version and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version):
    # create spark-thrift-fairscheduler.xml
    File(os.path.join(params.spark_conf, "spark-thrift-fairscheduler.xml"),
         owner=params.spark_user,
         group=params.spark_group,
         mode=0755,
         content=InlineTemplate(params.spark_thrift_fairscheduler_content))
def upgrade_schema(self, env):
  """
  Executes the schema upgrade binary. This is its own function because it could
  be called as a standalone task from the upgrade pack, but is safe to run it for
  each metastore instance. The schema upgrade on an already upgraded metastore is a NOOP.

  The metastore schema upgrade requires a database driver library for most
  databases. During an upgrade, it's possible that the library is not present,
  so this will also attempt to copy/download the appropriate driver.

  This function will also ensure that configurations are written out to disk before running
  since the new configs will most likely not yet exist on an upgrade.

  Should not be invoked for a DOWNGRADE; Metastore only supports schema upgrades.
  """
  Logger.info("Upgrading Hive Metastore Schema")
  import params
  env.set_params(params)

  # ensure that configurations are written out before trying to upgrade the schema
  # since the schematool needs configs and doesn't know how to use the hive conf override
  self.configure(env)

  if params.security_enabled:
    kinit_command = format("{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal}; ")
    Execute(kinit_command, user=params.smokeuser)

  # ensure that the JDBC driver is present for the schema tool; if it's not
  # present, then download it first
  if params.hive_jdbc_driver in params.hive_jdbc_drivers_list:
    target_directory = format("{stack_root}/{version}/hive/lib")

    # download it if it does not exist
    if not os.path.exists(params.source_jdbc_file):
      jdbc_connector(params.target_hive)

    target_directory_and_filename = os.path.join(target_directory, os.path.basename(params.source_jdbc_file))

    if params.sqla_db_used:
      target_native_libs_directory = format("{target_directory}/native/lib64")

      Execute(format("yes | {sudo} cp {jars_in_hive_lib} {target_directory}"))

      Directory(target_native_libs_directory, create_parents=True)

      Execute(format("yes | {sudo} cp {libs_in_hive_lib} {target_native_libs_directory}"))

      Execute(format("{sudo} chown -R {hive_user}:{user_group} {hive_lib}/*"))
    else:
      # copy the JDBC driver from the older metastore location to the new location only
      # if it does not already exist
      if not os.path.exists(target_directory_and_filename):
        Execute(('cp', params.source_jdbc_file, target_directory),
                path=["/bin", "/usr/bin/"],
                sudo=True)

    File(target_directory_and_filename, mode=0644)

  # build the schema tool command
  binary = format("{hive_schematool_ver_bin}/schematool")

  # the conf.server directory changed locations between stack versions
  # since the configurations have not been written out yet during an upgrade
  # we need to choose the original legacy location
  schematool_hive_server_conf_dir = params.hive_server_conf_dir
  if params.current_version is not None:
    current_version = format_stack_version(params.current_version)
    if not (check_stack_feature(StackFeature.CONFIG_VERSIONING, current_version)):
      schematool_hive_server_conf_dir = LEGACY_HIVE_SERVER_CONF

  env_dict = {'HIVE_CONF_DIR': schematool_hive_server_conf_dir}

  command = format("{binary} -dbType {hive_metastore_db_type} -upgradeSchema")
  Execute(command, user=params.hive_user, tries=1, environment=env_dict, logoutput=True)
from resource_management.libraries.functions.get_port_from_url import get_port_from_url
from resource_management.libraries import functions

# server configurations
config = Script.get_config()
tmp_dir = Script.get_tmp_dir()
sudo = AMBARI_SUDO_BINARY

stack_name = default("/hostLevelParams/stack_name", None)

# node hostname
hostname = config["hostname"]

# This is expected to be of the form #.#.#.#
stack_version_unformatted = str(config['hostLevelParams']['stack_version'])
stack_version = format_stack_version(stack_version_unformatted)
stack_is_21 = False

# this is not available on INSTALL action because hdp-select is not available
iop_stack_version = functions.get_stack_version('hive-server2')

# New Cluster Stack Version that is defined during the RESTART of a Rolling Upgrade.
# It cannot be used during the initial Cluster Install because the version is not yet known.
version = default("/commandParams/version", None)

# current host stack version
current_version = default("/hostLevelParams/current_version", None)

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)
  'RANGER_ADMIN': 'ranger-admin',
  'RANGER_USERSYNC': 'ranger-usersync'
}

component_directory = Script.get_component_from_role(SERVER_ROLE_DIRECTORY_MAP, "RANGER_ADMIN")

config = Script.get_config()
tmp_dir = Script.get_tmp_dir()
stack_name = default("/hostLevelParams/stack_name", None)
version = default("/commandParams/version", None)
host_sys_prepped = default("/hostLevelParams/host_sys_prepped", False)

stack_version_unformatted = str(config['hostLevelParams']['stack_version'])
stack_version_formatted = format_stack_version(stack_version_unformatted)

upgrade_marker_file = format("{tmp_dir}/rangeradmin_ru.inprogress")

xml_configurations_supported = config['configurations']['ranger-env']['xml_configurations_supported']

create_db_dbuser = config['configurations']['ranger-env']['create_db_dbuser']

stack_is_hdp22_or_further = Script.is_stack_greater_or_equal("2.2")
stack_is_hdp23_or_further = Script.is_stack_greater_or_equal("2.3")

downgrade_from_version = default("/commandParams/downgrade_from_version", None)
upgrade_direction = default("/commandParams/upgrade_direction", None)

ranger_conf = '/etc/ranger/admin/conf'
def actionexecute(self, env):
  num_errors = 0

  # Parse parameters
  config = Script.get_config()

  repo_rhel_suse = config['configurations']['cluster-env']['repo_suse_rhel_template']
  repo_ubuntu = config['configurations']['cluster-env']['repo_ubuntu_template']
  template = repo_rhel_suse if OSCheck.is_redhat_family() or OSCheck.is_suse_family() else repo_ubuntu

  # Handle a SIGTERM and SIGINT gracefully
  signal.signal(signal.SIGTERM, self.abort_handler)
  signal.signal(signal.SIGINT, self.abort_handler)

  # Select dict that contains parameters
  try:
    self.repository_version = config['roleParams']['repository_version']
    base_urls = json.loads(config['roleParams']['base_urls'])
    package_list = json.loads(config['roleParams']['package_list'])
    stack_id = config['roleParams']['stack_id']
  except KeyError:
    # Last try
    self.repository_version = config['commandParams']['repository_version']
    base_urls = json.loads(config['commandParams']['base_urls'])
    package_list = json.loads(config['commandParams']['package_list'])
    stack_id = config['commandParams']['stack_id']

  # current stack information
  self.current_stack_version_formatted = None
  if 'stack_version' in config['hostLevelParams']:
    current_stack_version_unformatted = str(config['hostLevelParams']['stack_version'])
    self.current_stack_version_formatted = format_stack_version(current_stack_version_unformatted)

  self.stack_name = Script.get_stack_name()
  if self.stack_name is None:
    raise Fail("Cannot determine the stack name")

  self.stack_root_folder = Script.get_stack_root()
  if self.stack_root_folder is None:
    raise Fail("Cannot determine the stack's root directory")

  if self.repository_version is None:
    raise Fail("Cannot determine the repository version to install")

  self.repository_version = self.repository_version.strip()

  # Install/update repositories
  installed_repositories = []
  self.current_repositories = []
  self.current_repo_files = set()

  # Enable base system repositories
  # We don't need that for RHEL family, because we leave all repos enabled
  # except disabled HDP* ones
  if OSCheck.is_suse_family():
    self.current_repositories.append('base')
  elif OSCheck.is_ubuntu_family():
    self.current_repo_files.add('base')

  Logger.info("Will install packages for repository version {0}".format(self.repository_version))

  if 0 == len(base_urls):
    Logger.info("Repository list is empty. Ambari may not be managing the repositories for {0}.".format(self.repository_version))

  try:
    append_to_file = False
    for url_info in base_urls:
      repo_name, repo_file = self.install_repository(url_info, append_to_file, template)
      self.current_repositories.append(repo_name)
      self.current_repo_files.add(repo_file)
      append_to_file = True

    installed_repositories = list_ambari_managed_repos(self.stack_name)
  except Exception, err:
    Logger.logger.exception("Cannot distribute repositories. Error: {0}".format(str(err)))
    num_errors += 1
def zookeeper(type=None, upgrade_type=None):
  import params

  if type == 'server':
    # This path may be missing after Ambari upgrade. We need to create it. We need to do this before any configs will
    # be applied.
    # Note: the path segments are joined relative to stack_root so that stack_root is not discarded by os.path.join.
    if upgrade_type is None \
        and not os.path.exists(os.path.join(params.stack_root, "current", "zookeeper-server")) \
        and params.current_version \
        and check_stack_feature(StackFeature.ROLLING_UPGRADE, format_stack_version(params.version)):
      conf_select.select(params.stack_name, "zookeeper", params.current_version)
      stack_select.select("zookeeper-server", params.version)

  Directory(params.config_dir,
            owner=params.zk_user,
            create_parents=True,
            group=params.user_group)

  File(os.path.join(params.config_dir, "zookeeper-env.sh"),
       content=InlineTemplate(params.zk_env_sh_template),
       owner=params.zk_user,
       group=params.user_group)

  configFile("zoo.cfg", template_name="zoo.cfg.j2")
  configFile("configuration.xsl", template_name="configuration.xsl.j2")

  Directory(params.zk_pid_dir,
            owner=params.zk_user,
            create_parents=True,
            group=params.user_group,
            mode=0755)

  Directory(params.zk_log_dir,
            owner=params.zk_user,
            create_parents=True,
            group=params.user_group,
            mode=0755)

  Directory(params.zk_data_dir,
            owner=params.zk_user,
            create_parents=True,
            cd_access="a",
            group=params.user_group,
            mode=0755)

  if type == 'server':
    myid = str(sorted(params.zookeeper_hosts).index(params.hostname) + 1)

    File(os.path.join(params.zk_data_dir, "myid"),
         mode=0644,
         content=myid)

  if (params.log4j_props != None):
    File(os.path.join(params.config_dir, "log4j.properties"),
         mode=0644,
         group=params.user_group,
         owner=params.zk_user,
         content=params.log4j_props)
  elif (os.path.exists(os.path.join(params.config_dir, "log4j.properties"))):
    File(os.path.join(params.config_dir, "log4j.properties"),
         mode=0644,
         group=params.user_group,
         owner=params.zk_user)

  if params.security_enabled:
    if type == "server":
      configFile("zookeeper_jaas.conf", template_name="zookeeper_jaas.conf.j2")
      configFile("zookeeper_client_jaas.conf", template_name="zookeeper_client_jaas.conf.j2")
    else:
      configFile("zookeeper_client_jaas.conf", template_name="zookeeper_client_jaas.conf.j2")

  File(os.path.join(params.config_dir, "zoo_sample.cfg"),
       owner=params.zk_user,
       group=params.user_group)
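# Illustrative only: the myid written above is the 1-based position of this host in the sorted
# list of ZooKeeper hosts (hostnames here are hypothetical examples):
#
#   zookeeper_hosts = ['zk2.example.com', 'zk1.example.com', 'zk3.example.com']
#   hostname        = 'zk2.example.com'
#   sorted(zookeeper_hosts).index(hostname)  -> 1
#   myid                                     -> "2"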
def pre_upgrade_deregister():
  """
  Runs the "hive --service hiveserver2 --deregister <version>" command to
  de-provision the server in preparation for an upgrade. This will contact
  ZooKeeper to remove the server so that clients that attempt to connect will
  be directed to other servers automatically. Once all clients have drained,
  the server will shutdown automatically; this process could take a very long time.
  This function will obtain the Kerberos ticket if security is enabled.
  :return:
  """
  import params

  Logger.info('HiveServer2 executing "deregister" command in preparation for upgrade...')

  if params.security_enabled:
    kinit_command = format("{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal}; ")
    Execute(kinit_command, user=params.smokeuser)

  # calculate the current hive server version
  current_hiveserver_version = _get_current_hiveserver_version()
  if current_hiveserver_version is None:
    raise Fail('Unable to determine the current HiveServer2 version to deregister.')

  # fallback when upgrading because /usr/iop/current/hive-server2/conf/conf.server may not exist
  hive_server_conf_dir = params.hive_server_conf_dir
  if not os.path.exists(hive_server_conf_dir):
    hive_server_conf_dir = "/etc/hive/conf.server"

  # deregister
  hive_execute_path = params.execute_path
  # If upgrading, the upgrade-target hive binary should be used to call the --deregister command.
  # If downgrading, the downgrade-source hive binary should be used to call the --deregister command.
  if "upgrade" == params.upgrade_direction:
    # hive_bin
    upgrade_target_version = format_stack_version(params.version)
    if upgrade_target_version and compare_versions(upgrade_target_version, "4.1.0.0") >= 0:
      upgrade_target_hive_bin = format('/usr/iop/{version}/hive/bin')
      if (os.pathsep + params.hive_bin) in hive_execute_path:
        hive_execute_path = hive_execute_path.replace(os.pathsep + params.hive_bin, os.pathsep + upgrade_target_hive_bin)
    # hadoop_bin_dir
    upgrade_target_hadoop_bin = stack_select.get_hadoop_dir("bin", upgrade_stack_only=True)
    upgrade_source_hadoop_bin = params.hadoop_bin_dir
    if upgrade_target_hadoop_bin and len(upgrade_target_hadoop_bin) > 0 and (os.pathsep + upgrade_source_hadoop_bin) in hive_execute_path:
      hive_execute_path = hive_execute_path.replace(os.pathsep + upgrade_source_hadoop_bin, os.pathsep + upgrade_target_hadoop_bin)

  command = format('hive --config {hive_server_conf_dir} --service hiveserver2 --deregister ' + current_hiveserver_version)
  Execute(command, user=params.hive_user, path=hive_execute_path, tries=1)
def get_stack_feature_version(config):
  """
  Uses the specified ConfigDictionary to determine which version to use for stack
  feature checks.

  Normally, the commandParams/version is the correct value to use as it represents the
  4-digit exact stack version/build being upgraded to or downgraded to. However, there
  are cases where the commands being sent are to stop running services which are on a
  different stack version from the version being upgraded/downgraded to. As a result,
  the configurations sent for these specific stop commands do not match
  commandParams/version.
  :param config: a ConfigDictionary instance to extract the hostLevelParams and
  commandParams from.
  :return: the version to use when checking stack features.
  """
  from resource_management.libraries.functions.default import default

  if "hostLevelParams" not in config or "commandParams" not in config:
    raise Fail("Unable to determine the correct version since hostLevelParams and commandParams were not present in the configuration dictionary")

  # should always be there
  stack_version = config['hostLevelParams']['stack_version']

  # something like 2.4.0.0-1234; represents the version for the command
  # (or None if this is a cluster install and it hasn't been calculated yet)
  version = default("/commandParams/version", None)

  # something like 2.4.0.0-1234
  # (or None if this is a cluster install and it hasn't been calculated yet)
  current_cluster_version = default("/hostLevelParams/current_version", None)

  # UPGRADE or DOWNGRADE (or None)
  upgrade_direction = default("/commandParams/upgrade_direction", None)

  # start out with the value that's right 99% of the time
  version_for_stack_feature_checks = version if version is not None else stack_version

  # if this is not an upgrade, then we take the simple path
  if upgrade_direction is None:
    Logger.info(
      "Stack Feature Version Info: stack_version={0}, version={1}, current_cluster_version={2} -> {3}".format(
        stack_version, version, current_cluster_version, version_for_stack_feature_checks))
    return version_for_stack_feature_checks

  # STOP commands are the trouble makers as they are intended to stop a service not on the
  # version of the stack being upgraded/downgraded to
  is_stop_command = _is_stop_command(config)
  if not is_stop_command:
    Logger.info(
      "Stack Feature Version Info: stack_version={0}, version={1}, current_cluster_version={2}, upgrade_direction={3} -> {4}".format(
        stack_version, version, current_cluster_version, upgrade_direction, version_for_stack_feature_checks))
    return version_for_stack_feature_checks

  original_stack = default("/commandParams/original_stack", None)
  target_stack = default("/commandParams/target_stack", None)

  # something like 2.5.0.0-5678 (or None)
  downgrade_from_version = default("/commandParams/downgrade_from_version", None)

  # guaranteed to have a STOP command now during an UPGRADE/DOWNGRADE, check direction
  if upgrade_direction.lower() == Direction.DOWNGRADE.lower():
    if downgrade_from_version is None:
      Logger.warning(
        "Unable to determine the version being downgraded when stopping services, using {0}".format(
          version_for_stack_feature_checks))
    else:
      version_for_stack_feature_checks = downgrade_from_version
  else:
    # UPGRADE
    if current_cluster_version is not None:
      version_for_stack_feature_checks = current_cluster_version
    elif original_stack is not None:
      version_for_stack_feature_checks = format_stack_version(original_stack)
    else:
      version_for_stack_feature_checks = version if version is not None else stack_version

  Logger.info(
    "Stack Feature Version Info: stack_version={0}, version={1}, current_cluster_version={2}, upgrade_direction={3}, "
    "original_stack={4}, target_stack={5}, downgrade_from_version={6}, stop_command={7} -> {8}".format(
      stack_version, version, current_cluster_version, upgrade_direction, original_stack, target_stack,
      downgrade_from_version, is_stop_command, version_for_stack_feature_checks))

  return version_for_stack_feature_checks
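# Illustrative only: a hedged sketch of how the precedence above plays out for a STOP command
# issued while downgrading (all values are hypothetical):
#
#   stack_version          = "2.5"            # hostLevelParams/stack_version
#   version                = "2.4.0.0-1234"   # commandParams/version (the downgrade target)
#   downgrade_from_version = "2.5.0.0-5678"   # commandParams/downgrade_from_version
#   upgrade_direction      = "downgrade", _is_stop_command(config) -> True
#
#   -> version_for_stack_feature_checks == "2.5.0.0-5678"
#
# i.e. feature checks for the stop command run against the version the service is still on,
# not the version being downgraded to.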
def setup_spark(env, type, upgrade_type=None, action=None):
  import params

  Directory([params.spark_pid_dir, params.spark_log_dir],
            owner=params.spark_user,
            group=params.user_group,
            mode=0775,
            create_parents=True)

  if type == 'server' and action == 'config':
    params.HdfsResource(params.spark_hdfs_user_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.spark_user,
                        mode=0775)
    params.HdfsResource(None, action="execute")

  PropertiesFile(format("{spark_conf}/spark-defaults.conf"),
                 properties=params.config['configurations']['spark2-defaults'],
                 key_value_delimiter=" ",
                 owner=params.spark_user,
                 group=params.spark_group)

  # create spark-env.sh in etc/conf dir
  File(os.path.join(params.spark_conf, 'spark-env.sh'),
       owner=params.spark_user,
       group=params.spark_group,
       content=InlineTemplate(params.spark_env_sh),
       mode=0644)

  # create log4j.properties in etc/conf dir
  File(os.path.join(params.spark_conf, 'log4j.properties'),
       owner=params.spark_user,
       group=params.spark_group,
       content=params.spark_log4j_properties,
       mode=0644)

  # create metrics.properties in etc/conf dir
  File(os.path.join(params.spark_conf, 'metrics.properties'),
       owner=params.spark_user,
       group=params.spark_group,
       content=InlineTemplate(params.spark_metrics_properties))

  Directory(params.spark_logs_dir,
            owner=params.spark_user,
            group=params.spark_group,
            mode=0755)

  if params.is_hive_installed:
    XmlConfig("hive-site.xml",
              conf_dir=params.spark_conf,
              configurations=params.spark_hive_properties,
              owner=params.spark_user,
              group=params.spark_group,
              mode=0644)

  if params.has_spark_thriftserver:
    PropertiesFile(params.spark_thrift_server_conf_file,
                   properties=params.config['configurations']['spark2-thrift-sparkconf'],
                   owner=params.hive_user,
                   group=params.user_group,
                   key_value_delimiter=" ")

  effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
  if effective_version:
    effective_version = format_stack_version(effective_version)

  if params.spark_thrift_fairscheduler_content and effective_version and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version):
    # create spark-thrift-fairscheduler.xml
    File(os.path.join(params.spark_conf, "spark-thrift-fairscheduler.xml"),
         owner=params.spark_user,
         group=params.spark_group,
         mode=0755,
         content=InlineTemplate(params.spark_thrift_fairscheduler_content))
def spark_service(name, upgrade_type=None, action=None):
  import params

  if action == 'start':

    effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
    if effective_version:
      effective_version = format_stack_version(effective_version)

    if name == 'jobhistoryserver' and effective_version and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version):
      # create & copy spark2-hdp-yarn-archive.tar.gz to hdfs
      if not params.sysprep_skip_copy_tarballs_hdfs:
        source_dir = params.spark_home + "/jars"
        tmp_archive_file = get_tarball_paths("spark2")[1]
        make_tarfile(tmp_archive_file, source_dir)
        copy_to_hdfs("spark2", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs, replace_existing_files=True)

      # create & copy spark2-hdp-hive-archive.tar.gz to hdfs
      if not params.sysprep_skip_copy_tarballs_hdfs:
        source_dir = params.spark_home + "/standalone-metastore"
        tmp_archive_file = get_tarball_paths("spark2hive")[1]
        make_tarfile(tmp_archive_file, source_dir)
        copy_to_hdfs("spark2hive", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs, replace_existing_files=True)

      # create spark history directory
      params.HdfsResource(params.spark_history_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.spark_user,
                          group=params.user_group,
                          mode=0777,
                          recursive_chmod=True)
      params.HdfsResource(None, action="execute")

    if params.security_enabled:
      spark_kinit_cmd = format("{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ")
      Execute(spark_kinit_cmd, user=params.spark_user)

    # Spark 1.3.1.2.3, and higher, which was included in HDP 2.3, does not have a dependency on Tez,
    # so it does not need to copy the tarball; otherwise, copy it.
    if params.stack_version_formatted and check_stack_feature(StackFeature.TEZ_FOR_SPARK, params.stack_version_formatted):
      resource_created = copy_to_hdfs("tez", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs)
      if resource_created:
        params.HdfsResource(None, action="execute")

    if name == 'jobhistoryserver':
      create_catalog_cmd = format("{hive_schematool_bin}/schematool -dbType {hive_metastore_db_type} "
                                  "-createCatalog {default_metastore_catalog} "
                                  "-catalogDescription 'Default catalog, for Spark' -ifNotExists "
                                  "-catalogLocation {default_fs}{spark_warehouse_dir}")

      Execute(create_catalog_cmd, user=params.hive_user)

      historyserver_no_op_test = as_sudo(["test", "-f", params.spark_history_server_pid_file]) + " && " + as_sudo(["pgrep", "-F", params.spark_history_server_pid_file])
      try:
        Execute(params.spark_history_server_start,
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=historyserver_no_op_test)
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise

    elif name == 'sparkthriftserver':
      if params.security_enabled:
        hive_kinit_cmd = format("{kinit_path_local} -kt {hive_kerberos_keytab} {hive_kerberos_principal}; ")
        Execute(hive_kinit_cmd, user=params.spark_user)

      thriftserver_no_op_test = as_sudo(["test", "-f", params.spark_thrift_server_pid_file]) + " && " + as_sudo(["pgrep", "-F", params.spark_thrift_server_pid_file])
      try:
        Execute(format('{spark_thrift_server_start} --properties-file {spark_thrift_server_conf_file} {spark_thrift_cmd_opts_properties}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=thriftserver_no_op_test)
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise

  elif action == 'stop':
    if name == 'jobhistoryserver':
      try:
        Execute(format('{spark_history_server_stop}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home})
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise
      File(params.spark_history_server_pid_file, action="delete")

    elif name == 'sparkthriftserver':
      try:
        Execute(format('{spark_thrift_server_stop}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home})
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise
      File(params.spark_thrift_server_pid_file, action="delete")
  'SPARK_JOBHISTORYSERVER': 'spark-historyserver',
  'SPARK_CLIENT': 'spark-client',
  'SPARK_THRIFTSERVER': 'spark-thriftserver'
}

upgrade_direction = default("/commandParams/upgrade_direction", None)

component_directory = Script.get_component_from_role(SERVER_ROLE_DIRECTORY_MAP, "SPARK_CLIENT")

config = Script.get_config()
tmp_dir = Script.get_tmp_dir()

# New Cluster Stack Version that is defined during the RESTART of a Rolling Upgrade
version = default("/commandParams/version", None)
stack_name = default("/hostLevelParams/stack_name", None)

iop_full_version = format_stack_version(version)

hadoop_home = "/usr/iop/current/hadoop-client"
spark_conf = format("/usr/iop/current/{component_directory}/conf")
spark_log_dir = config['configurations']['spark-env']['spark_log_dir']
spark_pid_dir = status_params.spark_pid_dir

spark_role_root = "spark-client"

command_role = default("/role", "")

if command_role == "SPARK_CLIENT":
  spark_role_root = "spark-client"
elif command_role == "SPARK_JOBHISTORYSERVER":
  spark_role_root = "spark-historyserver"
elif command_role == "SPARK_THRIFTSERVER":
  spark_role_root = "spark-thriftserver"
def kafka(upgrade_type=None):
  import params

  ensure_base_directories()

  kafka_server_config = mutable_config_dict(params.config['configurations']['kafka-broker'])

  # This still has an issue of hostnames being alphabetically out-of-order for broker.id in HDP-2.2.
  # Starting in HDP 2.3, Kafka handles the generation of broker.id so Ambari doesn't have to.
  effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(params.version)
  Logger.info(format("Effective stack version: {effective_version}"))

  kafka_server_config['host.name'] = params.hostname

  if params.has_metric_collector:
    kafka_server_config['kafka.timeline.metrics.hosts'] = params.ams_collector_hosts
    kafka_server_config['kafka.timeline.metrics.port'] = params.metric_collector_port
    kafka_server_config['kafka.timeline.metrics.protocol'] = params.metric_collector_protocol
    kafka_server_config['kafka.timeline.metrics.truststore.path'] = params.metric_truststore_path
    kafka_server_config['kafka.timeline.metrics.truststore.type'] = params.metric_truststore_type
    kafka_server_config['kafka.timeline.metrics.truststore.password'] = params.metric_truststore_password

  kafka_data_dir = kafka_server_config['log.dirs']
  kafka_data_dirs = filter(None, kafka_data_dir.split(","))

  Directory(kafka_data_dirs,
            mode=0755,
            cd_access='a',
            owner=params.kafka_user,
            group=params.user_group,
            create_parents=True,
            recursive_ownership=True)

  PropertiesFile("server.properties",
                 dir=params.conf_dir,
                 properties=kafka_server_config,
                 owner=params.kafka_user,
                 group=params.user_group)

  File(format("{conf_dir}/kafka-env.sh"),
       mode=0755,
       owner=params.kafka_user,
       content=InlineTemplate(params.kafka_env_sh_template))

  File(format("{kafka_bin_dir}/kafka-run-class.sh"),
       mode=0755,
       owner=params.kafka_user,
       content=InlineTemplate(params.kafka_run_class_content_template))

  if (params.log4j_props != None):
    File(format("{conf_dir}/log4j.properties"),
         mode=0644,
         group=params.user_group,
         owner=params.kafka_user,
         content=InlineTemplate(params.log4j_props))

  if params.security_enabled and params.kafka_kerberos_enabled:
    if params.kafka_jaas_conf_template:
      File(format("{conf_dir}/kafka_jaas.conf"),
           owner=params.kafka_user,
           content=InlineTemplate(params.kafka_jaas_conf_template))
    else:
      TemplateConfig(format("{conf_dir}/kafka_jaas.conf"),
                     owner=params.kafka_user)

    if params.kafka_client_jaas_conf_template:
      File(format("{conf_dir}/kafka_client_jaas.conf"),
           owner=params.kafka_user,
           content=InlineTemplate(params.kafka_client_jaas_conf_template))
    else:
      TemplateConfig(format("{conf_dir}/kafka_client_jaas.conf"),
                     owner=params.kafka_user)

  # On some OSes this folder may not exist, so create it before pushing files there
  Directory(params.limits_conf_dir,
            create_parents=True,
            owner='root',
            group='root')

  File(os.path.join(params.limits_conf_dir, 'kafka.conf'),
       owner='root',
       group='root',
       mode=0644,
       content=Template("kafka.conf.j2"))

  File(os.path.join(params.conf_dir, 'tools-log4j.properties'),
       owner='root',
       group='root',
       mode=0644,
       content=Template("tools-log4j.properties.j2"))

  setup_symlink(params.kafka_managed_pid_dir, params.kafka_pid_dir)
  setup_symlink(params.kafka_managed_log_dir, params.kafka_log_dir)
except:
  return None

# server configurations
config = Script.get_config()
exec_tmp_dir = Script.get_tmp_dir()
sudo = AMBARI_SUDO_BINARY

stack_name = default("/hostLevelParams/stack_name", None)

retryAble = default("/commandParams/command_retry_enabled", False)
version = default("/commandParams/version", None)

stack_version_unformatted = str(config['hostLevelParams']['stack_version'])
stack_version = format_stack_version(stack_version_unformatted)
stack_root = status_params.stack_root

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)

stack_supports_ranger_audit_db = check_stack_feature(StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks)

component_directory = status_params.component_directory

# hadoop params
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
daemon_script = format('/usr/iop/current/{component_directory}/bin/hbase-daemon.sh')
region_mover = format(
def setup_spark(env, type, upgrade_type=None, action=None, config_dir=None):
  """
  :param env: Python environment
  :param type: Spark component type
  :param upgrade_type: If in a stack upgrade, either UPGRADE_TYPE_ROLLING or UPGRADE_TYPE_NON_ROLLING
  :param action: Action to perform, such as generate configs
  :param config_dir: Optional config directory to write configs to.
  """
  import params

  if config_dir is None:
    config_dir = params.spark_conf

  Directory([params.spark_pid_dir, params.spark_log_dir],
            owner=params.spark_user,
            group=params.user_group,
            mode=0775,
            create_parents=True)

  if type == 'server' and action == 'config':
    params.HdfsResource(params.spark_hdfs_user_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.spark_user,
                        mode=0775)
    params.HdfsResource(None, action="execute")

  PropertiesFile(os.path.join(config_dir, "spark-defaults.conf"),
                 properties=params.config['configurations']['spark-defaults'],
                 key_value_delimiter=" ",
                 owner=params.spark_user,
                 group=params.spark_group)

  # create spark-env.sh in etc/conf dir
  File(os.path.join(config_dir, 'spark-env.sh'),
       owner=params.spark_user,
       group=params.spark_group,
       content=InlineTemplate(params.spark_env_sh),
       mode=0644)

  # create log4j.properties in etc/conf dir
  File(os.path.join(config_dir, 'log4j.properties'),
       owner=params.spark_user,
       group=params.spark_group,
       content=params.spark_log4j_properties,
       mode=0644)

  # create metrics.properties in etc/conf dir
  File(os.path.join(config_dir, 'metrics.properties'),
       owner=params.spark_user,
       group=params.spark_group,
       content=InlineTemplate(params.spark_metrics_properties))

  File(os.path.join(params.spark_conf, 'java-opts'),
       owner=params.spark_user,
       group=params.spark_group,
       content=InlineTemplate(params.spark_javaopts_properties))

  Directory(params.spark_logs_dir,
            owner=params.spark_user,
            group=params.spark_group,
            mode=0755)

  if params.is_hive_installed:
    XmlConfig("hive-site.xml",
              conf_dir=config_dir,
              configurations=params.spark_hive_properties,
              owner=params.spark_user,
              group=params.spark_group,
              mode=0644)

  if params.has_spark_thriftserver:
    PropertiesFile(params.spark_thrift_server_conf_file,
                   properties=params.config['configurations']['spark-thrift-sparkconf'],
                   owner=params.hive_user,
                   group=params.user_group,
                   key_value_delimiter=" ")

  effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
  if effective_version:
    effective_version = format_stack_version(effective_version)

  if params.spark_thrift_fairscheduler_content and effective_version and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version):
    # create spark-thrift-fairscheduler.xml
    File(os.path.join(config_dir, "spark-thrift-fairscheduler.xml"),
         owner=params.spark_user,
         group=params.spark_group,
         mode=0755,
         content=InlineTemplate(params.spark_thrift_fairscheduler_content))
def kafka(upgrade_type=None): import params ensure_base_directories() kafka_server_config = mutable_config_dict( params.config['configurations']['kafka-broker']) # This still has an issue of hostnames being alphabetically out-of-order for broker.id in HDP-2.2. # Starting in HDP 2.3, Kafka handles the generation of broker.id so Ambari doesn't have to. effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version( params.version) Logger.info(format("Effective stack version: {effective_version}")) # In HDP-2.2 (Apache Kafka 0.8.1.1) we used to generate broker.ids based on hosts and add them to # kafka's server.properties. In future version brokers can generate their own ids based on zookeeper seq # We need to preserve the broker.id when user is upgrading from HDP-2.2 to any higher version. # Once its preserved it will be written to kafka.log.dirs/meta.properties and it will be used from there on # similarly we need preserve port as well during the upgrade if upgrade_type is not None and params.upgrade_direction == Direction.UPGRADE and \ check_stack_feature(StackFeature.CREATE_KAFKA_BROKER_ID, params.current_version) and \ check_stack_feature(StackFeature.KAFKA_LISTENERS, params.version): if len(params.kafka_hosts ) > 0 and params.hostname in params.kafka_hosts: brokerid = str(sorted(params.kafka_hosts).index(params.hostname)) kafka_server_config['broker.id'] = brokerid Logger.info(format("Calculating broker.id as {brokerid}")) if 'port' in kafka_server_config: port = kafka_server_config['port'] Logger.info(format("Port config from previous verson: {port}")) listeners = kafka_server_config['listeners'] kafka_server_config['listeners'] = listeners.replace("6667", port) Logger.info( format("Kafka listeners after the port update: {listeners}")) del kafka_server_config['port'] if effective_version is not None and effective_version != "" and \ check_stack_feature(StackFeature.CREATE_KAFKA_BROKER_ID, effective_version): if len(params.kafka_hosts ) > 0 and params.hostname in params.kafka_hosts: brokerid = str(sorted(params.kafka_hosts).index(params.hostname)) kafka_server_config['broker.id'] = brokerid Logger.info(format("Calculating broker.id as {brokerid}")) # listeners and advertised.listeners are only added in 2.3.0.0 onwards. 
if effective_version is not None and effective_version != "" and \ check_stack_feature(StackFeature.KAFKA_LISTENERS, effective_version): listeners = kafka_server_config['listeners'].replace( "localhost", params.hostname) Logger.info(format("Kafka listeners: {listeners}")) kafka_server_config['listeners'] = listeners if params.security_enabled and params.kafka_kerberos_enabled: Logger.info("Kafka kerberos security is enabled.") kafka_server_config['advertised.listeners'] = listeners Logger.info(format("Kafka advertised listeners: {listeners}")) elif 'advertised.listeners' in kafka_server_config: advertised_listeners = kafka_server_config[ 'advertised.listeners'].replace("localhost", params.hostname) kafka_server_config['advertised.listeners'] = advertised_listeners Logger.info( format("Kafka advertised listeners: {advertised_listeners}")) else: kafka_server_config['host.name'] = params.hostname if params.has_metric_collector: kafka_server_config[ 'kafka.timeline.metrics.hosts'] = params.ams_collector_hosts kafka_server_config[ 'kafka.timeline.metrics.port'] = params.metric_collector_port kafka_server_config[ 'kafka.timeline.metrics.protocol'] = params.metric_collector_protocol kafka_server_config[ 'kafka.timeline.metrics.truststore.path'] = params.metric_truststore_path kafka_server_config[ 'kafka.timeline.metrics.truststore.type'] = params.metric_truststore_type kafka_server_config[ 'kafka.timeline.metrics.truststore.password'] = params.metric_truststore_password kafka_data_dir = kafka_server_config['log.dirs'] kafka_data_dirs = filter(None, kafka_data_dir.split(",")) rack = "/default-rack" i = 0 if len(params.all_racks) > 0: for host in params.all_hosts: if host == params.hostname: rack = params.all_racks[i] break i = i + 1 kafka_server_config['broker.rack'] = rack Directory( kafka_data_dirs, mode=0755, cd_access='a', owner=params.kafka_user, group=params.user_group, create_parents=True, recursive_ownership=True, ) PropertiesFile( "server.properties", dir=params.conf_dir, properties=kafka_server_config, owner=params.kafka_user, group=params.user_group, ) File(format("{conf_dir}/kafka-env.sh"), owner=params.kafka_user, content=InlineTemplate(params.kafka_env_sh_template)) if (params.log4j_props != None): File(format("{conf_dir}/log4j.properties"), mode=0644, group=params.user_group, owner=params.kafka_user, content=InlineTemplate(params.log4j_props)) if params.security_enabled and params.kafka_kerberos_enabled: if params.kafka_jaas_conf_template: File(format("{conf_dir}/kafka_jaas.conf"), owner=params.kafka_user, content=InlineTemplate(params.kafka_jaas_conf_template)) else: TemplateConfig(format("{conf_dir}/kafka_jaas.conf"), owner=params.kafka_user) if params.kafka_client_jaas_conf_template: File(format("{conf_dir}/kafka_client_jaas.conf"), owner=params.kafka_user, content=InlineTemplate( params.kafka_client_jaas_conf_template)) else: TemplateConfig(format("{conf_dir}/kafka_client_jaas.conf"), owner=params.kafka_user) # On some OS this folder could be not exists, so we will create it before pushing there files Directory(params.limits_conf_dir, create_parents=True, owner='root', group='root') File(os.path.join(params.limits_conf_dir, 'kafka.conf'), owner='root', group='root', mode=0644, content=Template("kafka.conf.j2")) File(os.path.join(params.conf_dir, 'tools-log4j.properties'), owner='root', group='root', mode=0644, content=Template("tools-log4j.properties.j2")) setup_symlink(params.kafka_managed_pid_dir, params.kafka_pid_dir) setup_symlink(params.kafka_managed_log_dir, 
params.kafka_log_dir)
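# A standalone sketch (not the Ambari implementation) of the upgrade-time broker.id and
# port preservation performed in kafka() above: the broker id becomes this host's index in
# the sorted list of Kafka hosts, and a legacy 'port' property is folded into 'listeners'
# by replacing the default 6667. kafka_hosts/hostname below are illustrative stand-ins for
# the values normally read from the params module.

def preserve_broker_identity(kafka_server_config, kafka_hosts, hostname):
    """Mutate kafka_server_config in place, mirroring the preservation flow above."""
    if kafka_hosts and hostname in kafka_hosts:
        # Deterministic id: position of this host in the alphabetically sorted host list.
        kafka_server_config['broker.id'] = str(sorted(kafka_hosts).index(hostname))
    if 'port' in kafka_server_config:
        # Fold the legacy port into the listeners string and drop the obsolete key.
        port = kafka_server_config['port']
        kafka_server_config['listeners'] = kafka_server_config['listeners'].replace("6667", port)
        del kafka_server_config['port']
    return kafka_server_config

# Example values: host "b.example.com" in a three-node cluster gets broker.id "1".
example = {'listeners': 'PLAINTEXT://localhost:6667', 'port': '9092'}
preserve_broker_identity(example, ['c.example.com', 'a.example.com', 'b.example.com'], 'b.example.com')
print(example)  # listeners is now PLAINTEXT://localhost:9092, broker.id == '1', 'port' removed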
def falcon(type, action=None, upgrade_type=None): import params if action == 'config': Directory( params.falcon_pid_dir, owner=params.falcon_user, create_parents=True, mode=0755, cd_access="a", ) Directory( params.falcon_log_dir, owner=params.falcon_user, create_parents=True, mode=0755, cd_access="a", ) Directory(params.falcon_webapp_dir, owner=params.falcon_user, create_parents=True) Directory(params.falcon_home, owner=params.falcon_user, create_parents=True) Directory(params.etc_prefix_dir, mode=0755, create_parents=True) Directory(params.falcon_conf_dir, owner=params.falcon_user, create_parents=True) File( params.falcon_conf_dir + '/falcon-env.sh', content=InlineTemplate(params.falcon_env_sh_template), owner=params.falcon_user, group=params.user_group, ) PropertiesFile(params.falcon_conf_dir + '/client.properties', properties=params.falcon_client_properties, mode=0644, owner=params.falcon_user) PropertiesFile(params.falcon_conf_dir + '/runtime.properties', properties=params.falcon_runtime_properties, mode=0644, owner=params.falcon_user) PropertiesFile(params.falcon_conf_dir + '/startup.properties', properties=params.falcon_startup_properties, mode=0644, owner=params.falcon_user) File(params.falcon_conf_dir + '/log4j.properties', content=InlineTemplate(params.falcon_log4j), group=params.user_group, mode=0644, owner=params.falcon_user) if params.falcon_graph_storage_directory: Directory(params.falcon_graph_storage_directory, owner=params.falcon_user, group=params.user_group, mode=0775, create_parents=True, cd_access="a") if params.falcon_graph_serialize_path: Directory(params.falcon_graph_serialize_path, owner=params.falcon_user, group=params.user_group, mode=0775, create_parents=True, cd_access="a") # Generate atlas-application.properties.xml file if params.falcon_atlas_support and params.enable_atlas_hook: # If Atlas is added later than Falcon, this package will be absent. install_atlas_hook_packages( params.atlas_plugin_package, params.atlas_ubuntu_plugin_package, params.host_sys_prepped, params.agent_stack_retry_on_unavailability, params.agent_stack_retry_count) atlas_hook_filepath = os.path.join(params.falcon_conf_dir, params.atlas_hook_filename) setup_atlas_hook(SERVICE.FALCON, params.falcon_atlas_application_properties, atlas_hook_filepath, params.falcon_user, params.user_group) # Falcon 0.10 uses FALCON_EXTRA_CLASS_PATH. # Setup symlinks for older versions. if params.current_version_formatted and check_stack_feature( StackFeature.FALCON_ATLAS_SUPPORT_2_3, params.current_version_formatted): setup_atlas_jar_symlinks("falcon", params.falcon_webinf_lib) if type == 'server': if action == 'config': if params.store_uri[0:4] == "hdfs": params.HdfsResource(params.store_uri, type="directory", action="create_on_execute", owner=params.falcon_user, mode=0755) elif params.store_uri[0:4] == "file": Directory(params.store_uri[7:], owner=params.falcon_user, create_parents=True) # TODO change to proper mode params.HdfsResource(params.falcon_apps_dir, type="directory", action="create_on_execute", owner=params.falcon_user, mode=0777) # In HDP 2.4 and earlier, the data-mirroring directory was copied to HDFS. if params.supports_data_mirroring: params.HdfsResource(params.dfs_data_mirroring_dir, type="directory", action="create_on_execute", owner=params.falcon_user, group=params.proxyuser_group, recursive_chown=True, recursive_chmod=True, mode=0770, source=params.local_data_mirroring_dir) # Falcon Extensions were supported in HDP 2.5 and higher. 
effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version( params.version) supports_falcon_extensions = effective_version and check_stack_feature( StackFeature.FALCON_EXTENSIONS, effective_version) if supports_falcon_extensions: params.HdfsResource(params.falcon_extensions_dest_dir, type="directory", action="create_on_execute", owner=params.falcon_user, group=params.proxyuser_group, recursive_chown=True, recursive_chmod=True, mode=0755, source=params.falcon_extensions_source_dir) # Create the extensons HiveDR store params.HdfsResource(os.path.join( params.falcon_extensions_dest_dir, "mirroring"), type="directory", action="create_on_execute", owner=params.falcon_user, group=params.proxyuser_group, mode=0770) # At least one HDFS Dir should be created, so execute the change now. params.HdfsResource(None, action="execute") Directory(params.falcon_local_dir, owner=params.falcon_user, create_parents=True, cd_access="a") if params.falcon_embeddedmq_enabled == True: Directory(os.path.abspath( os.path.join(params.falcon_embeddedmq_data, "..")), owner=params.falcon_user, create_parents=True) Directory(params.falcon_embeddedmq_data, owner=params.falcon_user, create_parents=True) # although Falcon's falcon-config.sh will use 'which hadoop' to figure # this out, in an upgraded cluster, it's possible that 'which hadoop' # still points to older binaries; it's safer to just pass in the # hadoop home directory to use environment_dictionary = {"HADOOP_HOME": params.hadoop_home_dir} pid = get_user_call_output.get_user_call_output( format("cat {server_pid_file}"), user=params.falcon_user, is_checked_call=False)[1] process_exists = format("ls {server_pid_file} && ps -p {pid}") if action == 'start': try: Execute( format('{falcon_home}/bin/falcon-config.sh server falcon'), user=params.falcon_user, path=params.hadoop_bin_dir, environment=environment_dictionary, not_if=process_exists, ) except: show_logs(params.falcon_log_dir, params.falcon_user) raise if not os.path.exists(params.target_jar_file): try: File(params.target_jar_file, content=DownloadSource(params.bdb_resource_name), mode=0755) except: exc_msg = traceback.format_exc() exception_message = format( "Caught Exception while downloading {bdb_resource_name}:\n{exc_msg}" ) Logger.error(exception_message) if not os.path.isfile(params.target_jar_file): error_message = """ If you are using bdb as the Falcon graph db store, please run ambari-server setup --jdbc-db=bdb --jdbc-driver=<path to je5.0.73.jar> on the ambari server host. Otherwise falcon startup will fail. Otherwise please configure Falcon to use HBase as the backend as described in the Falcon documentation. """ Logger.error(error_message) try: Execute( format( '{falcon_home}/bin/falcon-start -port {falcon_port}'), user=params.falcon_user, path=params.hadoop_bin_dir, environment=environment_dictionary, not_if=process_exists, ) except: show_logs(params.falcon_log_dir, params.falcon_user) raise if action == 'stop': try: Execute(format('{falcon_home}/bin/falcon-stop'), user=params.falcon_user, path=params.hadoop_bin_dir, environment=environment_dictionary) except: show_logs(params.falcon_log_dir, params.falcon_user) raise File(params.server_pid_file, action='delete')
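# Minimal sketch of how the server/config branch above dispatches on the Falcon store URI:
# an hdfs:// URI is created through an HdfsResource-style call while a file:// URI is created
# locally. The production code checks string prefixes (store_uri[0:4], store_uri[7:]); this
# standalone version uses urlparse for clarity and only returns a plan instead of creating anything.

try:
    from urllib.parse import urlparse  # Python 3
except ImportError:
    from urlparse import urlparse      # Python 2

def plan_store_dir(store_uri):
    """Return (kind, path) describing where the Falcon store directory should be created."""
    parsed = urlparse(store_uri)
    if parsed.scheme == "hdfs":
        return ("hdfs", parsed.path)    # would be handled by an HdfsResource-style helper
    if parsed.scheme == "file":
        return ("local", parsed.path)   # would be handled by a local Directory-style helper
    return ("unknown", store_uri)

# Example values only:
print(plan_store_dir("hdfs://nn.example.com:8020/apps/falcon/store"))  # ('hdfs', '/apps/falcon/store')
print(plan_store_dir("file:///hadoop/falcon/store"))                   # ('local', '/hadoop/falcon/store')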
def pre_rolling_restart(self, env): import params env.set_params(params) if params.version and compare_versions(format_stack_version(params.version), '4.0.0.0') >= 0: stack_select.select_packages(params.version)
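# The gate above depends on format_stack_version/compare_versions. Below is a minimal,
# self-contained approximation of that comparison, shown only to make the '>= 4.0.0.0'
# check concrete; it is not the library implementation and it drops build-number suffixes.

def version_tuple(version):
    """'4.1.0.0-123' -> (4, 1, 0, 0); anything after '-' is dropped."""
    return tuple(int(part) for part in version.split('-')[0].split('.'))

def at_least(version, minimum):
    return version_tuple(version) >= version_tuple(minimum)

# Example values:
print(at_least("4.1.0.0-55", "4.0.0.0"))    # True  -> select_packages would run
print(at_least("2.3.4.0-3485", "4.0.0.0"))  # False -> the upgrade gate is skipped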
def kafka(upgrade_type=None): import params ensure_base_directories() kafka_server_config = mutable_config_dict( params.config['configurations']['kafka-broker']) # This still has an issue of hostnames being alphabetically out-of-order for broker.id in HDP-2.2. # Starting in HDP 2.3, Kafka handles the generation of broker.id so Ambari doesn't have to. effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version( params.version) Logger.info(format("Effective stack version: {effective_version}")) # listeners and advertised.listeners are only added in 2.3.0.0 onwards. if effective_version is not None and effective_version != "" and \ check_stack_feature(StackFeature.KAFKA_LISTENERS, effective_version): listeners = kafka_server_config['listeners'].replace( "localhost", params.hostname) Logger.info(format("Kafka listeners: {listeners}")) kafka_server_config['listeners'] = listeners if params.kerberos_security_enabled and params.kafka_kerberos_enabled: Logger.info("Kafka kerberos security is enabled.") if "SASL" not in listeners: listeners = kafka_server_config['listeners'] listeners = re.sub(r"(^|\b)PLAINTEXT://", "SASL_PLAINTEXT://", listeners) listeners = re.sub(r"(^|\b)PLAINTEXTSASL://", "SASL_PLAINTEXT://", listeners) listeners = re.sub(r"(^|\b)SSL://", "SASL_SSL://", listeners) kafka_server_config['listeners'] = listeners kafka_server_config['advertised.listeners'] = listeners Logger.info(format("Kafka advertised listeners: {listeners}")) elif 'advertised.listeners' in kafka_server_config: advertised_listeners = kafka_server_config[ 'advertised.listeners'].replace("localhost", params.hostname) kafka_server_config['advertised.listeners'] = advertised_listeners Logger.info( format("Kafka advertised listeners: {advertised_listeners}")) else: kafka_server_config['host.name'] = params.hostname if params.has_metric_collector: kafka_server_config[ 'kafka.timeline.metrics.hosts'] = params.ams_collector_hosts kafka_server_config[ 'kafka.timeline.metrics.port'] = params.metric_collector_port kafka_server_config[ 'kafka.timeline.metrics.protocol'] = params.metric_collector_protocol kafka_server_config[ 'kafka.timeline.metrics.truststore.path'] = params.metric_truststore_path kafka_server_config[ 'kafka.timeline.metrics.truststore.type'] = params.metric_truststore_type kafka_server_config[ 'kafka.timeline.metrics.truststore.password'] = params.metric_truststore_password kafka_data_dir = kafka_server_config['log.dirs'] kafka_data_dirs = filter(None, kafka_data_dir.split(",")) rack = "/default-rack" i = 0 if len(params.all_racks) > 0: for host in params.all_hosts: if host == params.hostname: rack = params.all_racks[i] break i = i + 1 Directory( kafka_data_dirs, mode=0755, cd_access='a', owner=params.kafka_user, group=params.user_group, create_parents=True, recursive_ownership=True, ) PropertiesFile( "server.properties", mode=0640, dir=params.conf_dir, properties=kafka_server_config, owner=params.kafka_user, group=params.user_group, ) File(format("{conf_dir}/kafka-env.sh"), owner=params.kafka_user, content=InlineTemplate(params.kafka_env_sh_template)) if (params.log4j_props != None): File(format("{conf_dir}/log4j.properties"), mode=0644, group=params.user_group, owner=params.kafka_user, content=InlineTemplate(params.log4j_props)) if (params.kerberos_security_enabled and params.kafka_kerberos_enabled) or params.kafka_other_sasl_enabled: if params.kafka_jaas_conf_template: File(format("{conf_dir}/kafka_jaas.conf"), owner=params.kafka_user, 
content=InlineTemplate(params.kafka_jaas_conf_template)) else: TemplateConfig(format("{conf_dir}/kafka_jaas.conf"), owner=params.kafka_user) if params.kafka_client_jaas_conf_template: File(format("{conf_dir}/kafka_client_jaas.conf"), owner=params.kafka_user, content=InlineTemplate( params.kafka_client_jaas_conf_template)) else: TemplateConfig(format("{conf_dir}/kafka_client_jaas.conf"), owner=params.kafka_user) # On some OS this folder may not exist, so we create it before pushing files there Directory(params.limits_conf_dir, create_parents=True, owner='root', group='root') File(os.path.join(params.limits_conf_dir, 'kafka.conf'), owner='root', group='root', mode=0644, content=Template("kafka.conf.j2")) File(os.path.join(params.conf_dir, 'tools-log4j.properties'), owner='root', group='root', mode=0644, content=Template("tools-log4j.properties.j2")) generate_logfeeder_input_config( 'kafka', Template("input.config-kafka.json.j2", extra_imports=[default])) setup_symlink(params.kafka_managed_pid_dir, params.kafka_pid_dir) setup_symlink(params.kafka_managed_log_dir, params.kafka_log_dir)
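# Sketch of the listener rewrite performed above when Kerberos is enabled and the current
# listeners string does not yet mention SASL: PLAINTEXT and the legacy PLAINTEXTSASL prefix
# become SASL_PLAINTEXT, and SSL becomes SASL_SSL. Only the standard re module is used; the
# input string is an example, not a value read from a live cluster.

import re

def saslify(listeners):
    if "SASL" in listeners:
        return listeners  # already secured, leave untouched
    listeners = re.sub(r"(^|\b)PLAINTEXT://", "SASL_PLAINTEXT://", listeners)
    listeners = re.sub(r"(^|\b)PLAINTEXTSASL://", "SASL_PLAINTEXT://", listeners)
    listeners = re.sub(r"(^|\b)SSL://", "SASL_SSL://", listeners)
    return listeners

print(saslify("PLAINTEXT://host1.example.com:6667,SSL://host1.example.com:6668"))
# SASL_PLAINTEXT://host1.example.com:6667,SASL_SSL://host1.example.com:6668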
def kafka(upgrade_type=None): import params ensure_base_directories() kafka_server_config = mutable_config_dict(params.config['configurations']['kafka-broker']) # This still has an issue of hostnames being alphabetically out-of-order for broker.id in HDP-2.2. # Starting in HDP 2.3, Kafka handles the generation of broker.id so Ambari doesn't have to. effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(params.version) Logger.info(format("Effective stack version: {effective_version}")) if effective_version is not None and effective_version != "" and compare_versions(effective_version, '2.2.0.0') >= 0 and compare_versions(effective_version, '2.3.0.0') < 0: if len(params.kafka_hosts) > 0 and params.hostname in params.kafka_hosts: brokerid = str(sorted(params.kafka_hosts).index(params.hostname)) kafka_server_config['broker.id'] = brokerid Logger.info(format("Calculating broker.id as {brokerid}")) # listeners and advertised.listeners are only added in 2.3.0.0 onwards. if effective_version is not None and effective_version != "" and compare_versions(effective_version, '2.3.0.0') >= 0: listeners = kafka_server_config['listeners'].replace("localhost", params.hostname) Logger.info(format("Kafka listeners: {listeners}")) if params.security_enabled and params.kafka_kerberos_enabled: Logger.info("Kafka kerberos security is enabled.") if "SASL" not in listeners: listeners = listeners.replace("PLAINTEXT", "PLAINTEXTSASL") kafka_server_config['listeners'] = listeners kafka_server_config['advertised.listeners'] = listeners Logger.info(format("Kafka advertised listeners: {listeners}")) else: kafka_server_config['listeners'] = listeners if 'advertised.listeners' in kafka_server_config: advertised_listeners = kafka_server_config['advertised.listeners'].replace("localhost", params.hostname) kafka_server_config['advertised.listeners'] = advertised_listeners Logger.info(format("Kafka advertised listeners: {advertised_listeners}")) else: kafka_server_config['host.name'] = params.hostname if params.has_metric_collector: kafka_server_config['kafka.timeline.metrics.host'] = params.metric_collector_host kafka_server_config['kafka.timeline.metrics.port'] = params.metric_collector_port kafka_server_config['kafka.timeline.metrics.protocol'] = params.metric_collector_protocol kafka_server_config['kafka.timeline.metrics.truststore.path'] = params.metric_truststore_path kafka_server_config['kafka.timeline.metrics.truststore.type'] = params.metric_truststore_type kafka_server_config['kafka.timeline.metrics.truststore.password'] = params.metric_truststore_password kafka_data_dir = kafka_server_config['log.dirs'] kafka_data_dirs = filter(None, kafka_data_dir.split(",")) Directory(kafka_data_dirs, mode=0755, cd_access='a', owner=params.kafka_user, group=params.user_group, create_parents = True, recursive_ownership = True, ) PropertiesFile("server.properties", dir=params.conf_dir, properties=kafka_server_config, owner=params.kafka_user, group=params.user_group, ) File(format("{conf_dir}/kafka-env.sh"), owner=params.kafka_user, content=InlineTemplate(params.kafka_env_sh_template) ) if (params.log4j_props != None): File(format("{conf_dir}/log4j.properties"), mode=0644, group=params.user_group, owner=params.kafka_user, content=params.log4j_props ) if params.security_enabled and params.kafka_kerberos_enabled: TemplateConfig(format("{conf_dir}/kafka_jaas.conf"), owner=params.kafka_user) TemplateConfig(format("{conf_dir}/kafka_client_jaas.conf"), owner=params.kafka_user) # On some OS 
this folder may not exist, so we create it before pushing files there Directory(params.limits_conf_dir, create_parents = True, owner='root', group='root' ) File(os.path.join(params.limits_conf_dir, 'kafka.conf'), owner='root', group='root', mode=0644, content=Template("kafka.conf.j2") ) File(os.path.join(params.conf_dir, 'tools-log4j.properties'), owner='root', group='root', mode=0644, content=Template("tools-log4j.properties.j2") ) setup_symlink(params.kafka_managed_pid_dir, params.kafka_pid_dir) setup_symlink(params.kafka_managed_log_dir, params.kafka_log_dir)
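# All three kafka() variants in this file split the comma-separated log.dirs value and drop
# empty entries with filter(None, ...) before creating the data directories. A tiny
# standalone sketch of that parsing step; the log.dirs string below is an example value.

def parse_log_dirs(log_dirs_value):
    """Split 'a,b,,c'-style values into a clean list of directories."""
    # filter(None, ...) removes empty strings caused by trailing or doubled commas;
    # list() makes the result concrete on Python 3, where filter() is lazy.
    return list(filter(None, log_dirs_value.split(",")))

print(parse_log_dirs("/kafka-logs,/grid/0/kafka-logs,"))
# ['/kafka-logs', '/grid/0/kafka-logs']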
def prestart(env, component): import params if params.version and compare_versions(format_stack_version(params.version), '4.1.0.0') >= 0: conf_select.select(params.stack_name, "kafka", params.version) stack_select.select(component, params.version)
def install_windows_msi(url_base, save_dir, save_files, hadoop_user, hadoop_password, stack_version): global _working_dir _working_dir = save_dir save_dir = os.path.abspath(save_dir) msi_save_dir = save_dir # system-wide lock to prevent simultaneous installations (e.g. when the first task failed on timeout) install_lock = SystemWideLock("Global\\hdp_msi_lock") try: # try to acquire lock if not install_lock.lock(): Logger.info( "Another task is currently installing hdp.msi, waiting up to 10 minutes for it to finish" ) if not install_lock.lock(600000): raise Fail("Timeout on acquiring lock") if _validate_msi_install(): Logger.info("hdp.msi already installed") return stack_version_formatted = format_stack_version(stack_version) hdp_22_specific_props = '' if stack_version_formatted != "" and compare_versions( stack_version_formatted, '2.2') >= 0: hdp_22_specific_props = hdp_22.format(data_dir=data_dir) # MSIs cannot be larger than 2GB. HDPWIN 2.3 needed to be split in order to accommodate this limitation msi_file = '' for save_file in save_files: if save_file.lower().endswith(".msi"): msi_file = save_file file_url = urlparse.urljoin(url_base, save_file) try: download_file(file_url, os.path.join(msi_save_dir, save_file)) except: raise Fail("Failed to download {url}".format(url=file_url)) File(os.path.join(msi_save_dir, "properties.txt"), content=cluster_properties.format( log_dir=log_dir, data_dir=data_dir, local_host=local_host, db_flavor=db_flavor, hdp_22_specific_props=hdp_22_specific_props)) # install msi msi_path = os_utils.quote_path(os.path.join(save_dir, msi_file)) log_path = os_utils.quote_path( os.path.join(save_dir, msi_file[:-3] + "log")) layout_path = os_utils.quote_path( os.path.join(save_dir, "properties.txt")) hadoop_password_arg = os_utils.quote_path(hadoop_password) Execute( INSTALL_MSI_CMD.format(msi_path=msi_path, log_path=log_path, layout_path=layout_path, hadoop_user=hadoop_user, hadoop_password_arg=hadoop_password_arg)) reload_windows_env() # create additional services manually due to hdp.msi limitations _ensure_services_created(hadoop_user, hadoop_password) _create_symlinks(stack_version) # finalizing install _write_marker() _validate_msi_install() finally: install_lock.unlock()
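# Sketch of how the msiexec argument paths above are derived: the log file name is the MSI
# name with its extension swapped for "log" (msi_file[:-3] + "log"), and every path is quoted
# so that spaces in save_dir survive the command line. quote_path below is a simplified
# stand-in for os_utils.quote_path, and ntpath is used because this installer targets Windows.

import ntpath

def quote_path(path):
    # Simplified placeholder: wrap the path in double quotes.
    return '"' + path + '"'

def msi_install_paths(save_dir, msi_file):
    msi_path = quote_path(ntpath.join(save_dir, msi_file))
    # "hdp-2.3.0.0.winpkg.msi"[:-3] + "log" -> "hdp-2.3.0.0.winpkg.log"
    log_path = quote_path(ntpath.join(save_dir, msi_file[:-3] + "log"))
    layout_path = quote_path(ntpath.join(save_dir, "properties.txt"))
    return msi_path, log_path, layout_path

# Example values only:
print(msi_install_paths(r"C:\hdp install", "hdp-2.3.0.0.winpkg.msi"))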
sudo = AMBARI_SUDO_BINARY # Global flag enabling or disabling the sysprep feature host_sys_prepped = default("/ambariLevelParams/host_sys_prepped", False) # Whether to skip copying fast-hdfs-resource.jar to /var/lib/ambari-agent/lib/ # This is required if tarballs are going to be copied to HDFS, so set to False sysprep_skip_copy_fast_jar_hdfs = host_sys_prepped and default( "/configurations/cluster-env/sysprep_skip_copy_fast_jar_hdfs", False) # Whether to skip setting up the unlimited key JCE policy sysprep_skip_setup_jce = host_sys_prepped and default( "/configurations/cluster-env/sysprep_skip_setup_jce", False) stack_version_unformatted = config['clusterLevelParams']['stack_version'] stack_version_formatted = format_stack_version(stack_version_unformatted) major_stack_version = get_major_version(stack_version_formatted) dfs_type = default("/clusterLevelParams/dfs_type", "") hadoop_conf_dir = "/etc/hadoop" component_list = default("/localComponents", []) hdfs_tmp_dir = default("/configurations/hadoop-env/hdfs_tmp_dir", "/tmp") hadoop_metrics2_properties_content = None if 'hadoop-metrics2.properties' in config['configurations']: hadoop_metrics2_properties_content = config['configurations'][ 'hadoop-metrics2.properties']['content'] hadoop_home = stack_root + '/hadoop' hadoop_libexec_dir = hadoop_home + "/libexec"
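# The sysprep flags above are read with Ambari's default(path, fallback) helper and AND-ed
# with host_sys_prepped, so they can only be true on sysprepped hosts. Below is a minimal
# stand-in for that lookup pattern (not the real helper): it walks a nested command-JSON
# dict by a "/"-separated path; cfg holds illustrative values only.

def default_lookup(config, path, fallback):
    """Return the value at an '/a/b/c' path inside nested dicts, or fallback if absent."""
    node = config
    for key in path.strip("/").split("/"):
        if not isinstance(node, dict) or key not in node:
            return fallback
        node = node[key]
    return node

cfg = {"ambariLevelParams": {"host_sys_prepped": True},
       "configurations": {"cluster-env": {"sysprep_skip_setup_jce": True}}}
prepped = default_lookup(cfg, "/ambariLevelParams/host_sys_prepped", False)
skip_jce = prepped and default_lookup(cfg, "/configurations/cluster-env/sysprep_skip_setup_jce", False)
print(skip_jce)  # True only because both flags are set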
def prestart(env): import params if params.version and compare_versions(format_stack_version(params.version), '4.0.0.0') >= 0: stack_select.select_packages(params.version)
def spark_service(name, upgrade_type=None, action=None): import params if action == 'start': effective_version = params.version if upgrade_type is not None else params.stack_version_formatted if effective_version: effective_version = format_stack_version(effective_version) if effective_version and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version): # create & copy spark2-hdp-yarn-archive.tar.gz to hdfs source_dir=params.spark_home+"/jars" tmp_archive_file="/tmp/spark2/spark2-hdp-yarn-archive.tar.gz" make_tarfile(tmp_archive_file, source_dir) copy_to_hdfs("spark2", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped) # create spark history directory params.HdfsResource(params.spark_history_dir, type="directory", action="create_on_execute", owner=params.spark_user, group=params.user_group, mode=0777, recursive_chmod=True ) params.HdfsResource(None, action="execute") if params.security_enabled: spark_kinit_cmd = format("{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ") Execute(spark_kinit_cmd, user=params.spark_user) # Spark 1.3.1.2.3, and higher, which was included in HDP 2.3, does not have a dependency on Tez, so it does not # need to copy the tarball, otherwise, copy it. if params.stack_version_formatted and check_stack_feature(StackFeature.TEZ_FOR_SPARK, params.stack_version_formatted): resource_created = copy_to_hdfs("tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped) if resource_created: params.HdfsResource(None, action="execute") if name == 'jobhistoryserver': historyserver_no_op_test = format( 'ls {spark_history_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_history_server_pid_file}` >/dev/null 2>&1') try: Execute(format('{spark_history_server_start}'), user=params.spark_user, environment={'JAVA_HOME': params.java_home}, not_if=historyserver_no_op_test) except: show_logs(params.spark_log_dir, user=params.spark_user) raise elif name == 'sparkthriftserver': if params.security_enabled: hive_principal = params.hive_kerberos_principal.replace('_HOST', socket.getfqdn().lower()) hive_kinit_cmd = format("{kinit_path_local} -kt {hive_kerberos_keytab} {hive_principal}; ") Execute(hive_kinit_cmd, user=params.hive_user) thriftserver_no_op_test = format( 'ls {spark_thrift_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_thrift_server_pid_file}` >/dev/null 2>&1') try: Execute(format('{spark_thrift_server_start} --properties-file {spark_thrift_server_conf_file} {spark_thrift_cmd_opts_properties}'), user=params.hive_user, environment={'JAVA_HOME': params.java_home}, not_if=thriftserver_no_op_test ) except: show_logs(params.spark_log_dir, user=params.hive_user) raise elif action == 'stop': if name == 'jobhistoryserver': try: Execute(format('{spark_history_server_stop}'), user=params.spark_user, environment={'JAVA_HOME': params.java_home} ) except: show_logs(params.spark_log_dir, user=params.spark_user) raise File(params.spark_history_server_pid_file, action="delete" ) elif name == 'sparkthriftserver': try: Execute(format('{spark_thrift_server_stop}'), user=params.hive_user, environment={'JAVA_HOME': params.java_home} ) except: show_logs(params.spark_log_dir, user=params.hive_user) raise File(params.spark_thrift_server_pid_file, action="delete" )
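# spark_service() above builds spark2-hdp-yarn-archive.tar.gz from the jars directory before
# copying it to HDFS. A plain standard-library approximation of such a make_tarfile step; the
# real helper used above may differ, and the paths in the commented example are illustrative.

import os
import tarfile

def make_tar_gz(output_path, source_dir):
    """Create a gzip-compressed tarball whose entries are stored relative to source_dir."""
    parent = os.path.dirname(output_path)
    if parent and not os.path.isdir(parent):
        os.makedirs(parent)
    with tarfile.open(output_path, "w:gz") as tar:
        # arcname keeps archive paths relative, e.g. "spark-core_2.11-x.y.z.jar".
        for name in sorted(os.listdir(source_dir)):
            tar.add(os.path.join(source_dir, name), arcname=name)

# Example (illustrative paths):
# make_tar_gz("/tmp/spark2/spark2-hdp-yarn-archive.tar.gz", "/usr/hdp/current/spark2-client/jars")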
agent_stack_retry_on_unavailability = config['hostLevelParams'][ 'agent_stack_retry_on_unavailability'] agent_stack_retry_count = expect("/hostLevelParams/agent_stack_retry_count", int) # New Cluster Stack Version that is defined during the RESTART of a Rolling Upgrade version = default("/commandParams/version", None) stack_version_unformatted = status_params.stack_version_unformatted stack_version_formatted = status_params.stack_version_formatted upgrade_direction = default("/commandParams/upgrade_direction", None) # current host stack version current_version = default("/hostLevelParams/current_version", None) current_version_formatted = format_stack_version(current_version) etc_prefix_dir = "/etc/falcon" # hadoop params hadoop_home_dir = stack_select.get_hadoop_dir("home") hadoop_bin_dir = stack_select.get_hadoop_dir("bin") if stack_version_formatted and check_stack_feature( StackFeature.ROLLING_UPGRADE, stack_version_formatted): # if this is a server action, then use the server binaries; smoke tests # use the client binaries server_role_dir_mapping = { 'FALCON_SERVER': 'falcon-server', 'FALCON_SERVICE_CHECK': 'falcon-client' }
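# The server_role_dir_mapping above selects 'falcon-server' binaries for server commands and
# 'falcon-client' for the smoke test. A small illustrative sketch of how such a mapping can be
# consumed; the "<stack_root>/current/<component>" layout mirrors the templates used elsewhere
# in these scripts, and falcon_dir_for_role is a hypothetical helper, not params.py code.

SERVER_ROLE_DIR_MAPPING = {
    'FALCON_SERVER': 'falcon-server',
    'FALCON_SERVICE_CHECK': 'falcon-client',
}

def falcon_dir_for_role(stack_root, command_role, default_dir='falcon-client'):
    component_dir = SERVER_ROLE_DIR_MAPPING.get(command_role, default_dir)
    return "{0}/current/{1}".format(stack_root, component_dir)

print(falcon_dir_for_role("/usr/hdp", "FALCON_SERVER"))         # /usr/hdp/current/falcon-server
print(falcon_dir_for_role("/usr/hdp", "FALCON_SERVICE_CHECK"))  # /usr/hdp/current/falcon-client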
from resource_management.libraries.functions import get_kinit_path from resource_management.libraries.functions.get_not_managed_resources import get_not_managed_resources from resource_management.libraries.functions.setup_ranger_plugin_xml import get_audit_configs, generate_ranger_service_config # server configurations config = Script.get_config() tmp_dir = Script.get_tmp_dir() stack_root = Script.get_stack_root() stack_name = default("/clusterLevelParams/stack_name", None) retryAble = default("/commandParams/command_retry_enabled", False) # Version being upgraded/downgraded to version = default("/commandParams/version", None) stack_version_unformatted = config['clusterLevelParams']['stack_version'] stack_version_formatted = format_stack_version(stack_version_unformatted) upgrade_direction = default("/commandParams/upgrade_direction", None) # get the correct version to use for checking stack features version_for_stack_feature_checks = get_stack_feature_version(config) stack_supports_ranger_kerberos = check_stack_feature( StackFeature.RANGER_KERBEROS_SUPPORT, version_for_stack_feature_checks) stack_supports_ranger_audit_db = check_stack_feature( StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks) stack_supports_core_site_for_ranger_plugin = check_stack_feature( StackFeature.CORE_SITE_FOR_RANGER_PLUGINS_SUPPORT, version_for_stack_feature_checks) # When downgrading the 'version' is pointing to the downgrade-target version # downgrade_from_version provides the source-version the downgrade is happening from
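# Several snippets in this file gate behaviour on check_stack_feature(...). A self-contained
# approximation of the idea: each feature has a minimum version and optionally a maximum
# version after which it no longer applies. The table below is illustrative only and is not
# the stack_features.json data shipped with any stack.

FEATURE_VERSIONS = {
    # feature name: (min_version, max_version or None while still supported)
    "ranger_kerberos_support": ("2.5.0.0", None),
    "ranger_audit_db_support": ("2.2.0.0", "3.0.0.0"),
}

def _as_tuple(version):
    return tuple(int(part) for part in version.split('-')[0].split('.'))

def has_stack_feature(feature, stack_version):
    bounds = FEATURE_VERSIONS.get(feature)
    if bounds is None:
        return False
    minimum, maximum = bounds
    current = _as_tuple(stack_version)
    if current < _as_tuple(minimum):
        return False
    return maximum is None or current < _as_tuple(maximum)

print(has_stack_feature("ranger_kerberos_support", "2.6.0.0-334"))  # True
print(has_stack_feature("ranger_audit_db_support", "3.1.0.0"))      # False in this example table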
'alluxio.archive.file'] # alluxio master address if 'clusterHostInfo' in config: alluxio_master = config['clusterHostInfo']['alluxio_master_hosts'] # alluxio underfs address underfs_addr = config['configurations']['alluxio-env'][ 'alluxio.underfs.address'] # alluxio worker memory allotment worker_mem = config['configurations']['alluxio-env']['alluxio.worker.memory'] # Find current stack and version to push agent files to stack_name = default("/hostLevelParams/stack_name", None) stack_version = format_stack_version(default("/commandParams/version", "2.6")) # Set install dir usr_base = "/usr/hdp/" base_dir = usr_base + stack_version[:3] + "/alluxio/" # Alluxio archive on agent nodes alluxio_package_dir = "/var/lib/ambari-agent/cache/stacks/" + stack_name + "/" + stack_version[: 3] + "/services/ALLUXIO/package/" # alluxio log dir log_dir = config['configurations']['alluxio-env']['alluxio.log.dir'] # alluxio log dir pid_dir = config['configurations']['alluxio-env']['alluxio.pid.dir']
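# The Alluxio params above derive the install and package directories from the first three
# characters of the formatted stack version (e.g. "2.6" from "2.6.0.0"), which assumes a
# single-digit major.minor. A small sketch of that derivation with example values; the paths
# simply mirror the strings in the snippet.

def alluxio_dirs(stack_name, stack_version):
    short_version = stack_version[:3]                  # "2.6.0.0" -> "2.6"
    base_dir = "/usr/hdp/" + short_version + "/alluxio/"
    package_dir = ("/var/lib/ambari-agent/cache/stacks/" + stack_name + "/" +
                   short_version + "/services/ALLUXIO/package/")
    return base_dir, package_dir

print(alluxio_dirs("HDP", "2.6.0.0"))
# ('/usr/hdp/2.6/alluxio/', '/var/lib/ambari-agent/cache/stacks/HDP/2.6/services/ALLUXIO/package/')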