def get_spark_version(service_name, component_name, yarn_version):
  """
  Attempts to calculate the correct version placeholder value for spark or spark2 based on
  what is installed in the cluster. If Spark is not installed, then this value will need to be
  that of YARN so it can still find the correct shuffle class.

  On cluster installs, we have not yet calculated any versions and all known values could be None.
  This doesn't affect daemons, but it does affect client-only hosts where they will never receive
  a start command after install. Therefore, this function will attempt to use stack-select as a
  last resort to get a valid value.

  :param service_name: the service name (SPARK, SPARK2, etc)
  :param component_name: the component name (SPARK_CLIENT, etc)
  :param yarn_version: the default version of Yarn to use if no spark is installed
  :return: a value for the version placeholder in shuffle classpath properties; this is still
    None when every lookup came back empty or the stack-select query failed
  """
  # start off seeing if we need to populate a default value for YARN
  if yarn_version is None:
    yarn_version = component_version.get_component_repository_version(service_name = "YARN",
      component_name = "YARN_CLIENT")

  # now try to get the version of spark/spark2, defaulting to the version of YARN
  shuffle_classpath_version = component_version.get_component_repository_version(service_name = service_name,
    component_name = component_name, default_value = yarn_version)

  # even with the default of using YARN's version, on an install this might be None since we haven't
  # calculated the version of YARN yet - use stack_select as a last ditch effort
  if shuffle_classpath_version is None:
    try:
      shuffle_classpath_version = stack_select.get_role_component_current_stack_version()
    except Exception:
      # best effort only: log the failure and fall through with None, but do not
      # swallow SystemExit / KeyboardInterrupt the way a bare except would
      Logger.exception("Unable to query for the correct shuffle classpath")

  return shuffle_classpath_version
def test_get_component_version_by_service_name(self):
  """
  Verifies that a repository version can be looked up from the component
  version map when only the service name is supplied.
  :return:
  """
  Script.config = TestComponentVersionMapping._get_component_version_mappings()

  # (service name, version expected from the mapping fixture)
  expected_versions = (
    ("HDFS", "2.5.0.0-1234"),
    ("ZOOKEEPER", "2.6.0.0-9999"),
  )

  for service, expected in expected_versions:
    resolved = component_version.get_component_repository_version(service_name=service)
    self.assertEqual(resolved, expected)
def get_hadoop_dir(target):
  """
  Resolve the shared hadoop directory for the component the current command
  is running against.

  Without the rolling upgrade stack feature the entry from HADOOP_DIR_DEFAULTS
  is returned as-is. With it, the directory built from the component's version
  is preferred, and the "current" hadoop-client directory is used when no
  version is known or the versioned directory does not exist.

  :param target: the target directory; must be a key of HADOOP_DIR_DEFAULTS
  :return: the path of the hadoop directory to use
  :raises Fail: if the target is not defined in HADOOP_DIR_DEFAULTS
  """
  stack_root = Script.get_stack_root()
  stack_version = Script.get_stack_version()

  if target not in HADOOP_DIR_DEFAULTS:
    raise Fail("Target {0} not defined".format(target))

  default_dir = HADOOP_DIR_DEFAULTS[target]

  normalized_stack_version = format_stack_version(stack_version)
  if not stack_features.check_stack_feature(StackFeature.ROLLING_UPGRADE, normalized_stack_version):
    return default_dir

  # the component map is the preferred source of the version, then the command itself
  version = component_version.get_component_repository_version()
  if version is None:
    version = default("/commandParams/version", None)

  # "home" is rendered from its own template and takes no trailing target segment
  if target == "home":
    template = HADOOP_HOME_DIR_TEMPLATE
    versioned_args = (stack_root, version, "hadoop")
    current_args = (stack_root, "current", "hadoop-client")
  else:
    template = HADOOP_DIR_TEMPLATE
    versioned_args = (stack_root, version, "hadoop", target)
    current_args = (stack_root, "current", "hadoop-client", target)

  versioned_dir = template.format(*versioned_args)
  if version is None or sudo.path_isdir(versioned_dir) is False:
    return template.format(*current_args)

  return versioned_dir
def get_hadoop_conf_dir():
  """
  Resolve the shared hadoop configuration directory for the component the
  current command is running against.

  Stacks without the config versioning feature use the etc/hadoop/conf
  directory at the filesystem root. Otherwise the version-specific conf
  directory under the stack root is preferred, and the "current" hadoop-client
  conf directory is used when no version is known or the versioned directory
  does not exist. The chosen directory is logged before it is returned.

  :return: the path of the hadoop conf directory to use
  """
  stack_root = Script.get_stack_root()
  stack_version = Script.get_stack_version()

  if check_stack_feature(StackFeature.CONFIG_VERSIONING, stack_version):
    # the component map is the preferred source of the version, then the command itself
    version = component_version.get_component_repository_version()
    if version is None:
      version = default("/commandParams/version", None)

    versioned_conf_dir = os.path.join(stack_root, str(version), "hadoop", "conf")
    if version is not None and sudo.path_isdir(versioned_conf_dir) is not False:
      conf_dir = versioned_conf_dir
    else:
      conf_dir = os.path.join(stack_root, "current", "hadoop-client", "conf")
  else:
    conf_dir = os.path.join(os.path.sep, "etc", "hadoop", "conf")

  Logger.info("Using hadoop conf dir: {0}".format(conf_dir))
  return conf_dir
def _prepare_tez_tarball(): """ Prepares the Tez tarball by adding the Hadoop native libraries found in the mapreduce tarball. It's very important to use the version of mapreduce which matches tez here. Additionally, this will also copy native LZO to the tez tarball if LZO is enabled and the GPL license has been accepted. :return: the full path of the newly created tez tarball to use """ import tempfile Logger.info("Preparing the Tez tarball...") # get the mapreduce tarball which matches the version of tez # tez installs the mapreduce tar, so it should always be present _, mapreduce_source_file, _, _ = get_tarball_paths("mapreduce") _, tez_source_file, _, _ = get_tarball_paths("tez") temp_dir = Script.get_tmp_dir() # create the temp staging directories ensuring that non-root agents using tarfile can work with them mapreduce_temp_dir = tempfile.mkdtemp(prefix="mapreduce-tarball-", dir=temp_dir) tez_temp_dir = tempfile.mkdtemp(prefix="tez-tarball-", dir=temp_dir) sudo.chmod(mapreduce_temp_dir, 0777) sudo.chmod(tez_temp_dir, 0777) Logger.info("Extracting {0} to {1}".format(mapreduce_source_file, mapreduce_temp_dir)) tar_archive.extract_archive(mapreduce_source_file, mapreduce_temp_dir) Logger.info("Extracting {0} to {1}".format(tez_source_file, tez_temp_dir)) tar_archive.untar_archive(tez_source_file, tez_temp_dir) hadoop_lib_native_dir = os.path.join(mapreduce_temp_dir, "hadoop", "lib", "native") tez_lib_dir = os.path.join(tez_temp_dir, "lib") if not os.path.exists(hadoop_lib_native_dir): raise Fail( "Unable to seed the Tez tarball with native libraries since the source Hadoop native lib directory {0} does not exist" .format(hadoop_lib_native_dir)) if not os.path.exists(tez_lib_dir): raise Fail( "Unable to seed the Tez tarball with native libraries since the target Tez lib directory {0} does not exist" .format(tez_lib_dir)) # copy native libraries from hadoop to tez Execute(("cp", "-a", hadoop_lib_native_dir, tez_lib_dir), sudo=True) # if enabled, LZO GPL libraries 
must be copied as well if lzo_utils.should_install_lzo(): stack_root = Script.get_stack_root() service_version = component_version.get_component_repository_version( service_name="TEZ") # some installations might not have Tez, but MapReduce2 should be a fallback to get the LZO libraries from if service_version is None: Logger.warning( "Tez does not appear to be installed, using the MapReduce version to get the LZO libraries" ) service_version = component_version.get_component_repository_version( service_name="MAPREDUCE2") hadoop_lib_native_lzo_dir = os.path.join(stack_root, service_version, "hadoop", "lib", "native") if not sudo.path_isdir(hadoop_lib_native_lzo_dir): Logger.warning( "Unable to located native LZO libraries at {0}, falling back to hadoop home" .format(hadoop_lib_native_lzo_dir)) hadoop_lib_native_lzo_dir = os.path.join(stack_root, "current", "hadoop-client", "lib", "native") if not sudo.path_isdir(hadoop_lib_native_lzo_dir): raise Fail( "Unable to seed the Tez tarball with native libraries since LZO is enabled but the native LZO libraries could not be found at {0}" .format(hadoop_lib_native_lzo_dir)) Execute(("cp", "-a", hadoop_lib_native_lzo_dir, tez_lib_dir), sudo=True) # ensure that the tez/lib directory is readable by non-root (which it typically is not) Directory(tez_lib_dir, mode=0755, cd_access='a', recursive_ownership=True) # create the staging directory so that non-root agents can write to it tez_native_tarball_staging_dir = os.path.join( temp_dir, "tez-native-tarball-staging") if not os.path.exists(tez_native_tarball_staging_dir): Directory(tez_native_tarball_staging_dir, mode=0777, cd_access='a', create_parents=True, recursive_ownership=True) tez_tarball_with_native_lib = os.path.join(tez_native_tarball_staging_dir, "tez-native.tar.gz") Logger.info("Creating a new Tez tarball at {0}".format( tez_tarball_with_native_lib)) # tar up Tez, making sure to specify nothing for the arcname so that it does not include an absolute path with 
closing(tarfile.open(tez_tarball_with_native_lib, "w:gz")) as new_tez_tarball: new_tez_tarball.add(tez_temp_dir, arcname=os.path.sep) # ensure that the tarball can be read and uploaded sudo.chmod(tez_tarball_with_native_lib, 0744) # cleanup sudo.rmtree(mapreduce_temp_dir) sudo.rmtree(tez_temp_dir) return tez_tarball_with_native_lib
# MapReduce directory root: the client directory, unless the command's role
# has an entry in the server role directory map
mapred_role_root = "hadoop-mapreduce-client"
command_role = default("/role", "")
if command_role in MAPR_SERVER_ROLE_DIRECTORY_MAP:
  mapred_role_root = MAPR_SERVER_ROLE_DIRECTORY_MAP[command_role]

# YARN directory root
yarn_role_root = "hadoop-yarn-client"
if command_role in YARN_SERVER_ROLE_DIRECTORY_MAP:
  yarn_role_root = YARN_SERVER_ROLE_DIRECTORY_MAP[command_role]

# defaults set to current based on role
# NOTE(review): format() presumably fills the {placeholders} from variables in
# scope (stack_root is not assigned in this span) - confirm before renaming anything
hadoop_mapr_home = format("{stack_root}/current/{mapred_role_root}")
hadoop_yarn_home = format("{stack_root}/current/{yarn_role_root}")

# try to render the specific version: the component map is consulted first,
# then the version carried by the command parameters
version = component_version.get_component_repository_version()
if version is None:
  version = default("/commandParams/version", None)

if version is not None:
  hadoop_mapr_versioned_home = format(
    "{stack_root}/{version}/hadoop-mapreduce")
  hadoop_yarn_versioned_home = format(
    "{stack_root}/{version}/hadoop-yarn")

  # only switch away from the "current" defaults when the versioned directory exists
  if sudo.path_isdir(hadoop_mapr_versioned_home):
    hadoop_mapr_home = hadoop_mapr_versioned_home

  if sudo.path_isdir(hadoop_yarn_versioned_home):
    hadoop_yarn_home = hadoop_yarn_versioned_home
'hadoop-yarn-resourcemanager')
major_stack_version = get_major_version(stack_version_formatted_major)

# stack feature flags, all evaluated against version_for_stack_feature_checks
stack_supports_ru = check_stack_feature(StackFeature.ROLLING_UPGRADE,
  version_for_stack_feature_checks)
stack_supports_timeline_state_store = check_stack_feature(
  StackFeature.TIMELINE_STATE_STORE, version_for_stack_feature_checks)

# New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade.
# It cannot be used during the initial Cluster Install because the version is not yet known.
version = default("/commandParams/version", None)

# these are used to render the classpath for picking up Spark classes
# in the event that spark is not installed, then we must default to the version of YARN installed
# since it will still load classes from its own spark version
# NOTE: the default passed here is the command's version from above, which is None
# when /commandParams/version is absent, so both values may end up None
spark_version = component_version.get_component_repository_version(
  service_name="SPARK", component_name="SPARK_CLIENT", default_value=version)
spark2_version = component_version.get_component_repository_version(
  service_name="SPARK2", component_name="SPARK2_CLIENT", default_value=version)

stack_supports_ranger_kerberos = check_stack_feature(
  StackFeature.RANGER_KERBEROS_SUPPORT, version_for_stack_feature_checks)
stack_supports_ranger_audit_db = check_stack_feature(
  StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks)

hostname = config['hostname']

# hadoop default parameters
hadoop_home = status_params.hadoop_home
hadoop_libexec_dir = stack_select.get_hadoop_dir("libexec")