Example #1
def should_install_falcon_atlas_hook():
  config = Script.get_config()
  stack_version_unformatted = config['hostLevelParams']['stack_version']
  stack_version_formatted = format_stack_version(stack_version_unformatted)
  if check_stack_feature(StackFeature.FALCON_ATLAS_SUPPORT_2_3, stack_version_formatted) \
      or check_stack_feature(StackFeature.FALCON_ATLAS_SUPPORT, stack_version_formatted):
    return _has_applicable_local_component(config, ['FALCON_SERVER'])
  return False
  def pre_upgrade_restart(self, env, upgrade_type=None):
    import params

    env.set_params(params)
    if params.version and check_stack_feature(StackFeature.ROLLING_UPGRADE, params.version):
      Logger.info("Executing Spark2 Client Stack Upgrade pre-restart")
      conf_select.select(params.stack_name, "spark", params.version)
      stack_select.select("spark2-client", params.version)
  def pre_upgrade_restart(self, env, upgrade_type=None):
    import params

    env.set_params(params)
    if params.version and check_stack_feature(StackFeature.ROLLING_UPGRADE, params.version):
      Logger.info("Executing Spark2 Job History Server Stack Upgrade pre-restart")
      conf_select.select(params.stack_name, "spark2", params.version)
      stack_select.select("spark2-historyserver", params.version)

      # Spark 1.3.1.2.3 and higher (first shipped in HDP 2.3) does not depend on Tez,
      # so the Tez tarball only needs to be copied for older versions.
      if params.version and check_stack_feature(StackFeature.TEZ_FOR_SPARK, params.version):
        resource_created = copy_to_hdfs(
          "tez",
          params.user_group,
          params.hdfs_user,
          host_sys_prepped=params.host_sys_prepped)
        if resource_created:
          params.HdfsResource(None, action="execute")
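Every example on this page gates its logic on check_stack_feature(StackFeature.X, version). As a rough mental model only (a minimal sketch, not Ambari's real implementation, which reads the feature-to-version mapping from the stack definition), the check boils down to comparing the formatted stack version against the version at which a feature became available:

# Minimal illustrative sketch of a version-gated feature check.
# The feature names and minimum versions below are hypothetical, not the
# real Ambari stack feature metadata.
ASSUMED_FEATURES = {
    "rolling_upgrade": "2.2.0.0",
    "config_versioning": "2.3.0.0",
}

def _version_tuple(version):
    # Turn "2.3.0.0-1234" into a comparable tuple such as (2, 3, 0, 0).
    return tuple(int(part) for part in version.split("-")[0].split("."))

def check_stack_feature_sketch(feature, stack_version):
    # Mirror the guard used in the examples: an empty/None version means "not supported".
    if not stack_version:
        return False
    min_version = ASSUMED_FEATURES.get(feature)
    if min_version is None:
        return False
    return _version_tuple(stack_version) >= _version_tuple(min_version)

# check_stack_feature_sketch("rolling_upgrade", "2.3.0.0-1234")  -> True
# check_stack_feature_sketch("config_versioning", "2.1.0.0")     -> False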
Example #4
def get_hadoop_dir(target, force_latest_on_upgrade=False):
  """
  Return the hadoop shared directory in the following override order
  1. Use default for 2.1 and lower
  2. If 2.2 and higher, use <stack-root>/current/hadoop-client/{target}
  3. If 2.2 and higher AND for an upgrade, use <stack-root>/<version>/hadoop/{target}.
  However, if the upgrade has not yet invoked <stack-selector-tool>, return the current
  version of the component.
  :target: the target directory
  :force_latest_on_upgrade: if True, then this will return the "current" directory
  without the stack version built into the path, such as <stack-root>/current/hadoop-client
  """
  stack_root = Script.get_stack_root()
  stack_version = Script.get_stack_version()

  if target not in HADOOP_DIR_DEFAULTS:
    raise Fail("Target {0} not defined".format(target))

  hadoop_dir = HADOOP_DIR_DEFAULTS[target]

  formatted_stack_version = format_stack_version(stack_version)
  if formatted_stack_version and check_stack_feature(StackFeature.ROLLING_UPGRADE, formatted_stack_version):
    # home uses a different template
    if target == "home":
      hadoop_dir = HADOOP_HOME_DIR_TEMPLATE.format(stack_root, "current", "hadoop-client")
    else:
      hadoop_dir = HADOOP_DIR_TEMPLATE.format(stack_root, "current", "hadoop-client", target)

    # if we are not forcing "current" for HDP 2.2, then attempt to determine
    # if the exact version needs to be returned in the directory
    if not force_latest_on_upgrade:
      stack_info = _get_upgrade_stack()

      if stack_info is not None:
        stack_version = stack_info[1]

        # determine if <stack-selector-tool> has been run and if not, then use the current
        # hdp version until this component is upgraded
        current_stack_version = get_role_component_current_stack_version()
        if current_stack_version is not None and stack_version != current_stack_version:
          stack_version = current_stack_version

        if target == "home":
          # home uses a different template
          hadoop_dir = HADOOP_HOME_DIR_TEMPLATE.format(stack_root, stack_version, "hadoop")
        else:
          hadoop_dir = HADOOP_DIR_TEMPLATE.format(stack_root, stack_version, "hadoop", target)

  return hadoop_dir
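For orientation, here is how the branches of get_hadoop_dir typically resolve. The stack root /usr/hdp and the version string are assumptions for illustration only; the real values come from Script.get_stack_root() and the upgrade state, and the template constant below is a stand-in with the same four slots as the module-level one used above.

# Hypothetical values for illustration.
HADOOP_DIR_TEMPLATE = "{0}/{1}/{2}/{3}"
stack_root = "/usr/hdp"

# Stack supports ROLLING_UPGRADE and no upgrade is in progress (or
# force_latest_on_upgrade=True): the "current" symlink is used.
print(HADOOP_DIR_TEMPLATE.format(stack_root, "current", "hadoop-client", "bin"))
# -> /usr/hdp/current/hadoop-client/bin

# Upgrade in progress and <stack-selector-tool> has not yet switched this
# component: the explicit version directory is used instead.
print(HADOOP_DIR_TEMPLATE.format(stack_root, "2.5.0.0-1234", "hadoop", "bin"))
# -> /usr/hdp/2.5.0.0-1234/hadoop/bin

# On stacks without ROLLING_UPGRADE support, the static HADOOP_DIR_DEFAULTS
# entry for the target is returned unchanged.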
Example #5
 def should_expose_component_version(self, command_name):
   """
   Analyzes config and given command to determine if stack version should be written
   to structured out. Currently only HDP stack versions >= 2.2 are supported.
   :param command_name: command name
   :return: True or False
   """
   from resource_management.libraries.functions.default import default
   stack_version_unformatted = str(default("/hostLevelParams/stack_version", ""))
   stack_version_formatted = format_stack_version(stack_version_unformatted)
   if stack_version_formatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version_formatted):
     if command_name.lower() == "status":
       request_version = default("/commandParams/request_version", None)
       if request_version is not None:
         return True
     else:
       # Populate version only on base commands
       return command_name.lower() == "start" or command_name.lower() == "install" or command_name.lower() == "restart"
   return False
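The decision above can be restated without any Ambari dependencies (illustrative only; the hypothetical helper below takes the values the real method pulls from the command JSON as plain arguments):

def should_expose_component_version_sketch(command_name, supports_rolling_upgrade,
                                           request_version=None):
    # Hypothetical stand-in mirroring the logic above, with the config lookups
    # replaced by explicit parameters.
    if not supports_rolling_upgrade:
        return False
    if command_name.lower() == "status":
        return request_version is not None
    return command_name.lower() in ("start", "install", "restart")

assert should_expose_component_version_sketch("START", True)
assert not should_expose_component_version_sketch("status", True)
assert should_expose_component_version_sketch("status", True, request_version="2.5.0.0-1234")
assert not should_expose_component_version_sketch("start", False)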
Example #6
def get_lzo_packages(stack_version_unformatted):
  lzo_packages = []
  script_instance = Script.get_instance()
  if OSCheck.is_suse_family() and int(OSCheck.get_os_major_version()) >= 12:
    lzo_packages += ["liblzo2-2", "hadoop-lzo-native"]
  elif OSCheck.is_redhat_family() or OSCheck.is_suse_family():
    lzo_packages += ["lzo", "hadoop-lzo-native"]
  elif OSCheck.is_ubuntu_family():
    lzo_packages += ["liblzo2-2"]

  if stack_version_unformatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version_unformatted):
    if OSCheck.is_ubuntu_family():
      lzo_packages += [script_instance.format_package_name("hadooplzo-${stack_version}") ,
                       script_instance.format_package_name("hadooplzo-${stack_version}-native")]
    else:
      lzo_packages += [script_instance.format_package_name("hadooplzo_${stack_version}"),
                       script_instance.format_package_name("hadooplzo_${stack_version}-native")]
  else:
    lzo_packages += ["hadoop-lzo"]

  return lzo_packages
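The ${stack_version} placeholder in those package names is expanded later by Script.format_package_name. A rough sketch of that substitution, under the assumption that it simply swaps the placeholder for the underscore-separated repository version (the real implementation resolves the version from repository metadata):

def format_package_name_sketch(name_template, repo_version="2.5.0.0-1234"):
    # Hypothetical stand-in: replace ${stack_version} with the dist-style
    # version string, e.g. "2.5.0.0-1234" -> "2_5_0_0_1234".
    dist_version = repo_version.replace(".", "_").replace("-", "_")
    return name_template.replace("${stack_version}", dist_version)

print(format_package_name_sketch("hadooplzo_${stack_version}"))
# -> hadooplzo_2_5_0_0_1234
print(format_package_name_sketch("hadooplzo-${stack_version}-native"))
# -> hadooplzo-2_5_0_0_1234-native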
Example #7
  def get_config_dir_during_stack_upgrade(self, env, base_dir, conf_select_name):
    """
    Because this gets called during a Rolling Upgrade, the new configs have already been saved, so we must be
    careful to only call configure() on the directory with the new version.

    If valid, returns the config directory to save configs to, otherwise, return None
    """
    import params
    env.set_params(params)

    required_attributes = ["stack_name", "stack_root", "version"]
    for attribute in required_attributes:
      if not hasattr(params, attribute):
        raise Fail("Failed in function 'stack_upgrade_save_new_config' because params was missing variable %s." % attribute)

    Logger.info("stack_upgrade_save_new_config(): Checking if can write new client configs to new config version folder.")

    if check_stack_feature(StackFeature.CONFIG_VERSIONING, params.version):
      # Even though hdp-select has not yet been called, write new configs to the new config directory.
      config_path = os.path.join(params.stack_root, params.version, conf_select_name, "conf")
      return os.path.realpath(config_path)
    return None
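For example, with an assumed stack root of /usr/hdp, an upgrade target version of 2.5.0.0-1234, and conf_select_name='hive' (all hypothetical values), the directory handed back for writing the new configs would be:

import os

# Hypothetical inputs for illustration only.
stack_root = "/usr/hdp"
version = "2.5.0.0-1234"
conf_select_name = "hive"

config_path = os.path.join(stack_root, version, conf_select_name, "conf")
print(os.path.realpath(config_path))
# -> /usr/hdp/2.5.0.0-1234/hive/conf (after resolving any symlinks on the host)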
Example #8
def get_hadoop_dir_for_stack_version(target, stack_version):
  """
  Return the hadoop shared directory for the provided stack version. This is necessary
  when folder paths of downgrade-source stack-version are needed after <stack-selector-tool>.
  :target: the target directory
  :stack_version: stack version to get hadoop dir for
  """

  stack_root = Script.get_stack_root()
  if target not in HADOOP_DIR_DEFAULTS:
    raise Fail("Target {0} not defined".format(target))

  hadoop_dir = HADOOP_DIR_DEFAULTS[target]

  formatted_stack_version = format_stack_version(stack_version)
  if formatted_stack_version and check_stack_feature(StackFeature.ROLLING_UPGRADE, formatted_stack_version):
    # home uses a different template
    if target == "home":
      hadoop_dir = HADOOP_HOME_DIR_TEMPLATE.format(stack_root, stack_version, "hadoop")
    else:
      hadoop_dir = HADOOP_DIR_TEMPLATE.format(stack_root, stack_version, "hadoop", target)

  return hadoop_dir
Example #9
stack_name = status_params.stack_name
current_version = default("/hostLevelParams/current_version", None)
component_directory = status_params.component_directory

# New Cluster Stack Version that is defined during the RESTART of a Rolling Upgrade
version = default("/commandParams/version", None)

# default parameters
zk_home = "/usr"
zk_bin = "/usr/lib/zookeeper/bin"
zk_cli_shell = "/usr/lib/zookeeper/bin/zkCli.sh"
config_dir = "/etc/zookeeper/conf"
zk_smoke_out = os.path.join(tmp_dir, "zkSmoke.out")

# hadoop parameters for stacks that support rolling_upgrade
if stack_version_formatted and check_stack_feature(
        StackFeature.ROLLING_UPGRADE, stack_version_formatted):
    zk_home = format("{stack_root}/current/{component_directory}")
    zk_bin = format("{stack_root}/current/{component_directory}/bin")
    zk_cli_shell = format(
        "{stack_root}/current/{component_directory}/bin/zkCli.sh")
    config_dir = status_params.config_dir

zk_user = config['configurations']['zookeeper-env']['zk_user']
hostname = config['hostname']
user_group = config['configurations']['cluster-env']['user_group']
zk_env_sh_template = config['configurations']['zookeeper-env']['content']

zk_log_dir = config['configurations']['zookeeper-env']['zk_log_dir']
zk_data_dir = config['configurations']['zoo.cfg']['dataDir']
zk_pid_dir = status_params.zk_pid_dir
zk_pid_file = status_params.zk_pid_file
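In other words, on a stack that supports ROLLING_UPGRADE the hard-coded /usr/lib/zookeeper paths are swapped for the stack-selector layout. A quick illustration with assumed values (stack_root and component_directory really come from the cluster configuration and status_params):

# Hypothetical values for illustration only.
stack_root = "/usr/hdp"
component_directory = "zookeeper-server"

zk_home = "{0}/current/{1}".format(stack_root, component_directory)
zk_bin = "{0}/current/{1}/bin".format(stack_root, component_directory)
zk_cli_shell = "{0}/current/{1}/bin/zkCli.sh".format(stack_root, component_directory)

print(zk_bin)  # -> /usr/hdp/current/zookeeper-server/bin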
Example #10
def copy_atlas_hive_hook_to_dfs_share_lib(upgrade_type=None,
                                          upgrade_direction=None):
    """
  If the Atlas Hive Hook directory is present, Atlas is installed, and this is the first Oozie Server,
  then copy the entire contents of that directory to the Oozie Sharelib in DFS, e.g.,
  /usr/$stack/$current_version/atlas/hook/hive/ -> hdfs:///user/oozie/share/lib/lib_$timestamp/hive

  :param upgrade_type: If in the middle of a stack upgrade, the type as UPGRADE_TYPE_ROLLING or UPGRADE_TYPE_NON_ROLLING
  :param upgrade_direction: If in the middle of a stack upgrade, the direction as Direction.UPGRADE or Direction.DOWNGRADE.
  """
    import params

    # Calculate the effective version since this code can also be called during EU/RU in the upgrade direction.
    effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(
        params.version)
    if not check_stack_feature(StackFeature.ATLAS_HOOK_SUPPORT,
                               effective_version):
        return

    # Important that oozie_server_hostnames is sorted by name so that this only runs on a single Oozie server.
    if not (len(params.oozie_server_hostnames) > 0
            and params.hostname == params.oozie_server_hostnames[0]):
        Logger.debug(
            "Will not attempt to copy Atlas Hive hook to DFS since this is not the first Oozie Server "
            "sorted by hostname.")
        return

    if not has_atlas_in_cluster():
        Logger.debug(
            "Will not attempt to copy Atlas Hve hook to DFS since Atlas is not installed on the cluster."
        )
        return

    if upgrade_type is not None and upgrade_direction == Direction.DOWNGRADE:
        Logger.debug(
            "Will not attempt to copy Atlas Hve hook to DFS since in the middle of Rolling/Express upgrade "
            "and performing a Downgrade.")
        return

    current_version = get_current_version()
    atlas_hive_hook_dir = format(
        "{stack_root}/{current_version}/atlas/hook/hive/")
    if not os.path.exists(atlas_hive_hook_dir):
        Logger.error(
            format(
                "ERROR. Atlas is installed in cluster but this Oozie server doesn't "
                "contain directory {atlas_hive_hook_dir}"))
        return

    atlas_hive_hook_impl_dir = os.path.join(atlas_hive_hook_dir,
                                            "atlas-hive-plugin-impl")

    num_files = len([
        name for name in os.listdir(atlas_hive_hook_impl_dir)
        if os.path.exists(os.path.join(atlas_hive_hook_impl_dir, name))
    ])
    Logger.info(
        "Found %d files/directories inside Atlas Hive hook impl directory %s" %
        (num_files, atlas_hive_hook_impl_dir))

    # This can return over 100 files, so take the first 5 lines after "Available ShareLib"
    # Use -oozie http(s):localhost:{oozie_server_admin_port}/oozie as oozie-env does not export OOZIE_URL
    command = format(
        r'source {conf_dir}/oozie-env.sh ; oozie admin -oozie {oozie_base_url} -shareliblist hive | grep "\[Available ShareLib\]" -A 5'
    )
    # The listing output is consumed below, so capture it; Execute() does not
    # return stdout. Requires: from resource_management.core import shell
    code, out = shell.checked_call(
        command,
        user=params.oozie_user,
        tries=10,
        try_sleep=5,
        logoutput=True,
    )

    hive_sharelib_dir = __parse_sharelib_from_output(out)

    if hive_sharelib_dir is None:
        raise Fail("Could not parse Hive sharelib from output.")

    Logger.info(
        format(
            "Parsed Hive sharelib = {hive_sharelib_dir} and will attempt to copy/replace {num_files} files to it from {atlas_hive_hook_impl_dir}"
        ))

    params.HdfsResource(hive_sharelib_dir,
                        type="directory",
                        action="create_on_execute",
                        source=atlas_hive_hook_impl_dir,
                        user=params.hdfs_user,
                        owner=params.oozie_user,
                        group=params.hdfs_user,
                        mode=0755,
                        recursive_chown=True,
                        recursive_chmod=True,
                        replace_existing_files=True)

    Logger.info(
        "Copying Atlas Hive hook properties file to Oozie Sharelib in DFS.")
    atlas_hook_filepath_source = os.path.join(params.hive_conf_dir,
                                              params.atlas_hook_filename)
    atlas_hook_file_path_dest_in_dfs = os.path.join(hive_sharelib_dir,
                                                    params.atlas_hook_filename)
    params.HdfsResource(atlas_hook_file_path_dest_in_dfs,
                        type="file",
                        source=atlas_hook_filepath_source,
                        action="create_on_execute",
                        owner=params.oozie_user,
                        group=params.hdfs_user,
                        mode=0755,
                        replace_existing_files=True)
    params.HdfsResource(None, action="execute")

    # Update the sharelib after making any changes
    # Use -oozie http(s):localhost:{oozie_server_admin_port}/oozie as oozie-env does not export OOZIE_URL
    Execute(
        format(
            "source {conf_dir}/oozie-env.sh ; oozie admin -oozie {oozie_base_url} -sharelibupdate"
        ),
        user=params.oozie_user,
        tries=5,
        try_sleep=5,
        logoutput=True,
    )
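__parse_sharelib_from_output is not included in this snippet. A hypothetical sketch of such a parser, assuming the `oozie admin ... -shareliblist hive` output prints an `[Available ShareLib]` header followed by the HDFS path(s) of the hive sharelib:

def parse_sharelib_from_output_sketch(output):
    # Hypothetical parser: return the first HDFS path that appears after the
    # "[Available ShareLib]" header and refers to the hive sharelib, else None.
    if not output:
        return None
    seen_header = False
    for raw_line in output.splitlines():
        line = raw_line.strip()
        if "[Available ShareLib]" in line:
            seen_header = True
            continue
        if seen_header and line.startswith("hdfs://") and "/hive" in line:
            return line
    return None

sample = ("[Available ShareLib]\n"
          "hive\n"
          "hdfs://nn:8020/user/oozie/share/lib/lib_20170101010101/hive\n")
print(parse_sharelib_from_output_sketch(sample))
# -> hdfs://nn:8020/user/oozie/share/lib/lib_20170101010101/hive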
Example #11
def oozie(is_server=False):
    import params

    if is_server:
        params.HdfsResource(params.oozie_hdfs_user_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.oozie_user,
                            mode=params.oozie_hdfs_user_mode)
        params.HdfsResource(None, action="execute")
    Directory(params.conf_dir,
              create_parents=True,
              owner=params.oozie_user,
              group=params.user_group)
    XmlConfig(
        "oozie-site.xml",
        conf_dir=params.conf_dir,
        configurations=params.oozie_site,
        configuration_attributes=params.config['configuration_attributes']
        ['oozie-site'],
        owner=params.oozie_user,
        group=params.user_group,
        mode=0664)
    File(
        format("{conf_dir}/oozie-env.sh"),
        owner=params.oozie_user,
        content=InlineTemplate(params.oozie_env_sh_template),
        group=params.user_group,
    )

    # On some OSes this folder may not exist, so create it before placing files in it
    Directory(params.limits_conf_dir,
              create_parents=True,
              owner='root',
              group='root')

    File(os.path.join(params.limits_conf_dir, 'oozie.conf'),
         owner='root',
         group='root',
         mode=0644,
         content=Template("oozie.conf.j2"))

    if (params.log4j_props != None):
        File(format("{params.conf_dir}/oozie-log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.oozie_user,
             content=InlineTemplate(params.log4j_props))
    elif (os.path.exists(format("{params.conf_dir}/oozie-log4j.properties"))):
        File(format("{params.conf_dir}/oozie-log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.oozie_user)

    if params.stack_version_formatted and check_stack_feature(
            StackFeature.OOZIE_ADMIN_USER, params.stack_version_formatted):
        File(format("{params.conf_dir}/adminusers.txt"),
             mode=0644,
             group=params.user_group,
             owner=params.oozie_user,
             content=Template('adminusers.txt.j2',
                              oozie_admin_users=params.oozie_admin_users))
    else:
        File(format("{params.conf_dir}/adminusers.txt"),
             owner=params.oozie_user,
             group=params.user_group)

    if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \
       params.jdbc_driver_name == "com.microsoft.sqlserver.jdbc.SQLServerDriver" or \
       params.jdbc_driver_name == "org.postgresql.Driver" or \
       params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver":
        File(
            format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"),
            content=DownloadSource(
                format("{jdk_location}{check_db_connection_jar_name}")),
        )
    pass

    oozie_ownership()

    if is_server:
        oozie_server_specific()
Example #12
# server configurations
config = Script.get_config()
tmp_dir = Script.get_tmp_dir()

stack_name = default("/hostLevelParams/stack_name", None)
stack_root = Script.get_stack_root()
tarball_map = default("/configurations/cluster-env/tarball_map", None)

# This is expected to be of the form #.#.#.#
stack_version_unformatted = config['hostLevelParams']['stack_version']
stack_version_formatted_major = format_stack_version(stack_version_unformatted)
stack_version_formatted = functions.get_stack_version(
    'hadoop-yarn-resourcemanager')

stack_supports_ru = stack_version_formatted_major and check_stack_feature(
    StackFeature.ROLLING_UPGRADE, stack_version_formatted_major)
stack_supports_timeline_state_store = stack_version_formatted_major and check_stack_feature(
    StackFeature.TIMELINE_STATE_STORE, stack_version_formatted_major)

# New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade.
# It cannot be used during the initial Cluster Install because the version is not yet known.
version = default("/commandParams/version", None)

hostname = config['hostname']

# hadoop default parameters
hadoop_libexec_dir = stack_select.get_hadoop_dir("libexec")
hadoop_bin = stack_select.get_hadoop_dir("sbin")
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
hadoop_yarn_home = '/usr/lib/hadoop-yarn'
Example #13

# server configurations
config = Script.get_config()
exec_tmp_dir = Script.get_tmp_dir()
stack_root = Script.get_stack_root()

# Needed since this is an Atlas Hook service.
cluster_name = config['clusterName']

java_version = expect("/hostLevelParams/java_version", int)

zk_root = default(
    '/configurations/application-properties/atlas.server.ha.zookeeper.zkroot',
    '/apache_atlas')
stack_supports_zk_security = check_stack_feature(
    StackFeature.SECURE_ZOOKEEPER, version_for_stack_feature_checks)
atlas_kafka_group_id = default(
    '/configurations/application-properties/atlas.kafka.hook.group.id', None)

if security_enabled:
    _hostname_lowercase = config['hostname'].lower()
    _atlas_principal_name = config['configurations']['application-properties'][
        'atlas.authentication.principal']
    atlas_jaas_principal = _atlas_principal_name.replace(
        '_HOST', _hostname_lowercase)
    atlas_keytab_path = config['configurations']['application-properties'][
        'atlas.authentication.keytab']

# New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade
version = default("/commandParams/version", None)
version_for_stack_feature_checks = get_stack_feature_version(config)
Example #14
java_home = config['hostLevelParams']['java_home']
stack_name = status_params.stack_name
stack_root = Script.get_stack_root()

version_for_stack_feature_checks = get_stack_feature_version(config)

sysprep_skip_copy_tarballs_hdfs = get_sysprep_skip_copy_tarballs_hdfs()

# New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade
version = default("/commandParams/version", None)

spark_conf = '/etc/spark/conf'
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")

if check_stack_feature(StackFeature.ROLLING_UPGRADE,
                       version_for_stack_feature_checks):
    hadoop_home = stack_select.get_hadoop_dir("home")
    spark_conf = format("{stack_root}/current/{component_directory}/conf")
    spark_log_dir = config['configurations']['spark-env']['spark_log_dir']
    spark_daemon_memory = config['configurations']['spark-env'][
        'spark_daemon_memory']
    spark_pid_dir = status_params.spark_pid_dir
    spark_home = format("{stack_root}/current/{component_directory}")

spark_thrift_server_conf_file = spark_conf + "/spark-thrift-sparkconf.conf"
java_home = config['hostLevelParams']['java_home']

hdfs_user = config['configurations']['hadoop-env']['hdfs_user']
hdfs_principal_name = config['configurations']['hadoop-env'][
    'hdfs_principal_name']
hdfs_user_keytab = config['configurations']['hadoop-env']['hdfs_user_keytab']
Example #15
stack_version_unformatted = config['clusterLevelParams']['stack_version']
stack_version_formatted = format_stack_version(stack_version_unformatted)
major_stack_version = get_major_version(stack_version_formatted)

upgrade_marker_file = format("{tmp_dir}/rangeradmin_ru.inprogress")

xml_configurations_supported = config['configurations']['ranger-env'][
    'xml_configurations_supported']

create_db_dbuser = config['configurations']['ranger-env']['create_db_dbuser']

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)

stack_supports_rolling_upgrade = check_stack_feature(
    StackFeature.ROLLING_UPGRADE, version_for_stack_feature_checks)
stack_supports_config_versioning = check_stack_feature(
    StackFeature.CONFIG_VERSIONING, version_for_stack_feature_checks)
stack_supports_usersync_non_root = check_stack_feature(
    StackFeature.RANGER_USERSYNC_NON_ROOT, version_for_stack_feature_checks)
stack_supports_ranger_tagsync = check_stack_feature(
    StackFeature.RANGER_TAGSYNC_COMPONENT, version_for_stack_feature_checks)
stack_supports_ranger_audit_db = check_stack_feature(
    StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks)
stack_supports_ranger_log4j = check_stack_feature(
    StackFeature.RANGER_LOG4J_SUPPORT, version_for_stack_feature_checks)
stack_supports_ranger_kerberos = check_stack_feature(
    StackFeature.RANGER_KERBEROS_SUPPORT, version_for_stack_feature_checks)
stack_supports_usersync_passwd = check_stack_feature(
    StackFeature.RANGER_USERSYNC_PASSWORD_JCEKS,
    version_for_stack_feature_checks)
Example #16
falcon_server_hosts = default("/clusterHostInfo/falcon_server_hosts", [])
ranger_admin_hosts = default("/clusterHostInfo/ranger_admin_hosts", [])
zeppelin_master_hosts = default("/clusterHostInfo/zeppelin_master_hosts", [])

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)

has_namenode = not len(namenode_host) == 0
has_ganglia_server = not len(ganglia_server_hosts) == 0
has_tez = 'tez-site' in config['configurations']
has_hbase_masters = not len(hbase_master_hosts) == 0
has_oozie_server = not len(oozie_servers) == 0
has_falcon_server_hosts = not len(falcon_server_hosts) == 0
has_ranger_admin = not len(ranger_admin_hosts) == 0
has_zeppelin_master = not len(zeppelin_master_hosts) == 0
stack_supports_zk_security = check_stack_feature(StackFeature.SECURE_ZOOKEEPER, version_for_stack_feature_checks)

# HDFS High Availability properties
dfs_ha_enabled = False
dfs_ha_nameservices = default('/configurations/hdfs-site/dfs.internal.nameservices', None)
if dfs_ha_nameservices is None:
  dfs_ha_nameservices = default('/configurations/hdfs-site/dfs.nameservices', None)
dfs_ha_namenode_ids = default(format("/configurations/hdfs-site/dfs.ha.namenodes.{dfs_ha_nameservices}"), None)
if dfs_ha_namenode_ids:
  dfs_ha_namemodes_ids_list = dfs_ha_namenode_ids.split(",")
  dfs_ha_namenode_ids_array_len = len(dfs_ha_namemodes_ids_list)
  if dfs_ha_namenode_ids_array_len > 1:
    dfs_ha_enabled = True


if has_namenode or dfs_type == 'HCFS':
Example #17
def setup_ranger_hdfs(upgrade_type=None):
    import params

    if params.enable_ranger_hdfs:

        stack_version = None

        if upgrade_type is not None:
            stack_version = params.version

        if params.retryAble:
            Logger.info(
                "HDFS: Setup Ranger: command retry is enabled, so the call will be retried if Ranger Admin is down."
            )
        else:
            Logger.info(
                "HDFS: Setup Ranger: command retry is not enabled, so setup will be skipped if Ranger Admin is down."
            )

        if params.xml_configurations_supported:
            from resource_management.libraries.functions.adh_setup_ranger_plugin_xml import setup_ranger_plugin
            api_version = None
            if params.stack_supports_ranger_kerberos:
                api_version = 'v2'
            setup_ranger_plugin(
                'hadoop-client',
                'hdfs',
                params.previous_jdbc_jar,
                params.downloaded_custom_connector,
                params.driver_curl_source,
                params.driver_curl_target,
                params.java_home,
                params.repo_name,
                params.hdfs_ranger_plugin_repo,
                params.ranger_env,
                params.ranger_plugin_properties,
                params.policy_user,
                params.policymgr_mgr_url,
                params.enable_ranger_hdfs,
                conf_dict=params.hadoop_conf_dir,
                component_user=params.hdfs_user,
                component_group=params.user_group,
                cache_service_list=['hdfs'],
                plugin_audit_properties=params.config['configurations']
                ['ranger-hdfs-audit'],
                plugin_audit_attributes=params.
                config['configuration_attributes']['ranger-hdfs-audit'],
                plugin_security_properties=params.config['configurations']
                ['ranger-hdfs-security'],
                plugin_security_attributes=params.
                config['configuration_attributes']['ranger-hdfs-security'],
                plugin_policymgr_ssl_properties=params.config['configurations']
                ['ranger-hdfs-policymgr-ssl'],
                plugin_policymgr_ssl_attributes=params.config[
                    'configuration_attributes']['ranger-hdfs-policymgr-ssl'],
                component_list=['hadoop-client'],
                audit_db_is_enabled=params.xa_audit_db_is_enabled,
                credential_file=params.credential_file,
                xa_audit_db_password=params.xa_audit_db_password,
                ssl_truststore_password=params.ssl_truststore_password,
                ssl_keystore_password=params.ssl_keystore_password,
                api_version=api_version,
                stack_version_override=stack_version,
                skip_if_rangeradmin_down=not params.retryAble,
                is_security_enabled=params.security_enabled,
                is_stack_supports_ranger_kerberos=params.
                stack_supports_ranger_kerberos,
                component_user_principal=params.nn_principal_name
                if params.security_enabled else None,
                component_user_keytab=params.nn_keytab
                if params.security_enabled else None)
        else:
            from resource_management.libraries.functions.adh_setup_ranger_plugin import setup_ranger_plugin

            setup_ranger_plugin(
                'hadoop-client',
                'hdfs',
                params.previous_jdbc_jar,
                params.downloaded_custom_connector,
                params.driver_curl_source,
                params.driver_curl_target,
                params.java_home,
                params.repo_name,
                params.hdfs_ranger_plugin_repo,
                params.ranger_env,
                params.ranger_plugin_properties,
                params.policy_user,
                params.policymgr_mgr_url,
                params.enable_ranger_hdfs,
                conf_dict=params.hadoop_conf_dir,
                component_user=params.hdfs_user,
                component_group=params.user_group,
                cache_service_list=['hdfs'],
                plugin_audit_properties=params.config['configurations']
                ['ranger-hdfs-audit'],
                plugin_audit_attributes=params.
                config['configuration_attributes']['ranger-hdfs-audit'],
                plugin_security_properties=params.config['configurations']
                ['ranger-hdfs-security'],
                plugin_security_attributes=params.
                config['configuration_attributes']['ranger-hdfs-security'],
                plugin_policymgr_ssl_properties=params.config['configurations']
                ['ranger-hdfs-policymgr-ssl'],
                plugin_policymgr_ssl_attributes=params.config[
                    'configuration_attributes']['ranger-hdfs-policymgr-ssl'],
                component_list=['hadoop-client'],
                audit_db_is_enabled=params.xa_audit_db_is_enabled,
                credential_file=params.credential_file,
                xa_audit_db_password=params.xa_audit_db_password,
                ssl_truststore_password=params.ssl_truststore_password,
                ssl_keystore_password=params.ssl_keystore_password,
                stack_version_override=stack_version,
                skip_if_rangeradmin_down=not params.retryAble)

        if stack_version and params.upgrade_direction == Direction.UPGRADE:
            # when upgrading to a stack that supports REMOVE_RANGER_HDFS_PLUGIN_ENV, this env file must be removed
            if check_stack_feature(StackFeature.REMOVE_RANGER_HDFS_PLUGIN_ENV,
                                   stack_version):
                source_file = os.path.join(params.hadoop_conf_dir,
                                           'set-hdfs-plugin-env.sh')
                target_file = source_file + ".bak"
                Execute(("mv", source_file, target_file),
                        sudo=True,
                        only_if=format("test -f {source_file}"))
    else:
        Logger.info('Ranger Hdfs plugin is not enabled')
Example #18
stack_name = status_params.stack_name
upgrade_direction = default("/commandParams/upgrade_direction",
                            Direction.UPGRADE)
version = default("/commandParams/version", None)

agent_stack_retry_on_unavailability = config['hostLevelParams'][
    'agent_stack_retry_on_unavailability']
agent_stack_retry_count = expect("/hostLevelParams/agent_stack_retry_count",
                                 int)

storm_component_home_dir = status_params.storm_component_home_dir
conf_dir = status_params.conf_dir

stack_version_unformatted = status_params.stack_version_unformatted
stack_version_formatted = status_params.stack_version_formatted
stack_supports_ru = stack_version_formatted and check_stack_feature(
    StackFeature.ROLLING_UPGRADE, stack_version_formatted)
stack_supports_storm_kerberos = stack_version_formatted and check_stack_feature(
    StackFeature.STORM_KERBEROS, stack_version_formatted)
stack_supports_storm_ams = stack_version_formatted and check_stack_feature(
    StackFeature.STORM_AMS, stack_version_formatted)
stack_supports_ranger_kerberos = stack_version_formatted and check_stack_feature(
    StackFeature.RANGER_KERBEROS_SUPPORT, stack_version_formatted)

# default hadoop params
rest_lib_dir = "/usr/lib/storm/contrib/storm-rest"
storm_bin_dir = "/usr/bin"
storm_lib_dir = "/usr/lib/storm/lib/"

# hadoop parameters for 2.2+
if stack_supports_ru:
    rest_lib_dir = format("{storm_component_home_dir}/contrib/storm-rest")
Example #19
major_stack_version = get_major_version(stack_version_formatted_major)

# New Cluster Stack Version that is defined during the RESTART of a Rolling Upgrade.
# It cannot be used during the initial Cluster Install because the version is not yet known.
version = default("/commandParams/version", None)

# When downgrading the 'version' is pointing to the downgrade-target version
# downgrade_from_version provides the source-version the downgrade is happening from
downgrade_from_version = upgrade_summary.get_downgrade_from_version("HIVE")

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)

# Upgrade direction
upgrade_direction = default("/commandParams/upgrade_direction", None)
stack_supports_ranger_kerberos = check_stack_feature(
    StackFeature.RANGER_KERBEROS_SUPPORT, version_for_stack_feature_checks)
stack_supports_ranger_audit_db = check_stack_feature(
    StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks)
stack_supports_ranger_hive_jdbc_url_change = check_stack_feature(
    StackFeature.RANGER_HIVE_PLUGIN_JDBC_URL, version_for_stack_feature_checks)
stack_supports_atlas_hook_for_hive_interactive = check_stack_feature(
    StackFeature.HIVE_INTERACTIVE_ATLAS_HOOK_REQUIRED,
    version_for_stack_feature_checks)
stack_supports_hive_interactive_ga = check_stack_feature(
    StackFeature.HIVE_INTERACTIVE_GA_SUPPORT, version_for_stack_feature_checks)

# component ROLE directory (like hive-metastore or hive-server2-hive2)
component_directory = status_params.component_directory
component_directory_interactive = status_params.component_directory_interactive

hadoop_home = format('{stack_root}/current/hadoop-client')
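To make the two version fields above concrete, during a downgrade the command parameters would look roughly like this (all values hypothetical):

# Hypothetical command parameters during a HIVE downgrade, for illustration only.
command_params = {
    "version": "2.5.0.0-1234",                  # the downgrade TARGET ('version' above)
    "downgrade_from_version": "2.6.0.0-9876",   # the SOURCE version being downgraded from
    "upgrade_direction": "downgrade",
}
print("downgrading from %(downgrade_from_version)s to %(version)s" % command_params)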
Example #20
kafka_home = '/usr/lib/kafka'
kafka_bin = kafka_home + '/bin/kafka'
conf_dir = "/etc/kafka/conf"
limits_conf_dir = "/etc/security/limits.d"

# Used while upgrading the stack in a kerberized cluster and running kafka-acls.sh
zookeeper_connect = default("/configurations/kafka-broker/zookeeper.connect",
                            None)

kafka_user_nofile_limit = config['configurations']['kafka-env'][
    'kafka_user_nofile_limit']
kafka_user_nproc_limit = config['configurations']['kafka-env'][
    'kafka_user_nproc_limit']

# parameters for 2.2+
if stack_version_formatted and check_stack_feature(
        StackFeature.ROLLING_UPGRADE, stack_version_formatted):
    kafka_home = os.path.join(stack_root, "current", "kafka-broker")
    kafka_bin = os.path.join(kafka_home, "bin", "kafka")
    conf_dir = os.path.join(kafka_home, "config")

kafka_user = config['configurations']['kafka-env']['kafka_user']
kafka_log_dir = config['configurations']['kafka-env']['kafka_log_dir']
kafka_pid_dir = status_params.kafka_pid_dir
kafka_pid_file = kafka_pid_dir + "/kafka.pid"
# This is hardcoded in the Kafka bash process lifecycle, over which we have no control
kafka_managed_pid_dir = "/var/run/kafka"
kafka_managed_log_dir = "/var/log/kafka"
user_group = config['configurations']['cluster-env']['user_group']
java64_home = config['hostLevelParams']['java_home']
kafka_env_sh_template = config['configurations']['kafka-env']['content']
kafka_hosts = config['clusterHostInfo']['kafka_broker_hosts']
Example #21
tmp_dir = Script.get_tmp_dir()
stack_name = status_params.stack_name
upgrade_direction = default("/commandParams/upgrade_direction", None)
version = default("/commandParams/version", None)
# E.g., 2.3.2.0
version_formatted = format_stack_version(version)

# E.g., 2.3
stack_version_unformatted = config['hostLevelParams']['stack_version']
stack_version_formatted = format_stack_version(stack_version_unformatted)

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)

stack_supports_ranger_kerberos = check_stack_feature(
    StackFeature.RANGER_KERBEROS_SUPPORT, version_for_stack_feature_checks)
stack_supports_ranger_audit_db = check_stack_feature(
    StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks)
stack_supports_core_site_for_ranger_plugin = check_stack_feature(
    StackFeature.CORE_SITE_FOR_RANGER_PLUGINS_SUPPORT,
    version_for_stack_feature_checks)

# This is the version whose state is CURRENT. During an RU, this is the source version.
# DO NOT format it since we need the build number too.
upgrade_from_version = upgrade_summary.get_source_version()

source_stack = default("/commandParams/source_stack", None)
if source_stack is None:
    source_stack = upgrade_summary.get_source_stack("KNOX")
source_stack_name = get_stack_name(source_stack)
if source_stack_name is not None and source_stack_name != stack_name:
Example #22
def webhcat():
    import params

    Directory(params.templeton_pid_dir,
              owner=params.webhcat_user,
              mode=0755,
              group=params.user_group,
              create_parents=True)

    Directory(params.templeton_log_dir,
              owner=params.webhcat_user,
              mode=0755,
              group=params.user_group,
              create_parents=True)

    Directory(params.config_dir,
              create_parents=True,
              owner=params.webhcat_user,
              group=params.user_group,
              cd_access="a")

    if params.security_enabled:
        kinit_if_needed = format(
            "{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};")
    else:
        kinit_if_needed = ""

    if kinit_if_needed:
        Execute(kinit_if_needed, user=params.webhcat_user, path='/bin')

    # Replace _HOST with hostname in relevant principal-related properties
    webhcat_site = params.config['configurations']['webhcat-site'].copy()
    for prop_name in [
            'templeton.hive.properties', 'templeton.kerberos.principal'
    ]:
        if prop_name in webhcat_site:
            webhcat_site[prop_name] = webhcat_site[prop_name].replace(
                "_HOST", params.hostname)

    XmlConfig(
        "webhcat-site.xml",
        conf_dir=params.config_dir,
        configurations=webhcat_site,
        configuration_attributes=params.config['configuration_attributes']
        ['webhcat-site'],
        owner=params.webhcat_user,
        group=params.user_group,
    )

    # if we're in an upgrade of a secure cluster, make sure hive-site and yarn-site are created
    if params.stack_version_formatted_major and check_stack_feature(StackFeature.CONFIG_VERSIONING,
                                                                    params.stack_version_formatted_major) and \
            params.version and params.stack_root:
        XmlConfig(
            "hive-site.xml",
            conf_dir=params.config_dir,
            configurations=params.config['configurations']['hive-site'],
            configuration_attributes=params.config['configuration_attributes']
            ['hive-site'],
            owner=params.hive_user,
            group=params.user_group,
        )

        XmlConfig(
            "yarn-site.xml",
            conf_dir=params.hadoop_conf_dir,
            configurations=params.config['configurations']['yarn-site'],
            configuration_attributes=params.config['configuration_attributes']
            ['yarn-site'],
            owner=params.yarn_user,
            group=params.user_group,
        )

    File(format("{config_dir}/webhcat-env.sh"),
         owner=params.webhcat_user,
         group=params.user_group,
         content=InlineTemplate(params.webhcat_env_sh_template))

    Directory(params.webhcat_conf_dir, cd_access='a', create_parents=True)

    log4j_webhcat_filename = 'webhcat-log4j.properties'
    if (params.log4j_webhcat_props != None):
        File(format("{config_dir}/{log4j_webhcat_filename}"),
             mode=0644,
             group=params.user_group,
             owner=params.webhcat_user,
             content=params.log4j_webhcat_props)
    elif (os.path.exists("{config_dir}/{log4j_webhcat_filename}.template")):
        File(format("{config_dir}/{log4j_webhcat_filename}"),
             mode=0644,
             group=params.user_group,
             owner=params.webhcat_user,
             content=StaticFile(
                 format("{config_dir}/{log4j_webhcat_filename}.template")))

    # Generate atlas-application.properties.xml file
    if params.enable_atlas_hook:
        script_path = os.path.realpath(__file__).split(
            '/services')[0] + '/hooks/before-INSTALL/scripts/atlas'
        sys.path.append(script_path)
        from setup_atlas_hook import has_atlas_in_cluster, setup_atlas_hook, setup_atlas_jar_symlinks
        atlas_hook_filepath = os.path.join(params.hive_config_dir,
                                           params.atlas_hook_filename)
        setup_atlas_hook(SERVICE.HIVE,
                         params.hive_atlas_application_properties,
                         atlas_hook_filepath, params.hive_user,
                         params.user_group)
        setup_atlas_jar_symlinks("hive", params.hcat_lib)
Example #23
def hbase(name=None):
  import params

  # ensure that matching LZO libraries are installed for HBase
  lzo_utils.install_lzo_if_needed()

  Directory( params.etc_prefix_dir,
      mode=0755
  )

  Directory( params.hbase_conf_dir,
      owner = params.hbase_user,
      group = params.user_group,
      create_parents = True
  )
   
  Directory(params.java_io_tmpdir,
      create_parents = True,
      mode=0777
  )

  # If a file location is specified in ioengine parameter,
  # ensure that directory exists. Otherwise create the
  # directory with permissions assigned to hbase:hadoop.
  ioengine_input = params.ioengine_param
  if ioengine_input != None:
    if ioengine_input.startswith("file:/"):
      ioengine_fullpath = ioengine_input[5:]
      ioengine_dir = os.path.dirname(ioengine_fullpath)
      Directory(ioengine_dir,
          owner = params.hbase_user,
          group = params.user_group,
          create_parents = True,
          mode = 0755
      )
  
  parent_dir = os.path.dirname(params.tmp_dir)
  # In case there are several placeholders in the path
  while ("${" in parent_dir):
    parent_dir = os.path.dirname(parent_dir)
  if parent_dir != os.path.abspath(os.sep) :
    Directory (parent_dir,
          create_parents = True,
          cd_access="a",
    )
    Execute(("chmod", "1777", parent_dir), sudo=True)

  XmlConfig( "hbase-site.xml",
            conf_dir = params.hbase_conf_dir,
            configurations = params.config['configurations']['hbase-site'],
            configuration_attributes=params.config['configuration_attributes']['hbase-site'],
            owner = params.hbase_user,
            group = params.user_group
  )

  if check_stack_feature(StackFeature.PHOENIX_CORE_HDFS_SITE_REQUIRED, params.version_for_stack_feature_checks):
    XmlConfig( "core-site.xml",
               conf_dir = params.hbase_conf_dir,
               configurations = params.config['configurations']['core-site'],
               configuration_attributes=params.config['configuration_attributes']['core-site'],
               owner = params.hbase_user,
               group = params.user_group
    )
    if 'hdfs-site' in params.config['configurations']:
      XmlConfig( "hdfs-site.xml",
              conf_dir = params.hbase_conf_dir,
              configurations = params.config['configurations']['hdfs-site'],
              configuration_attributes=params.config['configuration_attributes']['hdfs-site'],
              owner = params.hbase_user,
              group = params.user_group
      )
  else:
    File(format("{params.hbase_conf_dir}/hdfs-site.xml"),
         action="delete"
    )
    File(format("{params.hbase_conf_dir}/core-site.xml"),
         action="delete"
    )

  if 'hbase-policy' in params.config['configurations']:
    XmlConfig( "hbase-policy.xml",
            conf_dir = params.hbase_conf_dir,
            configurations = params.config['configurations']['hbase-policy'],
            configuration_attributes=params.config['configuration_attributes']['hbase-policy'],
            owner = params.hbase_user,
            group = params.user_group
    )
  # Manually overriding ownership of file installed by hadoop package
  else: 
    File( format("{params.hbase_conf_dir}/hbase-policy.xml"),
      owner = params.hbase_user,
      group = params.user_group
    )

  File(format("{hbase_conf_dir}/hbase-env.sh"),
       owner = params.hbase_user,
       content=InlineTemplate(params.hbase_env_sh_template),
       group = params.user_group,
  )
  
  # On some OSes this folder may not exist, so create it before placing files in it
  Directory(params.limits_conf_dir,
            create_parents = True,
            owner='root',
            group='root'
            )
  
  File(os.path.join(params.limits_conf_dir, 'hbase.conf'),
       owner='root',
       group='root',
       mode=0644,
       content=Template("hbase.conf.j2")
       )
    
  hbase_TemplateConfig( params.metric_prop_file_name,
    tag = 'GANGLIA-MASTER' if name == 'master' else 'GANGLIA-RS'
  )

  hbase_TemplateConfig( 'regionservers')

  if params.security_enabled:
    hbase_TemplateConfig( format("hbase_{name}_jaas.conf"))
  
  if name != "client":
    Directory( params.pid_dir,
      owner = params.hbase_user,
      create_parents = True,
      cd_access = "a",
      mode = 0755,
    )
  
    Directory (params.log_dir,
      owner = params.hbase_user,
      create_parents = True,
      cd_access = "a",
      mode = 0755,
    )

  if (params.log4j_props != None):
    File(format("{params.hbase_conf_dir}/log4j.properties"),
         mode=0644,
         group=params.user_group,
         owner=params.hbase_user,
         content=InlineTemplate(params.log4j_props)
    )
  elif (os.path.exists(format("{params.hbase_conf_dir}/log4j.properties"))):
    File(format("{params.hbase_conf_dir}/log4j.properties"),
      mode=0644,
      group=params.user_group,
      owner=params.hbase_user
    )
  if name == "master":
    if not params.hbase_hdfs_root_dir_protocol or params.hbase_hdfs_root_dir_protocol == urlparse(params.default_fs).scheme:
      params.HdfsResource(params.hbase_hdfs_root_dir,
                           type="directory",
                           action="create_on_execute",
                           owner=params.hbase_user
      )
    params.HdfsResource(params.hbase_staging_dir,
                         type="directory",
                         action="create_on_execute",
                         owner=params.hbase_user,
                         mode=0711
    )
    if params.create_hbase_home_directory:
      params.HdfsResource(params.hbase_home_directory,
                          type="directory",
                          action="create_on_execute",
                          owner=params.hbase_user,
                          mode=0755
      )
    params.HdfsResource(None, action="execute")

  if params.phoenix_enabled:
    Package(params.phoenix_package,
            retry_on_repo_unavailability=params.agent_stack_retry_on_unavailability,
            retry_count=params.agent_stack_retry_count)
Example #24
def hive_interactive(name=None):
  import params
  MB_TO_BYTES = 1048576

  # if warehouse directory is in DFS
  if not params.whs_dir_protocol or params.whs_dir_protocol == urlparse(params.default_fs).scheme:
    # Create Hive Metastore Warehouse Dir
    params.HdfsResource(params.hive_apps_whs_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.hive_user,
                        group=params.user_group,
                        mode=params.hive_apps_whs_mode
                        )
  else:
    Logger.info(format("Not creating warehouse directory '{hive_apps_whs_dir}', as the location is not in DFS."))

  # Create Hive User Dir
  params.HdfsResource(params.hive_hdfs_user_dir,
                      type="directory",
                      action="create_on_execute",
                      owner=params.hive_user,
                      mode=params.hive_hdfs_user_mode
                      )

  # list of properties that should be excluded from the config
  # this approach is a compromise against adding a dedicated config
  # type for hive_server_interactive or needed config groups on a
  # per component basis
  exclude_list = ['hive.enforce.bucketing',
                  'hive.enforce.sorting']

  # List of configs to be excluded from hive2 client, but present in Hive2 server.
  exclude_list_for_hive2_client = ['javax.jdo.option.ConnectionPassword',
                                   'hadoop.security.credential.provider.path']

  # Copy Tarballs in HDFS.
  if params.stack_version_formatted_major and check_stack_feature(StackFeature.ROLLING_UPGRADE, params.stack_version_formatted_major):
    resource_created = copy_to_hdfs("tez_hive2",
                 params.user_group,
                 params.hdfs_user,
                 file_mode=params.tarballs_mode,
                 skip=params.sysprep_skip_copy_tarballs_hdfs)

    if resource_created:
      params.HdfsResource(None, action="execute")

  Directory(params.hive_interactive_etc_dir_prefix,
            mode=0755
            )

  Logger.info("Directories to fill with configs: %s" % str(params.hive_conf_dirs_list))
  for conf_dir in params.hive_conf_dirs_list:
    fill_conf_dir(conf_dir)

  '''
  As hive2/hive-site.xml only contains the new + the changed props compared to hive/hive-site.xml,
  we need to merge hive/hive-site.xml and hive2/hive-site.xml and store it in hive2/hive-site.xml.
  '''
  merged_hive_interactive_site = {}
  merged_hive_interactive_site.update(params.config['configurations']['hive-site'])
  merged_hive_interactive_site.update(params.config['configurations']['hive-interactive-site'])
  for item in exclude_list:
    if item in merged_hive_interactive_site.keys():
      del merged_hive_interactive_site[item]

  '''
  Config 'hive.llap.io.memory.size' calculated value in stack_advisor is in MB as of now. We need to
  convert it to bytes before we write it down to config file.
  '''
  if 'hive.llap.io.memory.size' in merged_hive_interactive_site.keys():
    hive_llap_io_mem_size_in_mb = merged_hive_interactive_site.get("hive.llap.io.memory.size")
    hive_llap_io_mem_size_in_bytes = long(hive_llap_io_mem_size_in_mb) * MB_TO_BYTES
    merged_hive_interactive_site['hive.llap.io.memory.size'] = hive_llap_io_mem_size_in_bytes
    Logger.info("Converted 'hive.llap.io.memory.size' value from '{0} MB' to '{1} Bytes' before writing "
                "it to config file.".format(hive_llap_io_mem_size_in_mb, hive_llap_io_mem_size_in_bytes))

  '''
  Hive2 doesn't have support for Atlas, so we need to remove the hook 'org.apache.atlas.hive.hook.HiveHook',
  which would have come into the config 'hive.exec.post.hooks' during the site merge logic, if Atlas is installed.
  '''
  # Generate atlas-application.properties.xml file
  if params.enable_atlas_hook and params.stack_supports_atlas_hook_for_hive_interactive:
    Logger.info("Setup for Atlas Hive2 Hook started.")

    atlas_hook_filepath = os.path.join(params.hive_server_interactive_conf_dir, params.atlas_hook_filename)
    setup_atlas_hook(SERVICE.HIVE, params.hive_atlas_application_properties, atlas_hook_filepath, params.hive_user, params.user_group)

    Logger.info("Setup for Atlas Hive2 Hook done.")
  else:
    # Required for HDP 2.5 stacks
    Logger.info("Skipping setup for Atlas Hook, as it is disabled/ not supported.")
    remove_atlas_hook_if_exists(merged_hive_interactive_site)

  '''
  As tez_hive2/tez-site.xml only contains the new + the changed props compared to tez/tez-site.xml,
  we need to merge tez/tez-site.xml and tez_hive2/tez-site.xml and store it in tez_hive2/tez-site.xml.
  '''
  merged_tez_interactive_site = {}
  if 'tez-site' in params.config['configurations']:
    merged_tez_interactive_site.update(params.config['configurations']['tez-site'])
    Logger.info("Retrieved 'tez/tez-site' for merging with 'tez_hive2/tez-interactive-site'.")
  else:
    Logger.error("Tez's 'tez-site' couldn't be retrieved from passed-in configurations.")

  merged_tez_interactive_site.update(params.config['configurations']['tez-interactive-site'])
  XmlConfig("tez-site.xml",
            conf_dir = params.tez_interactive_config_dir,
            configurations = merged_tez_interactive_site,
            configuration_attributes=params.config['configurationAttributes']['tez-interactive-site'],
            owner = params.tez_interactive_user,
            group = params.user_group,
            mode = 0664)

  '''
  Merge properties from hiveserver2-interactive-site into hiveserver2-site
  '''
  merged_hiveserver2_interactive_site = {}
  if 'hiveserver2-site' in params.config['configurations']:
    merged_hiveserver2_interactive_site.update(params.config['configurations']['hiveserver2-site'])
    Logger.info("Retrieved 'hiveserver2-site' for merging with 'hiveserver2-interactive-site'.")
  else:
    Logger.error("'hiveserver2-site' couldn't be retrieved from passed-in configurations.")
  merged_hiveserver2_interactive_site.update(params.config['configurations']['hiveserver2-interactive-site'])


  # Create config files under /etc/hive2/conf and /etc/hive2/conf/conf.server:
  #   hive-site.xml
  #   hive-env.sh
  #   llap-daemon-log4j2.properties
  #   llap-cli-log4j2.properties
  #   hive-log4j2.properties
  #   hive-exec-log4j2.properties
  #   beeline-log4j2.properties

  hive2_conf_dirs_list = params.hive_conf_dirs_list
  hive2_client_conf_path = format("{stack_root}/current/{component_directory}/conf")

  # Making copy of 'merged_hive_interactive_site' in 'merged_hive_interactive_site_copy', and deleting 'javax.jdo.option.ConnectionPassword'
  # config from there, as Hive2 client shouldn't have that config.
  merged_hive_interactive_site_copy = merged_hive_interactive_site.copy()
  for item in exclude_list_for_hive2_client:
    if item in merged_hive_interactive_site.keys():
      del merged_hive_interactive_site_copy[item]

  for conf_dir in hive2_conf_dirs_list:
      mode_identified = 0644 if conf_dir == hive2_client_conf_path else 0600
      if conf_dir == hive2_client_conf_path:
        XmlConfig("hive-site.xml",
                  conf_dir=conf_dir,
                  configurations=merged_hive_interactive_site_copy,
                  configuration_attributes=params.config['configurationAttributes']['hive-interactive-site'],
                  owner=params.hive_user,
                  group=params.user_group,
                  mode=0644)
      else:
        merged_hive_interactive_site = update_credential_provider_path(merged_hive_interactive_site,
                                                                  'hive-site',
                                                                  os.path.join(conf_dir, 'hive-site.jceks'),
                                                                  params.hive_user,
                                                                  params.user_group
        )
        XmlConfig("hive-site.xml",
                  conf_dir=conf_dir,
                  configurations=merged_hive_interactive_site,
                  configuration_attributes=params.config['configurationAttributes']['hive-interactive-site'],
                  owner=params.hive_user,
                  group=params.user_group,
                  mode=0600)
      XmlConfig("hiveserver2-site.xml",
                conf_dir=conf_dir,
                configurations=merged_hiveserver2_interactive_site,
                configuration_attributes=params.config['configurationAttributes']['hiveserver2-interactive-site'],
                owner=params.hive_user,
                group=params.user_group,
                mode=mode_identified)

      hive_server_interactive_conf_dir = conf_dir

      File(format("{hive_server_interactive_conf_dir}/hive-env.sh"),
           owner=params.hive_user,
           group=params.user_group,
           mode=mode_identified,
           content=InlineTemplate(params.hive_interactive_env_sh_template))

      llap_daemon_log4j_filename = 'llap-daemon-log4j2.properties'
      File(format("{hive_server_interactive_conf_dir}/{llap_daemon_log4j_filename}"),
           mode=mode_identified,
           group=params.user_group,
           owner=params.hive_user,
           content=InlineTemplate(params.llap_daemon_log4j))

      llap_cli_log4j2_filename = 'llap-cli-log4j2.properties'
      File(format("{hive_server_interactive_conf_dir}/{llap_cli_log4j2_filename}"),
           mode=mode_identified,
           group=params.user_group,
           owner=params.hive_user,
           content=InlineTemplate(params.llap_cli_log4j2))

      hive_log4j2_filename = 'hive-log4j2.properties'
      File(format("{hive_server_interactive_conf_dir}/{hive_log4j2_filename}"),
         mode=mode_identified,
         group=params.user_group,
         owner=params.hive_user,
         content=InlineTemplate(params.hive_log4j2))

      hive_exec_log4j2_filename = 'hive-exec-log4j2.properties'
      File(format("{hive_server_interactive_conf_dir}/{hive_exec_log4j2_filename}"),
         mode=mode_identified,
         group=params.user_group,
         owner=params.hive_user,
         content=InlineTemplate(params.hive_exec_log4j2))

      beeline_log4j2_filename = 'beeline-log4j2.properties'
      File(format("{hive_server_interactive_conf_dir}/{beeline_log4j2_filename}"),
         mode=mode_identified,
         group=params.user_group,
         owner=params.hive_user,
         content=InlineTemplate(params.beeline_log4j2))

      File(os.path.join(hive_server_interactive_conf_dir, "hadoop-metrics2-hiveserver2.properties"),
           owner=params.hive_user,
           group=params.user_group,
           mode=mode_identified,
           content=Template("hadoop-metrics2-hiveserver2.properties.j2")
           )

      File(format("{hive_server_interactive_conf_dir}/hadoop-metrics2-llapdaemon.properties"),
           owner=params.hive_user,
           group=params.user_group,
           mode=mode_identified,
           content=Template("hadoop-metrics2-llapdaemon.j2"))

      File(format("{hive_server_interactive_conf_dir}/hadoop-metrics2-llaptaskscheduler.properties"),
           owner=params.hive_user,
           group=params.user_group,
           mode=mode_identified,
           content=Template("hadoop-metrics2-llaptaskscheduler.j2"))


  # On some OSes this folder may not exist, so create it before pushing files there
  Directory(params.limits_conf_dir,
            create_parents = True,
            owner='root',
            group='root')

  File(os.path.join(params.limits_conf_dir, 'hive.conf'),
       owner='root',
       group='root',
       mode=0644,
       content=Template("hive.conf.j2"))

  if not os.path.exists(params.target_hive_interactive):
    jdbc_connector(params.target_hive_interactive, params.hive_intaractive_previous_jdbc_jar)

  File(format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"),
       content = DownloadSource(format("{jdk_location}/{check_db_connection_jar_name}")),
       mode = 0644)
  File(params.start_hiveserver2_interactive_path,
       mode=0755,
       content=Template(format('{start_hiveserver2_interactive_script}')))

  Directory(params.hive_pid_dir,
            create_parents=True,
            cd_access='a',
            owner=params.hive_user,
            group=params.user_group,
            mode=0755)
  Directory(params.hive_log_dir,
            create_parents=True,
            cd_access='a',
            owner=params.hive_user,
            group=params.user_group,
            mode=0755)
  Directory(params.hive_interactive_var_lib,
            create_parents=True,
            cd_access='a',
            owner=params.hive_user,
            group=params.user_group,
            mode=0755)
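
For reference, a minimal standalone sketch of the client/server split applied above: the client copy of the merged site drops sensitive keys before being written with 0644, while server conf directories keep the full config at 0600. All values below are illustrative placeholders, not taken from params.

# illustrative-only values; the real ones come from params above
exclude_list_for_hive2_client = ['javax.jdo.option.ConnectionPassword']
merged_hive_interactive_site = {'javax.jdo.option.ConnectionPassword': 'secret',
                                'hive.llap.daemon.queue.name': 'llap'}

# same copy-then-delete pattern as above, so the client never sees the password
merged_hive_interactive_site_copy = merged_hive_interactive_site.copy()
for item in exclude_list_for_hive2_client:
  if item in merged_hive_interactive_site_copy:
    del merged_hive_interactive_site_copy[item]

hive2_client_conf_path = '/etc/hive2/conf'                      # hypothetical client dir
hive2_conf_dirs_list = [hive2_client_conf_path, '/etc/hive2/conf/conf.server']
for conf_dir in hive2_conf_dirs_list:
  mode_identified = 0644 if conf_dir == hive2_client_conf_path else 0600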
Example #25
0
def service(action=None, name=None, user=None, options="", create_pid_dir=False,
            create_log_dir=False):
  """
  :param action: Either "start" or "stop"
  :param name: Component name, e.g., "namenode", "datanode", "secondarynamenode", "zkfc"
  :param user: User to run the command as
  :param options: Additional options to pass to command as a string
  :param create_pid_dir: Create PID directory
  :param create_log_dir: Create log file directory
  """
  import params

  options = options if options else ""
  pid_dir = format("{hadoop_pid_dir_prefix}/{user}")
  pid_file = format("{pid_dir}/hadoop-{user}-{name}.pid")
  hadoop_env_exports = {
    'HADOOP_LIBEXEC_DIR': params.hadoop_libexec_dir
  }
  log_dir = format("{hdfs_log_dir_prefix}/{user}")

  # The NFS gateway is always started by root using jsvc due to rpcbind bugs
  # on Linux distributions such as CentOS 6.2. https://bugzilla.redhat.com/show_bug.cgi?id=731542
  if name == "nfs3" :
    pid_file = format("{pid_dir}/hadoop_privileged_nfs3.pid")
    custom_export = {
      'HADOOP_PRIVILEGED_NFS_USER': params.hdfs_user,
      'HADOOP_PRIVILEGED_NFS_PID_DIR': pid_dir,
      'HADOOP_PRIVILEGED_NFS_LOG_DIR': log_dir
    }
    hadoop_env_exports.update(custom_export)

  process_id_exists_command = as_sudo(["test", "-f", pid_file]) + " && " + as_sudo(["pgrep", "-F", pid_file])

  # Do not create directories on STOP; the old directories (created during the
  # previous START) are still in use at that point
  if action != "stop":
    if name == "nfs3":
      Directory(params.hadoop_pid_dir_prefix,
                mode=0755,
                owner=params.root_user,
                group=params.root_group
      )
    else:
      Directory(params.hadoop_pid_dir_prefix,
                  mode=0755,
                  owner=params.hdfs_user,
                  group=params.user_group
      )
    if create_pid_dir:
      Directory(pid_dir,
                owner=user,
                group=params.user_group,
                create_parents = True)
    if create_log_dir:
      if name == "nfs3":
        Directory(log_dir,
                  mode=0775,
                  owner=params.root_user,
                  group=params.user_group)
      else:
        Directory(log_dir,
                  owner=user,
                  group=params.user_group,
                  create_parents = True)

  if params.security_enabled and name == "datanode":
    ## The directory where pid files are stored in the secure data environment.
    hadoop_secure_dn_pid_dir = format("{hadoop_pid_dir_prefix}/{hdfs_user}")
    hadoop_secure_dn_pid_file = format("{hadoop_secure_dn_pid_dir}/hadoop_secure_dn.pid")

    # At datanode_non_root stack version and further, we may start datanode as a non-root even in secure cluster
    if not (params.stack_version_formatted and check_stack_feature(StackFeature.DATANODE_NON_ROOT, params.stack_version_formatted)) or params.secure_dn_ports_are_in_use:
      user = "root"
      pid_file = format(
        "{hadoop_pid_dir_prefix}/{hdfs_user}/hadoop-{hdfs_user}-{name}.pid")

    if action == 'stop' and (params.stack_version_formatted and check_stack_feature(StackFeature.DATANODE_NON_ROOT, params.stack_version_formatted)) and \
      os.path.isfile(hadoop_secure_dn_pid_file):
        # Special handling for the case where a non-root secure DataNode was configured
        # and is then restarted to pick up new configs; without this we could not stop
        # the instance that is still running as root
        user = "root"
        
        try:
          check_process_status(hadoop_secure_dn_pid_file)
          
          custom_export = {
            'HADOOP_SECURE_DN_USER': params.hdfs_user
          }
          hadoop_env_exports.update(custom_export)
          
        except ComponentIsNotRunning:
          pass

  hadoop_daemon = format("{hadoop_bin}/hadoop-daemon.sh")

  if user == "root":
    cmd = [hadoop_daemon, "--config", params.hadoop_conf_dir, action, name]
    if options:
      cmd += [options, ]
    daemon_cmd = as_sudo(cmd)
  else:
    cmd = format("{ulimit_cmd} {hadoop_daemon} --config {hadoop_conf_dir} {action} {name}")
    if options:
      cmd += " " + options
    daemon_cmd = as_user(cmd, user)
     
  if action == "start":
    # remove pid file from dead process
    File(pid_file, action="delete", not_if=process_id_exists_command)
    
    try:
      Execute(daemon_cmd, not_if=process_id_exists_command, environment=hadoop_env_exports)
    except:
      show_logs(log_dir, user)
      raise
  elif action == "stop":
    try:
      Execute(daemon_cmd, only_if=process_id_exists_command, environment=hadoop_env_exports)
    except:
      show_logs(log_dir, user)
      raise
    File(pid_file, action="delete")
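
A hedged usage sketch of service(); the component name and user below are assumptions drawn from the docstring, not from a real control script.

# hypothetical call from a NameNode control script (assumes `import params`)
service(action="start",
        name="namenode",
        user=params.hdfs_user,
        create_pid_dir=True,
        create_log_dir=True)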
Example #26
0
    def upgrade_schema(self, env):
        """
    Executes the schema upgrade binary. This is its own function because it could
    be called as a standalone task from the upgrade pack, but it is safe to run for each
    metastore instance. The schema upgrade on an already-upgraded metastore is a NOOP.

    The metastore schema upgrade requires a database driver library for most
    databases. During an upgrade, it's possible that the library is not present,
    so this will also attempt to copy/download the appropriate driver.

    This function will also ensure that configurations are written out to disk before running
    since the new configs will most likely not yet exist on an upgrade.

    Should not be invoked for a DOWNGRADE; Metastore only supports schema upgrades.
    """
        Logger.info("Upgrading Hive Metastore Schema")
        import status_params
        import params
        env.set_params(params)

        # ensure that configurations are written out before trying to upgrade the schema
        # since the schematool needs configs and doesn't know how to use the hive conf override
        self.configure(env)

        if params.security_enabled:
            cached_kinit_executor(status_params.kinit_path_local,
                                  status_params.hive_user,
                                  params.hive_metastore_keytab_path,
                                  params.hive_metastore_principal,
                                  status_params.hostname,
                                  status_params.tmp_dir)

        # ensure that the JDBC driver is present for the schema tool; if it's not
        # present, then download it first
        if params.hive_jdbc_driver in params.hive_jdbc_drivers_list:
            target_directory = format("{stack_root}/{version}/hive/lib")

            # download it if it does not exist
            if not os.path.exists(params.source_jdbc_file):
                jdbc_connector(params.hive_jdbc_target,
                               params.hive_previous_jdbc_jar)

            target_directory_and_filename = os.path.join(
                target_directory, os.path.basename(params.source_jdbc_file))

            if params.sqla_db_used:
                target_native_libs_directory = format(
                    "{target_directory}/native/lib64")

                Execute(
                    format(
                        "yes | {sudo} cp {jars_in_hive_lib} {target_directory}"
                    ))

                Directory(target_native_libs_directory, create_parents=True)

                Execute(
                    format(
                        "yes | {sudo} cp {libs_in_hive_lib} {target_native_libs_directory}"
                    ))

                Execute(
                    format(
                        "{sudo} chown -R {hive_user}:{user_group} {hive_lib}/*"
                    ))
            else:
                # copy the JDBC driver from the older metastore location to the new location only
                # if it does not already exist
                if not os.path.exists(target_directory_and_filename):
                    Execute(('cp', params.source_jdbc_file, target_directory),
                            path=["/bin", "/usr/bin/"],
                            sudo=True)

            File(target_directory_and_filename, mode=0644)

        # build the schema tool command
        binary = format("{hive_schematool_ver_bin}/schematool")

        # the conf.server directory changed locations between stack versions
        # since the configurations have not been written out yet during an upgrade
        # we need to choose the original legacy location
        schematool_hive_server_conf_dir = params.hive_server_conf_dir
        if not (check_stack_feature(StackFeature.CONFIG_VERSIONING,
                                    params.version_for_stack_feature_checks)):
            schematool_hive_server_conf_dir = LEGACY_HIVE_SERVER_CONF

        env_dict = {'HIVE_CONF_DIR': schematool_hive_server_conf_dir}

        command = format(
            "{binary} -dbType {hive_metastore_db_type} -upgradeSchema")
        Execute(command,
                user=params.hive_user,
                tries=1,
                environment=env_dict,
                logoutput=True)
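
For context, a sketch of what the assembled schematool invocation expands to; the stack path and database type below are assumptions, not values from params.

# illustrative expansion of the format() calls above
binary = "/usr/hdp/2.5.0.0-1245/hive/bin/schematool"   # hypothetical hive_schematool_ver_bin
command = "{0} -dbType {1} -upgradeSchema".format(binary, "mysql")
# executed as the hive user with HIVE_CONF_DIR pointing at the chosen conf directory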
Example #27
0
from ambari_commons.os_check import OSCheck
from resource_management.libraries.script.script import Script
from resource_management.libraries.functions import get_kinit_path
from resource_management.libraries.functions.get_not_managed_resources import get_not_managed_resources
from resource_management.libraries.resources.hdfs_resource import HdfsResource
from resource_management.libraries.functions.stack_features import check_stack_feature
from resource_management.libraries.functions.stack_features import get_stack_feature_version
from resource_management.libraries.functions import StackFeature
from ambari_commons.constants import AMBARI_SUDO_BINARY

config = Script.get_config()
tmp_dir = Script.get_tmp_dir()
artifact_dir = tmp_dir + "/AMBARI-artifacts"

version_for_stack_feature_checks = get_stack_feature_version(config)
stack_supports_hadoop_custom_extensions = check_stack_feature(
    StackFeature.HADOOP_CUSTOM_EXTENSIONS, version_for_stack_feature_checks)

sudo = AMBARI_SUDO_BINARY

# Global flag enabling or disabling the sysprep feature
host_sys_prepped = default("/hostLevelParams/host_sys_prepped", False)

# Whether to skip copying fast-hdfs-resource.jar to /var/lib/ambari-agent/lib/
# This is required if tarballs are going to be copied to HDFS, so set to False
sysprep_skip_copy_fast_jar_hdfs = host_sys_prepped and default(
    "/configurations/cluster-env/sysprep_skip_copy_fast_jar_hdfs", False)

# Whether to skip setting up the unlimited key JCE policy
sysprep_skip_setup_jce = host_sys_prepped and default(
    "/configurations/cluster-env/sysprep_skip_setup_jce", False)
Example #28
0
config = Script.get_config()
exec_tmp_dir = Script.get_tmp_dir()
sudo = AMBARI_SUDO_BINARY

stack_name = default("/hostLevelParams/stack_name", None)
retryAble = default("/commandParams/command_retry_enabled", False)

version = default("/commandParams/version", None)

stack_version_unformatted = str(config['hostLevelParams']['stack_version'])
stack_version = format_stack_version(stack_version_unformatted)
stack_root = status_params.stack_root

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)
stack_supports_ranger_audit_db = check_stack_feature(
    StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks)

component_directory = status_params.component_directory

#hadoop params
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
daemon_script = format(
    '/usr/iop/current/{component_directory}/bin/hbase-daemon.sh')
region_mover = format(
    '/usr/iop/current/{component_directory}/bin/region_mover.rb')
region_drainer = format(
    '/usr/iop/current/{component_directory}/bin/draining_servers.rb')
hbase_cmd = format('/usr/iop/current/{component_directory}/bin/hbase')

limits_conf_dir = "/etc/security/limits.d"
Example #29
0
def hive_service(name, action='start', upgrade_type=None):

    import params
    import status_params

    if name == 'metastore':
        pid_file = status_params.hive_metastore_pid
        cmd = format(
            "{start_metastore_path} {hive_log_dir}/hive.out {hive_log_dir}/hive.err {pid_file} {hive_server_conf_dir} {hive_log_dir}"
        )
    elif name == 'hiveserver2':
        pid_file = status_params.hive_pid
        cmd = format(
            "{start_hiveserver2_path} {hive_log_dir}/hive-server2.out {hive_log_dir}/hive-server2.err {pid_file} {hive_server_conf_dir} {hive_log_dir}"
        )

        if params.security_enabled and check_stack_feature(
                StackFeature.HIVE_SERVER2_KERBERIZED_ENV,
                params.version_for_stack_feature_checks):
            hive_kinit_cmd = format(
                "{kinit_path_local} -kt {hive_server2_keytab} {hive_principal}; "
            )
            Execute(hive_kinit_cmd, user=params.hive_user)

    pid = get_user_call_output.get_user_call_output(format("cat {pid_file}"),
                                                    user=params.hive_user,
                                                    is_checked_call=False)[1]
    process_id_exists_command = format(
        "ls {pid_file} >/dev/null 2>&1 && ps -p {pid} >/dev/null 2>&1")

    if action == 'start':
        if name == 'hiveserver2':
            check_fs_root(params.hive_server_conf_dir, params.execute_path)

        daemon_cmd = cmd
        hadoop_home = params.hadoop_home
        hive_bin = "hive"

        # upgrading hiveserver2 (rolling_restart) means that there is an existing,
        # de-registering hiveserver2; the pid will still exist, but the new
        # hiveserver is spinning up on a new port, so the pid will be re-written
        if upgrade_type == UPGRADE_TYPE_ROLLING:
            process_id_exists_command = None

            if params.version and params.stack_root:
                hadoop_home = format("{stack_root}/{version}/hadoop")
                hive_bin = os.path.join(params.hive_bin, hive_bin)

        Execute(daemon_cmd,
                user=params.hive_user,
                environment={
                    'HADOOP_HOME': hadoop_home,
                    'JAVA_HOME': params.java64_home,
                    'HIVE_BIN': hive_bin
                },
                path=params.execute_path,
                not_if=process_id_exists_command)

        if params.hive_jdbc_driver == "com.mysql.jdbc.Driver" or \
           params.hive_jdbc_driver == "org.postgresql.Driver" or \
           params.hive_jdbc_driver == "oracle.jdbc.driver.OracleDriver":

            validation_called = False

            if params.hive_jdbc_target is not None:
                validation_called = True
                validate_connection(params.hive_jdbc_target, params.hive_lib)
            if params.hive2_jdbc_target is not None:
                validation_called = True
                validate_connection(params.hive2_jdbc_target,
                                    params.hive_server2_hive2_lib)

            if not validation_called:
                emessage = "ERROR! DB connection check should be executed at least one time!"
                Logger.error(emessage)

    elif action == 'stop':

        daemon_kill_cmd = format("{sudo} kill {pid}")
        daemon_hard_kill_cmd = format("{sudo} kill -9 {pid}")

        Execute(daemon_kill_cmd,
                not_if=format("! ({process_id_exists_command})"))

        wait_time = 5
        Execute(
            daemon_hard_kill_cmd,
            not_if=format(
                "! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )"
            ),
            ignore_failures=True)

        try:
            # check if stopped the process, else fail the task
            Execute(
                format("! ({process_id_exists_command})"),
                tries=20,
                try_sleep=3,
            )
        except:
            show_logs(params.hive_log_dir, params.hive_user)
            raise

        File(pid_file, action="delete")
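
A minimal usage sketch, assuming the surrounding control script imports this helper:

# hypothetical calls from a HiveServer2 control script
hive_service('hiveserver2', action='start', upgrade_type=None)
hive_service('hiveserver2', action='stop')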
Example #30
0
major_stack_version = get_major_version(stack_version_formatted)

# e.g. 2.3.0.0-2130
full_stack_version = default("/commandParams/version", None)

spark_client_version = get_stack_version('spark-client')

hbase_master_hosts = default("/clusterHostInfo/hbase_master_hosts", [])
livy_hosts = default("/clusterHostInfo/livy_server_hosts", [])
livy2_hosts = default("/clusterHostInfo/livy2_server_hosts", [])

livy_livyserver_host = None
livy_livyserver_port = None
livy2_livyserver_host = None
livy2_livyserver_port = None
if stack_version_formatted and check_stack_feature(StackFeature.SPARK_LIVY, stack_version_formatted) and \
    len(livy_hosts) > 0:
  livy_livyserver_host = str(livy_hosts[0])
  livy_livyserver_port = config['configurations']['livy-conf']['livy.server.port']

if stack_version_formatted and check_stack_feature(StackFeature.SPARK_LIVY2, stack_version_formatted) and \
    len(livy2_hosts) > 0:
  livy2_livyserver_host = str(livy2_hosts[0])
  livy2_livyserver_port = config['configurations']['livy2-conf']['livy.server.port']

hdfs_user = config['configurations']['hadoop-env']['hdfs_user']
security_enabled = config['configurations']['cluster-env']['security_enabled']
hdfs_user_keytab = config['configurations']['hadoop-env']['hdfs_user_keytab']
kinit_path_local = get_kinit_path(default('/configurations/kerberos-env/executable_search_paths', None))
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
Example #31
0
# server configurations
config = Script.get_config()
stack_root = Script.get_stack_root()

tmp_dir = Script.get_tmp_dir()
stack_name = status_params.stack_name
upgrade_direction = default("/commandParams/upgrade_direction", None)
version = default("/commandParams/version", None)
# E.g., 2.3.2.0
version_formatted = format_stack_version(version)

# E.g., 2.3
stack_version_unformatted = config['hostLevelParams']['stack_version']
stack_version_formatted = format_stack_version(stack_version_unformatted)
stack_supports_ranger_kerberos = stack_version_formatted and check_stack_feature(
    StackFeature.RANGER_KERBEROS_SUPPORT, stack_version_formatted)

# This is the version whose state is CURRENT. During an RU, this is the source version.
# DO NOT format it since we need the build number too.
upgrade_from_version = default("/hostLevelParams/current_version", None)

# server configurations
# Default value used in HDP 2.3.0.0 and earlier.
knox_data_dir = '/var/lib/knox/data'

# Important, it has to be strictly greater than 2.3.0.0!!!
Logger.info(format("Stack version to use is {version_formatted}"))
if version_formatted and check_stack_feature(
        StackFeature.KNOX_VERSIONED_DATA_DIR, version_formatted):
    # This is the current version. In the case of a Rolling Upgrade, it will be the newer version.
    # In the case of a Downgrade, it will be the version downgrading to.
    # Assumed completion of the truncated snippet: point at the versioned Knox data dir.
    knox_data_dir = format('{stack_root}/{version}/knox/data')
Example #32
0
# there is a stack upgrade which has not yet been finalized; it's currently suspended
upgrade_suspended = default("roleParams/upgrade_suspended", False)

# New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade
version = default("/commandParams/version", None)

# The desired role is only available during a Non-Rolling Upgrade in HA.
# The server calculates which of the two NameNodes will be the active, and the other the standby since they
# are started using different commands.
desired_namenode_role = default("/commandParams/desired_namenode_role", None)

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)

stack_supports_ranger_kerberos = check_stack_feature(
    StackFeature.RANGER_KERBEROS_SUPPORT, version_for_stack_feature_checks)
stack_supports_ranger_audit_db = check_stack_feature(
    StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks)
stack_supports_zk_security = check_stack_feature(
    StackFeature.SECURE_ZOOKEEPER, version_for_stack_feature_checks)

security_enabled = config['configurations']['cluster-env']['security_enabled']
hdfs_user = status_params.hdfs_user
root_user = "root"
hadoop_pid_dir_prefix = status_params.hadoop_pid_dir_prefix
namenode_pid_file = status_params.namenode_pid_file
zkfc_pid_file = status_params.zkfc_pid_file
datanode_pid_file = status_params.datanode_pid_file

# Some datanode settings
dfs_dn_addr = default('/configurations/hdfs-site/dfs.datanode.address', None)
Example #33
0
def oozie_server_specific():
    import params

    no_op_test = as_user(format(
        "ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"
    ),
                         user=params.oozie_user)

    File(params.pid_file, action="delete", not_if=no_op_test)

    oozie_server_directories = [
        format("{oozie_home}/{oozie_tmp_dir}"), params.oozie_pid_dir,
        params.oozie_log_dir, params.oozie_tmp_dir, params.oozie_data_dir,
        params.oozie_lib_dir, params.oozie_webapps_dir,
        params.oozie_webapps_conf_dir, params.oozie_server_dir
    ]
    Directory(
        oozie_server_directories,
        owner=params.oozie_user,
        group=params.user_group,
        mode=0755,
        create_parents=True,
        cd_access="a",
    )

    Directory(
        params.oozie_libext_dir,
        create_parents=True,
    )

    hashcode_file = format("{oozie_home}/.hashcode")
    skip_recreate_sharelib = format(
        "test -f {hashcode_file} && test -d {oozie_home}/share")

    untar_sharelib = ('tar', '-xvf',
                      format('{oozie_home}/oozie-sharelib.tar.gz'), '-C',
                      params.oozie_home)

    Execute(
        untar_sharelib,  # time-expensive
        not_if=format("{no_op_test} || {skip_recreate_sharelib}"),
        sudo=True,
    )

    configure_cmds = []
    configure_cmds.append(('cp', params.ext_js_path, params.oozie_libext_dir))
    configure_cmds.append(('chown', format('{oozie_user}:{user_group}'),
                           format('{oozie_libext_dir}/{ext_js_file}')))

    Execute(
        configure_cmds,
        not_if=no_op_test,
        sudo=True,
    )

    Directory(
        params.oozie_webapps_conf_dir,
        owner=params.oozie_user,
        group=params.user_group,
        recursive_ownership=True,
        recursion_follow_links=True,
    )

    # download the database JAR
    download_database_library_if_needed()

    #falcon el extension
    if params.has_falcon_host:
        Execute(format(
            '{sudo} cp {falcon_home}/oozie/ext/falcon-oozie-el-extension-*.jar {oozie_libext_dir}'
        ),
                not_if=no_op_test)

        Execute(format(
            '{sudo} chown {oozie_user}:{user_group} {oozie_libext_dir}/falcon-oozie-el-extension-*.jar'
        ),
                not_if=no_op_test)

    if params.lzo_enabled and len(params.all_lzo_packages) > 0:
        Package(params.all_lzo_packages,
                retry_on_repo_unavailability=params.
                agent_stack_retry_on_unavailability,
                retry_count=params.agent_stack_retry_count)
        Execute(
            format(
                '{sudo} cp {hadoop_lib_home}/hadoop-lzo*.jar {oozie_lib_dir}'),
            not_if=no_op_test,
        )

    prepare_war(params)

    File(
        hashcode_file,
        mode=0644,
    )

    if params.stack_version_formatted and check_stack_feature(
            StackFeature.OOZIE_CREATE_HIVE_TEZ_CONFIGS,
            params.stack_version_formatted):
        # Create hive-site and tez-site configs for oozie
        Directory(params.hive_conf_dir,
                  create_parents=True,
                  owner=params.oozie_user,
                  group=params.user_group)
        if 'hive-site' in params.config['configurations']:
            XmlConfig(
                "hive-site.xml",
                conf_dir=params.hive_conf_dir,
                configurations=params.config['configurations']['hive-site'],
                configuration_attributes=params.
                config['configuration_attributes']['hive-site'],
                owner=params.oozie_user,
                group=params.user_group,
                mode=0644)
        if 'tez-site' in params.config['configurations']:
            XmlConfig(
                "tez-site.xml",
                conf_dir=params.hive_conf_dir,
                configurations=params.config['configurations']['tez-site'],
                configuration_attributes=params.
                config['configuration_attributes']['tez-site'],
                owner=params.oozie_user,
                group=params.user_group,
                mode=0664)

        # If Atlas is also installed, need to generate Atlas Hive hook (hive-atlas-application.properties file) in directory
        # {stack_root}/{current_version}/atlas/hook/hive/
        # Because this is a .properties file instead of an xml file, it will not be read automatically by Oozie.
        # However, we should still save the file on this host so that it can be uploaded to the Oozie Sharelib in DFS.
        if has_atlas_in_cluster():
            atlas_hook_filepath = os.path.join(params.hive_conf_dir,
                                               params.atlas_hook_filename)
            Logger.info(
                "Has atlas in cluster, will save Atlas Hive hook into location %s"
                % str(atlas_hook_filepath))
            setup_atlas_hook(SERVICE.HIVE,
                             params.hive_atlas_application_properties,
                             atlas_hook_filepath, params.oozie_user,
                             params.user_group)

    Directory(
        params.oozie_server_dir,
        owner=params.oozie_user,
        group=params.user_group,
        recursive_ownership=True,
    )
Example #34
0
tmp_dir = Script.get_tmp_dir()

stack_name = status_params.stack_name
stack_root = Script.get_stack_root()
stack_version_unformatted = config['hostLevelParams']['stack_version']
stack_version_formatted = format_stack_version(stack_version_unformatted)
host_sys_prepped = default("/hostLevelParams/host_sys_prepped", False)

# New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade
version = default("/commandParams/version", None)

spark_conf = '/etc/spark2/conf'
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")

if stack_version_formatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version_formatted):
  hadoop_home = stack_select.get_hadoop_dir("home")
  spark_conf = format("{stack_root}/current/{component_directory}/conf")
  spark_log_dir = config['configurations']['spark2-env']['spark_log_dir']
  spark_pid_dir = status_params.spark_pid_dir
  spark_home = format("{stack_root}/current/{component_directory}")

spark_thrift_server_conf_file = spark_conf + "/spark-thrift-sparkconf.conf"
java_home = config['hostLevelParams']['java_home']

hdfs_user = config['configurations']['hadoop-env']['hdfs_user']
hdfs_principal_name = config['configurations']['hadoop-env']['hdfs_principal_name']
hdfs_user_keytab = config['configurations']['hadoop-env']['hdfs_user_keytab']
user_group = config['configurations']['cluster-env']['user_group']

spark_user = status_params.spark_user
Example #35
0
hdfs_tmp_dir = config['configurations']['hadoop-env']['hdfs_tmp_dir']

# hadoop default parameters
mapreduce_libs_path = "/usr/lib/hadoop-mapreduce/*"
hadoop_libexec_dir = stack_select.get_hadoop_dir("libexec")
hadoop_bin = stack_select.get_hadoop_dir("sbin")
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")
hadoop_home = stack_select.get_hadoop_dir("home")
hadoop_secure_dn_user = hdfs_user
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
hadoop_conf_secure_dir = os.path.join(hadoop_conf_dir, "secure")
hadoop_lib_home = stack_select.get_hadoop_dir("lib")

# hadoop parameters for stacks that support rolling_upgrade
if stack_version_formatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version_formatted):
  mapreduce_libs_path = format("{stack_root}/current/hadoop-mapreduce-client/*")

  if not security_enabled:
    hadoop_secure_dn_user = '""'  # clear the secure DN user when security is off (assumed original value)
  else:
    dfs_dn_port = utils.get_port(dfs_dn_addr)
    dfs_dn_http_port = utils.get_port(dfs_dn_http_addr)
    dfs_dn_https_port = utils.get_port(dfs_dn_https_addr)
    # Avoid being unable to start the DataNode as a non-root user because of root-owned (privileged) ports
    if dfs_http_policy == "HTTPS_ONLY":
      secure_dn_ports_are_in_use = utils.is_secure_port(dfs_dn_port) or utils.is_secure_port(dfs_dn_https_port)
    elif dfs_http_policy == "HTTP_AND_HTTPS":
      secure_dn_ports_are_in_use = utils.is_secure_port(dfs_dn_port) or utils.is_secure_port(dfs_dn_http_port) or utils.is_secure_port(dfs_dn_https_port)
    else:   # params.dfs_http_policy == "HTTP_ONLY" or not defined:
      secure_dn_ports_are_in_use = utils.is_secure_port(dfs_dn_port) or utils.is_secure_port(dfs_dn_http_port)
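
For intuition, a minimal sketch of the privileged-port test these checks rely on (ports below 1024 can only be bound by root); this mirrors the intent of utils.is_secure_port but is not the library implementation.

def is_secure_port_sketch(port):
  # a port is "secure" (root-owned) when it is below 1024
  return port is not None and port < 1024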
Example #36
0
def _valid(stack_name, package, ver):
  return (ver and check_stack_feature(StackFeature.CONFIG_VERSIONING, ver))
Example #37
0
def get_hadoop_conf_dir(force_latest_on_upgrade=False):
  """
  Gets the shared hadoop conf directory using:
  1.  Start with /etc/hadoop/conf
  2.  When the stack is greater than HDP-2.2, use <stack-root>/current/hadoop-client/conf
  3.  Only when doing a RU and HDP-2.3 or higher, use the value as computed
      by <conf-selector-tool>.  This is in the form <stack-root>/VERSION/hadoop/conf to make sure
      the configs are written in the correct place. However, if the component itself has
      not yet been upgraded, it should use the hadoop configs from the prior version.
      This will perform an <stack-selector-tool> status to determine which version to use.
  :param force_latest_on_upgrade:  if True, then force the returned path to always
  be that of the upgrade target version, even if <stack-selector-tool> has not been called. This
  is primarily used by hooks like before-ANY to ensure that hadoop environment
  configurations are written to the correct location since they are written out
  before the <stack-selector-tool>/<conf-selector-tool> would have been called.
  """
  hadoop_conf_dir = "/etc/hadoop/conf"
  stack_name = None
  stack_root = Script.get_stack_root()
  stack_version = Script.get_stack_version()
  version = None
  allow_setting_conf_select_symlink = False

  if not Script.in_stack_upgrade():
    # During normal operation, the HDP stack must be 2.3 or higher
    if stack_version and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version):
      hadoop_conf_dir = os.path.join(stack_root, "current", "hadoop-client", "conf")

    if stack_version and check_stack_feature(StackFeature.CONFIG_VERSIONING, stack_version):
      hadoop_conf_dir = os.path.join(stack_root, "current", "hadoop-client", "conf")
      stack_name = default("/hostLevelParams/stack_name", None)
      version = default("/commandParams/version", None)

      if stack_name and version:
        version = str(version)
        allow_setting_conf_select_symlink = True
  else:
    # During an upgrade/downgrade, which can be a Rolling or Express Upgrade, need to calculate it based on the version
    '''
    Whenever upgrading to HDP 2.2, or downgrading back to 2.2, need to use /etc/hadoop/conf
    Whenever upgrading to HDP 2.3, or downgrading back to 2.3, need to use a versioned hadoop conf dir

    Type__|_Source_|_Target_|_Direction_____________|_Comment_____________________________________________________________
    Normal|        | 2.2    |                       | Use /etc/hadoop/conf
    Normal|        | 2.3    |                       | Use /etc/hadoop/conf, which should be a symlink to <stack-root>/current/hadoop-client/conf
    EU    | 2.1    | 2.3    | Upgrade               | Use versioned <stack-root>/current/hadoop-client/conf
          |        |        | No Downgrade Allowed  | Invalid
    EU/RU | 2.2    | 2.2.*  | Any                   | Use <stack-root>/current/hadoop-client/conf
    EU/RU | 2.2    | 2.3    | Upgrade               | Use <stack-root>/$version/hadoop/conf, which should be a symlink destination
          |        |        | Downgrade             | Use <stack-root>/current/hadoop-client/conf
    EU/RU | 2.3    | 2.3.*  | Any                   | Use <stack-root>/$version/hadoop/conf, which should be a symlink destination
    '''

    # The "stack_version" is the desired stack, e.g., 2.2 or 2.3
    # In an RU, it is always the desired stack, and doesn't change even during the Downgrade!
    # In an RU Downgrade from HDP 2.3 to 2.2, the first thing we do is 
    # rm /etc/[component]/conf and then mv /etc/[component]/conf.backup /etc/[component]/conf
    if stack_version and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version):
      hadoop_conf_dir = os.path.join(stack_root, "current", "hadoop-client", "conf")

      # This contains the "version", including the build number, that is actually used during a stack upgrade and
      # is the version upgrading/downgrading to.
      stack_info = stack_select._get_upgrade_stack()

      if stack_info is not None:
        stack_name = stack_info[0]
        version = stack_info[1]
      else:
        raise Fail("Unable to get parameter 'version'")
      
      Logger.info("In the middle of a stack upgrade/downgrade for Stack {0} and destination version {1}, determining which hadoop conf dir to use.".format(stack_name, version))
      # This is the version either upgrading or downgrading to.
      if version and check_stack_feature(StackFeature.CONFIG_VERSIONING, version):
        # Determine if <stack-selector-tool> has been run and if not, then use the current
        # hdp version until this component is upgraded.
        if not force_latest_on_upgrade:
          current_stack_version = stack_select.get_role_component_current_stack_version()
          if current_stack_version is not None and version != current_stack_version:
            version = current_stack_version
            stack_selector_name = stack_tools.get_stack_tool_name(stack_tools.STACK_SELECTOR_NAME)
            Logger.info("{0} has not yet been called to update the symlink for this component, "
                        "keep using version {1}".format(stack_selector_name, current_stack_version))

        # Only change the hadoop_conf_dir path, don't <conf-selector-tool> this older version
        hadoop_conf_dir = os.path.join(stack_root, version, "hadoop", "conf")
        Logger.info("Hadoop conf dir: {0}".format(hadoop_conf_dir))

        allow_setting_conf_select_symlink = True

  if allow_setting_conf_select_symlink:
    # If not in the middle of an upgrade and on HDP 2.3 or higher, or if
    # upgrading stack to version 2.3.0.0 or higher (which may be upgrade or downgrade), then consider setting the
    # symlink for /etc/hadoop/conf.
    # If a host does not have any HDFS or YARN components (e.g., only ZK), then it will not contain /etc/hadoop/conf
    # Therefore, any calls to <conf-selector-tool> will fail.
    # For that reason, if the hadoop conf directory exists, then make sure it is set.
    if os.path.exists(hadoop_conf_dir):
      conf_selector_name = stack_tools.get_stack_tool_name(stack_tools.CONF_SELECTOR_NAME)
      Logger.info("The hadoop conf dir {0} exists, will call {1} on it for version {2}".format(
              hadoop_conf_dir, conf_selector_name, version))
      select(stack_name, "hadoop", version)

  Logger.info("Using hadoop conf dir: {0}".format(hadoop_conf_dir))
  return hadoop_conf_dir
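
A hedged usage sketch based on the docstring above:

# typical call sites take the default; hooks that run before <stack-selector-tool>
# force the conf dir of the upgrade target version
hadoop_conf_dir = get_hadoop_conf_dir()
hook_conf_dir = get_hadoop_conf_dir(force_latest_on_upgrade=True)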
Example #38
0
def setup_spark(env, type, upgrade_type=None, action=None, config_dir=None):
    """
  :param env: Python environment
  :param type: Spark component type
  :param upgrade_type: If in a stack upgrade, either UPGRADE_TYPE_ROLLING or UPGRADE_TYPE_NON_ROLLING
  :param action: Action to perform, such as generate configs
  :param config_dir: Optional config directory to write configs to.
  """

    import params

    if config_dir is None:
        config_dir = params.spark_conf

    Directory([params.spark_pid_dir, params.spark_log_dir],
              owner=params.spark_user,
              group=params.user_group,
              mode=0775,
              create_parents=True)
    if type == 'server' and action == 'config':
        params.HdfsResource(params.spark_hdfs_user_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.spark_user,
                            mode=0775)
        params.HdfsResource(None, action="execute")

    PropertiesFile(
        os.path.join(config_dir, "spark-defaults.conf"),
        properties=params.config['configurations']['spark-defaults'],
        key_value_delimiter=" ",
        owner=params.spark_user,
        group=params.spark_group,
        mode=0644)

    # create spark-env.sh in etc/conf dir
    File(
        os.path.join(config_dir, 'spark-env.sh'),
        owner=params.spark_user,
        group=params.spark_group,
        content=InlineTemplate(params.spark_env_sh),
        mode=0644,
    )

    #create log4j.properties in etc/conf dir
    File(
        os.path.join(config_dir, 'log4j.properties'),
        owner=params.spark_user,
        group=params.spark_group,
        content=params.spark_log4j_properties,
        mode=0644,
    )

    #create metrics.properties in etc/conf dir
    File(os.path.join(config_dir, 'metrics.properties'),
         owner=params.spark_user,
         group=params.spark_group,
         content=InlineTemplate(params.spark_metrics_properties),
         mode=0644)

    Directory(
        params.spark_logs_dir,
        owner=params.spark_user,
        group=params.spark_group,
        mode=0755,
    )

    if params.is_hive_installed:
        XmlConfig("hive-site.xml",
                  conf_dir=config_dir,
                  configurations=params.spark_hive_properties,
                  owner=params.spark_user,
                  group=params.spark_group,
                  mode=0644)

    if params.has_spark_thriftserver:
        PropertiesFile(params.spark_thrift_server_conf_file,
                       properties=params.config['configurations']
                       ['spark-thrift-sparkconf'],
                       owner=params.hive_user,
                       group=params.user_group,
                       key_value_delimiter=" ",
                       mode=0644)

    effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
    if effective_version:
        effective_version = format_stack_version(effective_version)

    if effective_version and check_stack_feature(
            StackFeature.SPARK_JAVA_OPTS_SUPPORT, effective_version):
        File(os.path.join(params.spark_conf, 'java-opts'),
             owner=params.spark_user,
             group=params.spark_group,
             content=InlineTemplate(params.spark_javaopts_properties),
             mode=0644)
    else:
        File(os.path.join(params.spark_conf, 'java-opts'), action="delete")

    if params.spark_thrift_fairscheduler_content and effective_version and check_stack_feature(
            StackFeature.SPARK_16PLUS, effective_version):
        # create spark-thrift-fairscheduler.xml
        File(os.path.join(config_dir, "spark-thrift-fairscheduler.xml"),
             owner=params.spark_user,
             group=params.spark_group,
             mode=0755,
             content=InlineTemplate(params.spark_thrift_fairscheduler_content))
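
A minimal usage sketch; the argument values are assumptions based on the checks inside the function.

# hypothetical call when (re)writing Spark server configs outside of an upgrade
setup_spark(env, 'server', upgrade_type=None, action='config')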
Example #39
0
    def prepare_libext_directory():
        """
    Performs the following actions on libext:
      - creates <stack-root>/current/oozie/libext and recursively sets 777 permissions on it and its parents
      - downloads the JDBC driver JAR if needed
      - copies the Falcon JAR for the Oozie WAR if needed
    """
        import params

        # some stack versions don't need the lzo compression libraries
        target_version_needs_compression_libraries = params.version and check_stack_feature(
            StackFeature.LZO, params.version)

        # ensure the directory exists
        Directory(params.oozie_libext_dir, mode=0777)

        # get all hadooplzo* JAR files
        # <stack-selector-tool> set hadoop-client has not run yet, therefore we cannot use
        # <stack-root>/current/hadoop-client ; we must use params.version directly
        # however, this only works when upgrading beyond 2.2.0.0; don't do this
        # for downgrade to 2.2.0.0 since hadoop-lzo will not be present
        # This can also be called during a Downgrade.
        # When a version is Installed, it is responsible for downloading the hadoop-lzo packages
        # if lzo is enabled.
        if params.lzo_enabled and (
                params.upgrade_direction == Direction.UPGRADE
                or target_version_needs_compression_libraries):
            hadoop_lzo_pattern = 'hadoop-lzo*.jar'
            hadoop_client_new_lib_dir = format(
                "{stack_root}/{version}/hadoop/lib")

            # use glob.glob (not iglob) so the emptiness check below operates on a real list
            files = glob.glob(
                os.path.join(hadoop_client_new_lib_dir, hadoop_lzo_pattern))
            if not files:
                raise Fail("There are no files at {0} matching {1}".format(
                    hadoop_client_new_lib_dir, hadoop_lzo_pattern))

            # copy files into libext
            files_copied = False
            for file in files:
                if os.path.isfile(file):
                    Logger.info("Copying {0} to {1}".format(
                        str(file), params.oozie_libext_dir))
                    shutil.copy2(file, params.oozie_libext_dir)
                    files_copied = True

            if not files_copied:
                raise Fail("There are no files at {0} matching {1}".format(
                    hadoop_client_new_lib_dir, hadoop_lzo_pattern))

        # something like <stack-root>/current/oozie-server/libext/ext-2.2.zip
        oozie_ext_zip_target_path = os.path.join(params.oozie_libext_dir,
                                                 params.ext_js_file)

        # Copy ext ZIP to libext dir
        # Default to /usr/share/$TARGETSTACK-oozie/ext-2.2.zip as the first path
        source_ext_zip_paths = oozie.get_oozie_ext_zip_source_paths(
            upgrade_type, params)

        found_at_least_one_oozie_ext_file = False

        # Copy the first oozie ext-2.2.zip file that is found.
        # This uses a list to handle the cases when migrating from some versions of BigInsights to HDP.
        if source_ext_zip_paths is not None:
            for source_ext_zip_path in source_ext_zip_paths:
                if os.path.isfile(source_ext_zip_path):
                    found_at_least_one_oozie_ext_file = True
                    Logger.info("Copying {0} to {1}".format(
                        source_ext_zip_path, params.oozie_libext_dir))
                    Execute(
                        ("cp", source_ext_zip_path, params.oozie_libext_dir),
                        sudo=True)
                    Execute(("chown", format("{oozie_user}:{user_group}"),
                             oozie_ext_zip_target_path),
                            sudo=True)
                    File(oozie_ext_zip_target_path, mode=0644)
                    break

        if not found_at_least_one_oozie_ext_file:
            raise Fail(
                "Unable to find any Oozie source extension files from the following paths {0}"
                .format(source_ext_zip_paths))

        # Redownload jdbc driver to a new current location
        oozie.download_database_library_if_needed()

        # get the upgrade version in the event that it's needed
        upgrade_stack = stack_select._get_upgrade_stack()
        if upgrade_stack is None or len(
                upgrade_stack) < 2 or upgrade_stack[1] is None:
            raise Fail(
                "Unable to determine the stack that is being upgraded to or downgraded to."
            )

        stack_version = upgrade_stack[1]

        # copy the Falcon JAR if needed; falcon has not upgraded yet, so we must
        # use the versioned falcon directory
        if params.has_falcon_host:
            versioned_falcon_jar_directory = "{0}/{1}/falcon/oozie/ext/falcon-oozie-el-extension-*.jar".format(
                params.stack_root, stack_version)
            Logger.info("Copying {0} to {1}".format(
                versioned_falcon_jar_directory, params.oozie_libext_dir))

            Execute(
                format(
                    '{sudo} cp {versioned_falcon_jar_directory} {oozie_libext_dir}'
                ))
            Execute(
                format(
                    '{sudo} chown {oozie_user}:{user_group} {oozie_libext_dir}/falcon-oozie-el-extension-*.jar'
                ))
Example #40
0
architecture = get_architecture()

stack_name = status_params.stack_name
stack_root = Script.get_stack_root()
tarball_map = default("/configurations/cluster-env/tarball_map", None)

config_path = os.path.join(stack_root, "current/hadoop-client/conf")
config_dir = os.path.realpath(config_path)

# This is expected to be of the form #.#.#.#
stack_version_unformatted = config['hostLevelParams']['stack_version']
stack_version_formatted_major = format_stack_version(stack_version_unformatted)
stack_version_formatted = functions.get_stack_version(
    'hadoop-yarn-resourcemanager')

stack_supports_ru = stack_version_formatted_major and check_stack_feature(
    StackFeature.ROLLING_UPGRADE, stack_version_formatted_major)
stack_supports_timeline_state_store = stack_version_formatted_major and check_stack_feature(
    StackFeature.TIMELINE_STATE_STORE, stack_version_formatted_major)

# New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade.
# It cannot be used during the initial Cluster Install because the version is not yet known.
version = default("/commandParams/version", None)

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)

stack_supports_ranger_kerberos = check_stack_feature(
    StackFeature.RANGER_KERBEROS_SUPPORT, version_for_stack_feature_checks)
stack_supports_ranger_audit_db = check_stack_feature(
    StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks)
Example #41
0
def spark_service(name, upgrade_type=None, action=None):
  import params

  if action == 'start':

    effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
    if effective_version:
      effective_version = format_stack_version(effective_version)

    if effective_version and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version):
      # create & copy spark2-hdp-yarn-archive.tar.gz to hdfs
      source_dir=params.spark_home+"/jars"
      tmp_archive_file="/tmp/spark2/spark2-hdp-yarn-archive.tar.gz"
      make_tarfile(tmp_archive_file, source_dir)
      copy_to_hdfs("spark2", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)
      # create spark history directory
      params.HdfsResource(params.spark_history_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.spark_user,
                          group=params.user_group,
                          mode=0777,
                          recursive_chmod=True
                          )
      params.HdfsResource(None, action="execute")

    if params.security_enabled:
      spark_kinit_cmd = format("{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ")
      Execute(spark_kinit_cmd, user=params.spark_user)

    # Spark 1.3.1.2.3, and higher, which was included in HDP 2.3, does not have a dependency on Tez, so it does not
    # need to copy the tarball, otherwise, copy it.
    if params.stack_version_formatted and check_stack_feature(StackFeature.TEZ_FOR_SPARK, params.stack_version_formatted):
      resource_created = copy_to_hdfs("tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)
      if resource_created:
        params.HdfsResource(None, action="execute")

    if name == 'jobhistoryserver':
      historyserver_no_op_test = format(
      'ls {spark_history_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_history_server_pid_file}` >/dev/null 2>&1')
      try:
        Execute(format('{spark_history_server_start}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=historyserver_no_op_test)
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise

    elif name == 'sparkthriftserver':
      if params.security_enabled:
        hive_principal = params.hive_kerberos_principal.replace('_HOST', socket.getfqdn().lower())
        hive_kinit_cmd = format("{kinit_path_local} -kt {hive_kerberos_keytab} {hive_principal}; ")
        Execute(hive_kinit_cmd, user=params.hive_user)

      thriftserver_no_op_test = format(
      'ls {spark_thrift_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_thrift_server_pid_file}` >/dev/null 2>&1')
      try:
        Execute(format('{spark_thrift_server_start} --properties-file {spark_thrift_server_conf_file} {spark_thrift_cmd_opts_properties}'),
                user=params.hive_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=thriftserver_no_op_test
        )
      except:
        show_logs(params.spark_log_dir, user=params.hive_user)
        raise
  elif action == 'stop':
    if name == 'jobhistoryserver':
      try:
        Execute(format('{spark_history_server_stop}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home}
        )
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise
      File(params.spark_history_server_pid_file,
        action="delete"
      )

    elif name == 'sparkthriftserver':
      try:
        Execute(format('{spark_thrift_server_stop}'),
                user=params.hive_user,
                environment={'JAVA_HOME': params.java_home}
        )
      except:
        show_logs(params.spark_log_dir, user=params.hive_user)
        raise
      File(params.spark_thrift_server_pid_file,
        action="delete"
      )
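
A minimal usage sketch of spark_service(); the names mirror the branches above.

# hypothetical start/stop of the Spark2 history server
spark_service('jobhistoryserver', upgrade_type=None, action='start')
spark_service('jobhistoryserver', action='stop')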
Example #42
0
stack_name = status_params.stack_name
upgrade_direction = default("/commandParams/upgrade_direction", None)
version = default("/commandParams/version", None)

agent_stack_retry_on_unavailability = config['hostLevelParams'][
    'agent_stack_retry_on_unavailability']
agent_stack_retry_count = expect("/hostLevelParams/agent_stack_retry_count",
                                 int)

storm_component_home_dir = status_params.storm_component_home_dir
conf_dir = status_params.conf_dir

stack_version_unformatted = status_params.stack_version_unformatted
stack_version_formatted = status_params.stack_version_formatted
stack_supports_ru = stack_version_formatted and check_stack_feature(
    StackFeature.ROLLING_UPGRADE, stack_version_formatted)
stack_supports_storm_kerberos = stack_version_formatted and check_stack_feature(
    StackFeature.STORM_KERBEROS, stack_version_formatted)
stack_supports_storm_ams = stack_version_formatted and check_stack_feature(
    StackFeature.STORM_AMS, stack_version_formatted)
stack_supports_core_site_for_ranger_plugin = check_stack_feature(
    StackFeature.CORE_SITE_FOR_RANGER_PLUGINS_SUPPORT, stack_version_formatted)

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)

stack_supports_ranger_kerberos = check_stack_feature(
    StackFeature.RANGER_KERBEROS_SUPPORT, version_for_stack_feature_checks)
stack_supports_ranger_audit_db = check_stack_feature(
    StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks)