def hive(name=None): import params if name == 'hiveserver2': # copy tarball to HDFS feature not supported if not (params.stack_version_formatted_major and check_stack_feature(StackFeature.COPY_TARBALL_TO_HDFS, params.stack_version_formatted_major)): params.HdfsResource(params.webhcat_apps_dir, type="directory", action="create_on_execute", owner=params.webhcat_user, mode=0755) # Create webhcat dirs. if params.hcat_hdfs_user_dir != params.webhcat_hdfs_user_dir: params.HdfsResource(params.hcat_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.hcat_user, mode=params.hcat_hdfs_user_mode) params.HdfsResource(params.webhcat_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.webhcat_user, mode=params.webhcat_hdfs_user_mode) # ****** Begin Copy Tarballs ****** # ********************************* # if copy tarball to HDFS feature supported copy mapreduce.tar.gz and tez.tar.gz to HDFS if params.stack_version_formatted_major and check_stack_feature( StackFeature.COPY_TARBALL_TO_HDFS, params.stack_version_formatted_major): copy_to_hdfs("mapreduce", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped) copy_to_hdfs("tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped) # Always copy pig.tar.gz and hive.tar.gz using the appropriate mode. # This can use a different source and dest location to account copy_to_hdfs("pig", params.user_group, params.hdfs_user, file_mode=params.tarballs_mode, custom_source_file=params.pig_tar_source, custom_dest_file=params.pig_tar_dest_file, host_sys_prepped=params.host_sys_prepped) copy_to_hdfs("hive", params.user_group, params.hdfs_user, file_mode=params.tarballs_mode, custom_source_file=params.hive_tar_source, custom_dest_file=params.hive_tar_dest_file, host_sys_prepped=params.host_sys_prepped) wildcard_tarballs = ["sqoop", "hadoop_streaming"] for tarball_name in wildcard_tarballs: source_file_pattern = eval("params." 
+ tarball_name + "_tar_source") dest_dir = eval("params." + tarball_name + "_tar_dest_dir") if source_file_pattern is None or dest_dir is None: continue source_files = glob.glob( source_file_pattern) if "*" in source_file_pattern else [ source_file_pattern ] for source_file in source_files: src_filename = os.path.basename(source_file) dest_file = os.path.join(dest_dir, src_filename) copy_to_hdfs(tarball_name, params.user_group, params.hdfs_user, file_mode=params.tarballs_mode, custom_source_file=source_file, custom_dest_file=dest_file, host_sys_prepped=params.host_sys_prepped) # ******* End Copy Tarballs ******* # ********************************* # if warehouse directory is in DFS if not params.whs_dir_protocol or params.whs_dir_protocol == urlparse( params.default_fs).scheme: # Create Hive Metastore Warehouse Dir params.HdfsResource(params.hive_apps_whs_dir, type="directory", action="create_on_execute", owner=params.hive_user, mode=0777) else: Logger.info( format( "Not creating warehouse directory '{hive_apps_whs_dir}', as the location is not in DFS." )) # Create Hive User Dir params.HdfsResource(params.hive_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.hive_user, mode=params.hive_hdfs_user_mode) if not is_empty(params.hive_exec_scratchdir) and not urlparse( params.hive_exec_scratchdir).path.startswith("/tmp"): params.HdfsResource( params.hive_exec_scratchdir, type="directory", action="create_on_execute", owner=params.hive_user, group=params.hdfs_user, mode=0777 ) # Hive expects this dir to be writeable by everyone as it is used as a temp dir params.HdfsResource(None, action="execute") Directory(params.hive_etc_dir_prefix, mode=0755) # We should change configurations for client as well as for server. # The reason is that stale-configs are service-level, not component. 
Logger.info("Directories to fill with configs: %s" % str(params.hive_conf_dirs_list)) for conf_dir in params.hive_conf_dirs_list: fill_conf_dir(conf_dir) XmlConfig( "hive-site.xml", conf_dir=params.hive_config_dir, configurations=params.hive_site_config, configuration_attributes=params.config['configuration_attributes'] ['hive-site'], owner=params.hive_user, group=params.user_group, mode=0644) setup_atlas_hive() if name == 'hiveserver2': XmlConfig( "hiveserver2-site.xml", conf_dir=params.hive_server_conf_dir, configurations=params.config['configurations']['hiveserver2-site'], configuration_attributes=params.config['configuration_attributes'] ['hiveserver2-site'], owner=params.hive_user, group=params.user_group, mode=0644) if params.hive_metastore_site_supported and name == 'metastore': XmlConfig( "hivemetastore-site.xml", conf_dir=params.hive_server_conf_dir, configurations=params.config['configurations'] ['hivemetastore-site'], configuration_attributes=params.config['configuration_attributes'] ['hivemetastore-site'], owner=params.hive_user, group=params.user_group, mode=0644) File(format("{hive_config_dir}/hive-env.sh"), owner=params.hive_user, group=params.user_group, content=InlineTemplate(params.hive_env_sh_template)) # On some OS this folder could be not exists, so we will create it before pushing there files Directory(params.limits_conf_dir, create_parents=True, owner='root', group='root') File(os.path.join(params.limits_conf_dir, 'hive.conf'), owner='root', group='root', mode=0644, content=Template("hive.conf.j2")) if name == 'metastore' or name == 'hiveserver2': if params.hive_jdbc_target is not None and not os.path.exists( params.hive_jdbc_target): jdbc_connector(params.hive_jdbc_target, params.hive_previous_jdbc_jar) if params.hive2_jdbc_target is not None and not os.path.exists( params.hive2_jdbc_target): jdbc_connector(params.hive2_jdbc_target, params.hive2_previous_jdbc_jar) File( format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"), 
content=DownloadSource( format("{jdk_location}{check_db_connection_jar_name}")), mode=0644, ) if name == 'metastore': File(os.path.join(params.hive_server_conf_dir, "hadoop-metrics2-hivemetastore.properties"), owner=params.hive_user, group=params.user_group, content=Template("hadoop-metrics2-hivemetastore.properties.j2")) File(params.start_metastore_path, mode=0755, content=StaticFile('startMetastore.sh')) if params.init_metastore_schema: create_schema_cmd = format( "export HIVE_CONF_DIR={hive_server_conf_dir} ; " "{hive_schematool_bin}/schematool -initSchema " "-dbType {hive_metastore_db_type} " "-userName {hive_metastore_user_name} " "-passWord {hive_metastore_user_passwd!p} -verbose") check_schema_created_cmd = as_user( format("export HIVE_CONF_DIR={hive_server_conf_dir} ; " "{hive_schematool_bin}/schematool -info " "-dbType {hive_metastore_db_type} " "-userName {hive_metastore_user_name} " "-passWord {hive_metastore_user_passwd!p} -verbose"), params.hive_user) # HACK: in cases with quoted passwords and as_user (which does the quoting as well) !p won't work for hiding passwords. 
# Fixing it with the hack below: quoted_hive_metastore_user_passwd = quote_bash_args( quote_bash_args(params.hive_metastore_user_passwd)) if quoted_hive_metastore_user_passwd[0] == "'" and quoted_hive_metastore_user_passwd[-1] == "'" \ or quoted_hive_metastore_user_passwd[0] == '"' and quoted_hive_metastore_user_passwd[-1] == '"': quoted_hive_metastore_user_passwd = quoted_hive_metastore_user_passwd[ 1:-1] Logger.sensitive_strings[repr(check_schema_created_cmd)] = repr( check_schema_created_cmd.replace( format("-passWord {quoted_hive_metastore_user_passwd}"), "-passWord " + utils.PASSWORDS_HIDE_STRING)) Execute(create_schema_cmd, not_if=check_schema_created_cmd, user=params.hive_user) elif name == 'hiveserver2': File(params.start_hiveserver2_path, mode=0755, content=Template(format('{start_hiveserver2_script}'))) File(os.path.join(params.hive_server_conf_dir, "hadoop-metrics2-hiveserver2.properties"), owner=params.hive_user, group=params.user_group, content=Template("hadoop-metrics2-hiveserver2.properties.j2")) if name != "client": Directory(params.hive_pid_dir, create_parents=True, cd_access='a', owner=params.hive_user, group=params.user_group, mode=0755) Directory(params.hive_log_dir, create_parents=True, cd_access='a', owner=params.hive_user, group=params.user_group, mode=0755) Directory(params.hive_var_lib, create_parents=True, cd_access='a', owner=params.hive_user, group=params.user_group, mode=0755)
def hbase(name=None): import params # ensure that matching LZO libraries are installed for HBase lzo_utils.install_lzo_if_needed() Directory(params.etc_prefix_dir, mode=0755) Directory(params.hbase_conf_dir, owner=params.hbase_user, group=params.user_group, create_parents=True) Directory(params.java_io_tmpdir, create_parents=True, mode=0777) # If a file location is specified in ioengine parameter, # ensure that directory exists. Otherwise create the # directory with permissions assigned to hbase:hadoop. ioengine_input = params.ioengine_param if ioengine_input != None: if ioengine_input.startswith("file:/"): ioengine_fullpath = ioengine_input[5:] ioengine_dir = os.path.dirname(ioengine_fullpath) Directory(ioengine_dir, owner=params.hbase_user, group=params.user_group, create_parents=True, mode=0755) parent_dir = os.path.dirname(params.tmp_dir) # In case if we have several placeholders in path while ("${" in parent_dir): parent_dir = os.path.dirname(parent_dir) if parent_dir != os.path.abspath(os.sep): Directory( parent_dir, create_parents=True, cd_access="a", ) Execute(("chmod", "1777", parent_dir), sudo=True) XmlConfig( "hbase-site.xml", conf_dir=params.hbase_conf_dir, configurations=params.config['configurations']['hbase-site'], configuration_attributes=params.config['configuration_attributes'] ['hbase-site'], owner=params.hbase_user, group=params.user_group) if check_stack_feature(StackFeature.PHOENIX_CORE_HDFS_SITE_REQUIRED, params.version_for_stack_feature_checks): XmlConfig( "core-site.xml", conf_dir=params.hbase_conf_dir, configurations=params.config['configurations']['core-site'], configuration_attributes=params.config['configuration_attributes'] ['core-site'], owner=params.hbase_user, group=params.user_group) if 'hdfs-site' in params.config['configurations']: XmlConfig( "hdfs-site.xml", conf_dir=params.hbase_conf_dir, configurations=params.config['configurations']['hdfs-site'], configuration_attributes=params. 
config['configuration_attributes']['hdfs-site'], owner=params.hbase_user, group=params.user_group) else: File(format("{params.hbase_conf_dir}/hdfs-site.xml"), action="delete") File(format("{params.hbase_conf_dir}/core-site.xml"), action="delete") if 'hbase-policy' in params.config['configurations']: XmlConfig( "hbase-policy.xml", conf_dir=params.hbase_conf_dir, configurations=params.config['configurations']['hbase-policy'], configuration_attributes=params.config['configuration_attributes'] ['hbase-policy'], owner=params.hbase_user, group=params.user_group) # Manually overriding ownership of file installed by hadoop package else: File(format("{params.hbase_conf_dir}/hbase-policy.xml"), owner=params.hbase_user, group=params.user_group) File( format("{hbase_conf_dir}/hbase-env.sh"), owner=params.hbase_user, content=InlineTemplate(params.hbase_env_sh_template), group=params.user_group, ) # On some OS this folder could be not exists, so we will create it before pushing there files Directory(params.limits_conf_dir, create_parents=True, owner='root', group='root') File(os.path.join(params.limits_conf_dir, 'hbase.conf'), owner='root', group='root', mode=0644, content=Template("hbase.conf.j2")) hbase_TemplateConfig('regionservers') if params.security_enabled: hbase_TemplateConfig(format("hbase_{name}_jaas.conf")) if name != "client": Directory( params.pid_dir, owner=params.hbase_user, create_parents=True, cd_access="a", mode=0755, ) Directory( params.log_dir, owner=params.hbase_user, create_parents=True, cd_access="a", mode=0755, ) if (params.log4j_props != None): File(format("{params.hbase_conf_dir}/log4j.properties"), mode=0644, group=params.user_group, owner=params.hbase_user, content=InlineTemplate(params.log4j_props)) elif (os.path.exists(format("{params.hbase_conf_dir}/log4j.properties"))): File(format("{params.hbase_conf_dir}/log4j.properties"), mode=0644, group=params.user_group, owner=params.hbase_user) if name == "master": 
params.HdfsResource(params.hbase_hdfs_root_dir, type="directory", action="create_on_execute", owner=params.hbase_user) params.HdfsResource(params.hbase_staging_dir, type="directory", action="create_on_execute", owner=params.hbase_user, mode=0711) if params.create_hbase_home_directory: params.HdfsResource(params.hbase_home_directory, type="directory", action="create_on_execute", owner=params.hbase_user, mode=0755) params.HdfsResource(None, action="execute") if params.phoenix_enabled: Package(params.phoenix_package, retry_on_repo_unavailability=params. agent_stack_retry_on_unavailability, retry_count=params.agent_stack_retry_count)
def setup_ranger_knox(upgrade_type=None):
  """Set up the Ranger plugin for Knox: optionally prepares the HDFS
  audit directories, then registers/configures the plugin through the
  stack-appropriate setup_ranger_plugin implementation, and finally
  creates core-site.xml for the plugin on secure clusters."""
  import params

  if params.enable_ranger_knox:

    # During an upgrade the plugin must be configured against the target
    # stack version rather than the currently reported one.
    stack_version = None
    if upgrade_type is not None:
      stack_version = params.version

    if params.retryAble:
      Logger.info("Knox: Setup ranger: command retry enables thus retrying if ranger admin is down !")
    else:
      Logger.info("Knox: Setup ranger: command retry not enabled thus skipping if ranger admin is down !")

    # Audit-to-HDFS: pre-create the audit directories (batched, executed
    # by the trailing HdfsResource(None, action="execute")).
    if params.xml_configurations_supported and params.enable_ranger_knox and params.xa_audit_hdfs_is_enabled:
      if params.has_namenode:
        params.HdfsResource("/ranger/audit",
                            type="directory",
                            action="create_on_execute",
                            owner=params.hdfs_user,
                            group=params.hdfs_user,
                            mode=0755,
                            recursive_chmod=True
        )
        params.HdfsResource("/ranger/audit/knox",
                            type="directory",
                            action="create_on_execute",
                            owner=params.knox_user,
                            group=params.knox_user,
                            mode=0700,
                            recursive_chmod=True
        )
        params.HdfsResource(None, action="execute")

      # In NameNode HA, Knox needs its own hdfs-site.xml to resolve the
      # nameservice for audit writes; otherwise remove any stale copy.
      if params.namenode_hosts is not None and len(params.namenode_hosts) > 1:
        Logger.info('Ranger Knox plugin is enabled in NameNode HA environment along with audit to Hdfs enabled, creating hdfs-site.xml')
        XmlConfig("hdfs-site.xml",
                  conf_dir=params.knox_conf_dir,
                  configurations=params.config['configurations']['hdfs-site'],
                  configuration_attributes=params.config['configurationAttributes']['hdfs-site'],
                  owner=params.knox_user,
                  group=params.knox_group,
                  mode=0644
        )
      else:
        File(format('{knox_conf_dir}/hdfs-site.xml'), action="delete")

    if params.xml_configurations_supported:
      # Newer stacks: XML-based plugin setup; v2 API when Ranger-Kerberos
      # is supported by the stack.
      api_version = None
      if params.stack_supports_ranger_kerberos:
        api_version = 'v2'
      from resource_management.libraries.functions.setup_ranger_plugin_xml import setup_ranger_plugin
      setup_ranger_plugin('knox-server', 'knox', params.previous_jdbc_jar,
                          params.downloaded_custom_connector, params.driver_curl_source,
                          params.driver_curl_target, params.java_home,
                          params.repo_name, params.knox_ranger_plugin_repo,
                          params.ranger_env, params.ranger_plugin_properties,
                          params.policy_user, params.policymgr_mgr_url,
                          params.enable_ranger_knox,
                          conf_dict=params.knox_conf_dir,
                          component_user=params.knox_user,
                          component_group=params.knox_group,
                          cache_service_list=['knox'],
                          plugin_audit_properties=params.config['configurations']['ranger-knox-audit'],
                          plugin_audit_attributes=params.config['configurationAttributes']['ranger-knox-audit'],
                          plugin_security_properties=params.config['configurations']['ranger-knox-security'],
                          plugin_security_attributes=params.config['configurationAttributes']['ranger-knox-security'],
                          plugin_policymgr_ssl_properties=params.config['configurations']['ranger-knox-policymgr-ssl'],
                          plugin_policymgr_ssl_attributes=params.config['configurationAttributes']['ranger-knox-policymgr-ssl'],
                          component_list=['knox-server'],
                          audit_db_is_enabled=params.xa_audit_db_is_enabled,
                          credential_file=params.credential_file,
                          xa_audit_db_password=params.xa_audit_db_password,
                          ssl_truststore_password=params.ssl_truststore_password,
                          ssl_keystore_password=params.ssl_keystore_password,
                          stack_version_override = stack_version,
                          skip_if_rangeradmin_down= not params.retryAble, api_version=api_version,
                          is_security_enabled = params.security_enabled,
                          is_stack_supports_ranger_kerberos = params.stack_supports_ranger_kerberos,
                          component_user_principal=params.knox_principal_name if params.security_enabled else None,
                          component_user_keytab=params.knox_keytab_path if params.security_enabled else None)
    else:
      # Legacy (non-XML) plugin setup path for older stacks.
      from resource_management.libraries.functions.setup_ranger_plugin import setup_ranger_plugin
      setup_ranger_plugin('knox-server', 'knox', params.previous_jdbc_jar,
                          params.downloaded_custom_connector, params.driver_curl_source,
                          params.driver_curl_target, params.java_home,
                          params.repo_name, params.knox_ranger_plugin_repo,
                          params.ranger_env, params.ranger_plugin_properties,
                          params.policy_user, params.policymgr_mgr_url,
                          params.enable_ranger_knox,
                          conf_dict=params.knox_conf_dir,
                          component_user=params.knox_user,
                          component_group=params.knox_group,
                          cache_service_list=['knox'],
                          plugin_audit_properties=params.config['configurations']['ranger-knox-audit'],
                          plugin_audit_attributes=params.config['configurationAttributes']['ranger-knox-audit'],
                          plugin_security_properties=params.config['configurations']['ranger-knox-security'],
                          plugin_security_attributes=params.config['configurationAttributes']['ranger-knox-security'],
                          plugin_policymgr_ssl_properties=params.config['configurations']['ranger-knox-policymgr-ssl'],
                          plugin_policymgr_ssl_attributes=params.config['configurationAttributes']['ranger-knox-policymgr-ssl'],
                          component_list=['knox-server'],
                          audit_db_is_enabled=params.xa_audit_db_is_enabled,
                          credential_file=params.credential_file,
                          xa_audit_db_password=params.xa_audit_db_password,
                          ssl_truststore_password=params.ssl_truststore_password,
                          ssl_keystore_password=params.ssl_keystore_password,
                          stack_version_override = stack_version,
                          skip_if_rangeradmin_down= not params.retryAble)

    # On secure clusters the plugin also needs a core-site.xml, sourced
    # from the NameNode's configs when one is present, otherwise minimal.
    if params.stack_supports_core_site_for_ranger_plugin and params.enable_ranger_knox and params.security_enabled:
      if params.has_namenode:
        Logger.info("Stack supports core-site.xml creation for Ranger plugin and Namenode is installed, creating create core-site.xml from namenode configurations")
        setup_core_site_for_required_plugins(component_user = params.knox_user,
                                             component_group = params.knox_group,
                                             create_core_site_path = params.knox_conf_dir,
                                             configurations = params.config['configurations']['core-site'],
                                             configuration_attributes = params.config['configuration_attributes']['core-site'])
      else:
        Logger.info("Stack supports core-site.xml creation for Ranger plugin and Namenode is not installed, creating create core-site.xml from default configurations")
        setup_core_site_for_required_plugins(component_user = params.knox_user,
                                             component_group = params.knox_group,
                                             create_core_site_path = params.knox_conf_dir,
                                             configurations = { 'hadoop.security.authentication' : 'kerberos' if params.security_enabled else 'simple' },
                                             configuration_attributes = {})
    else:
      Logger.info("Stack does not support core-site.xml creation for Ranger plugin, skipping core-site.xml configurations")
  else:
    Logger.info('Ranger Knox plugin is not enabled')
def setup_historyserver():
  """Create the HDFS directories the MapReduce JobHistory Server needs
  (log aggregation, /tmp, system-service dirs, /mapred, done-dir), plus
  the local leveldb state-store dir, and emit the logfeeder config.
  All HdfsResource calls are batched and executed once at the end."""
  import params

  if params.yarn_log_aggregation_enabled:
    # 01777: sticky-bit world-writable, like /tmp — every app writes its
    # own aggregated logs here.
    params.HdfsResource(params.yarn_nm_app_log_dir,
                        action="create_on_execute",
                        type="directory",
                        owner=params.yarn_user,
                        group=params.user_group,
                        mode=01777,
                        recursive_chmod=True)

  # create the /tmp folder with proper permissions if it doesn't exist yet
  if params.entity_file_history_directory.startswith('/tmp'):
    params.HdfsResource(params.hdfs_tmp_dir,
                        action="create_on_execute",
                        type="directory",
                        owner=params.hdfs_user,
                        mode=0777,
    )

  params.HdfsResource(params.yarn_system_service_dir + '/async',
                      action="create_on_execute",
                      type="directory",
                      owner=params.yarn_user,
                      group=params.user_group)
  params.HdfsResource(params.yarn_system_service_dir + '/sync',
                      action="create_on_execute",
                      type="directory",
                      owner=params.yarn_user,
                      group=params.user_group)
  params.HdfsResource(params.entity_file_history_directory,
                      action="create_on_execute",
                      type="directory",
                      owner=params.yarn_user,
                      group=params.user_group)
  params.HdfsResource("/mapred",
                      type="directory",
                      action="create_on_execute",
                      owner=params.mapred_user)
  params.HdfsResource("/mapred/system",
                      type="directory",
                      action="create_on_execute",
                      owner=params.hdfs_user)
  params.HdfsResource(params.mapreduce_jobhistory_done_dir,
                      type="directory",
                      action="create_on_execute",
                      owner=params.mapred_user,
                      group=params.user_group,
                      change_permissions_for_parents=True,
                      mode=0777)
  # Flush all queued HDFS operations in one batch.
  params.HdfsResource(None, action="execute")
  Directory(params.jhs_leveldb_state_store_dir,
            owner=params.mapred_user,
            group=params.user_group,
            create_parents=True,
            cd_access="a",
            recursive_ownership=True,
  )

  generate_logfeeder_input_config(
      'mapreduce2',
      Template("input.config-mapreduce2.json.j2", extra_imports=[default]))
def spark_service(name, upgrade_type=None, action=None):
  """Start or stop a Spark daemon.

  name: 'jobhistoryserver' or 'sparkthriftserver'.
  upgrade_type: when not None, the target upgrade version is used to
  decide which HDFS artifacts to copy.
  action: 'start' or 'stop'.
  """
  import params

  if action == 'start':

    # Pick the version that will actually be running after this command.
    effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
    if effective_version:
      effective_version = format_stack_version(effective_version)

    if effective_version and compare_versions(effective_version, '2.4.0.0') >= 0:
      # copy spark-hdp-assembly.jar to hdfs
      copy_to_hdfs("spark", params.user_group, params.hdfs_user,
                   host_sys_prepped=params.host_sys_prepped)
      # create spark history directory
      params.HdfsResource(params.spark_history_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.spark_user,
                          group=params.user_group,
                          mode=0777,
                          recursive_chmod=True)
      params.HdfsResource(None, action="execute")

    if params.security_enabled:
      spark_kinit_cmd = format(
          "{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ")
      Execute(spark_kinit_cmd, user=params.spark_user)

    # Spark 1.3.1.2.3, and higher, which was included in HDP 2.3, does not have a dependency on Tez, so it does not
    # need to copy the tarball, otherwise, copy it.
    if params.stack_version_formatted and compare_versions(
        params.stack_version_formatted, '2.3.0.0') < 0:
      resource_created = copy_to_hdfs("tez",
                                      params.user_group,
                                      params.hdfs_user,
                                      host_sys_prepped=params.host_sys_prepped)
      if resource_created:
        params.HdfsResource(None, action="execute")

    if name == 'jobhistoryserver':
      # no-op guard: skip the start command if the pid file points at a
      # live process.
      historyserver_no_op_test = format(
          'ls {spark_history_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_history_server_pid_file}` >/dev/null 2>&1')
      Execute(format('{spark_history_server_start}'),
              user=params.spark_user,
              environment={'JAVA_HOME': params.java_home},
              not_if=historyserver_no_op_test)

    elif name == 'sparkthriftserver':
      if params.security_enabled:
        # The thrift server authenticates as the Hive principal; _HOST is
        # substituted with this host's fqdn before kinit.
        hive_principal = params.hive_kerberos_principal.replace(
            '_HOST', socket.getfqdn().lower())
        hive_kinit_cmd = format(
            "{kinit_path_local} -kt {hive_kerberos_keytab} {hive_principal}; ")
        Execute(hive_kinit_cmd, user=params.hive_user)

      thriftserver_no_op_test = format(
          'ls {spark_thrift_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_thrift_server_pid_file}` >/dev/null 2>&1')
      Execute(format(
          '{spark_thrift_server_start} --properties-file {spark_thrift_server_conf_file} {spark_thrift_cmd_opts_properties}'),
              user=params.hive_user,
              environment={'JAVA_HOME': params.java_home},
              not_if=thriftserver_no_op_test)

  elif action == 'stop':
    if name == 'jobhistoryserver':
      Execute(format('{spark_history_server_stop}'),
              user=params.spark_user,
              environment={'JAVA_HOME': params.java_home})
      File(params.spark_history_server_pid_file, action="delete")

    elif name == 'sparkthriftserver':
      Execute(format('{spark_thrift_server_stop}'),
              user=params.hive_user,
              environment={'JAVA_HOME': params.java_home})
      File(params.spark_thrift_server_pid_file, action="delete")
def service_check(self, env):
  """Pig smoke test: run pigSmoke.sh on MapReduce, verify its output
  exists in HDFS, and — when the stack supports Pig-on-Tez — repeat the
  run in Tez execution mode."""
  import params
  env.set_params(params)

  input_file = format('/user/{smokeuser}/passwd')
  output_dir = format('/user/{smokeuser}/pigsmoke.out')

  # Stage the smoke user's home dir, a fresh input file, and remove any
  # previous output (batched, executed below).
  params.HdfsResource(format("/user/{smokeuser}"),
                      type="directory",
                      action="create_on_execute",
                      owner=params.smokeuser,
                      mode=params.smoke_hdfs_user_mode,
  )
  params.HdfsResource(output_dir,
                      type="directory",
                      action="delete_on_execute",
                      owner=params.smokeuser,
  )
  params.HdfsResource(input_file,
                      type="file",
                      source="/etc/passwd",
                      action="create_on_execute",
                      owner=params.smokeuser,
  )
  params.HdfsResource(None, action="execute")

  if params.security_enabled:
    kinit_cmd = format(
        "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};")
    Execute(kinit_cmd, user=params.smokeuser)

  File(format("{tmp_dir}/pigSmoke.sh"),
       content=StaticFile("pigSmoke.sh"),
       mode=0755)

  # check for Pig-on-M/R
  Execute(format("pig {tmp_dir}/pigSmoke.sh"),
          tries=3,
          try_sleep=5,
          path=format('{pig_bin_dir}:/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin'),
          user=params.smokeuser,
          logoutput=True)

  # verify the job actually produced output
  test_cmd = format("fs -test -e {output_dir}")
  ExecuteHadoop(test_cmd,
                user=params.smokeuser,
                conf_dir=params.hadoop_conf_dir,
                bin_dir=params.hadoop_bin_dir)

  if params.stack_version_formatted and check_stack_feature(
      StackFeature.PIG_ON_TEZ, params.stack_version_formatted):
    # cleanup results from previous test
    params.HdfsResource(output_dir,
                        type="directory",
                        action="delete_on_execute",
                        owner=params.smokeuser,
    )
    params.HdfsResource(input_file,
                        type="file",
                        source="/etc/passwd",
                        action="create_on_execute",
                        owner=params.smokeuser,
    )

    # Check for Pig-on-Tez
    resource_created = copy_to_hdfs("tez",
                                    params.user_group,
                                    params.hdfs_user,
                                    host_sys_prepped=params.host_sys_prepped)
    if resource_created:
      params.HdfsResource(None, action="execute")

    Execute(format("pig -x tez {tmp_dir}/pigSmoke.sh"),
            tries=3,
            try_sleep=5,
            path=format('{pig_bin_dir}:/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin'),
            user=params.smokeuser,
            logoutput=True)

    ExecuteHadoop(test_cmd,
                  user=params.smokeuser,
                  conf_dir=params.hadoop_conf_dir,
                  bin_dir=params.hadoop_bin_dir)
def oozie(is_server=False): import params if is_server: params.HdfsResource(params.oozie_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.oozie_user, mode=params.oozie_hdfs_user_mode) params.HdfsResource(None, action="execute") Directory(params.conf_dir, recursive=True, owner=params.oozie_user, group=params.user_group) XmlConfig( "oozie-site.xml", conf_dir=params.conf_dir, configurations=params.oozie_site, configuration_attributes=params.config['configuration_attributes'] ['oozie-site'], owner=params.oozie_user, group=params.user_group, mode=0664) File( format("{conf_dir}/oozie-env.sh"), owner=params.oozie_user, content=InlineTemplate(params.oozie_env_sh_template), group=params.user_group, ) if (params.log4j_props != None): File(format("{params.conf_dir}/oozie-log4j.properties"), mode=0644, group=params.user_group, owner=params.oozie_user, content=params.log4j_props) elif (os.path.exists(format("{params.conf_dir}/oozie-log4j.properties"))): File(format("{params.conf_dir}/oozie-log4j.properties"), mode=0644, group=params.user_group, owner=params.oozie_user) if params.hdp_stack_version != "" and compare_versions( params.hdp_stack_version, '2.2') >= 0: File(format("{params.conf_dir}/adminusers.txt"), mode=0644, group=params.user_group, owner=params.oozie_user, content=Template('adminusers.txt.j2', oozie_admin_users=params.oozie_admin_users)) else: File(format("{params.conf_dir}/adminusers.txt"), owner=params.oozie_user, group=params.user_group) if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \ params.jdbc_driver_name == "com.microsoft.sqlserver.jdbc.SQLServerDriver" or \ params.jdbc_driver_name == "org.postgresql.Driver" or \ params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver": File( format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"), content=DownloadSource( format("{jdk_location}{check_db_connection_jar_name}")), ) pass oozie_ownership() if is_server: oozie_server_specific()
def falcon(type, action=None):
  """Configure, start or stop Falcon.

  type: 'server' or 'client'; action: 'config', 'start' or 'stop'.
  'config' lays down local dirs/config files; server-'config' also
  creates the Falcon HDFS directories.
  """
  import params

  if action == 'config':
    Directory(params.falcon_pid_dir, owner=params.falcon_user, recursive=True)
    Directory(params.falcon_log_dir, owner=params.falcon_user, recursive=True)
    Directory(params.falcon_webapp_dir, owner=params.falcon_user, recursive=True)
    Directory(params.falcon_home, owner=params.falcon_user, recursive=True)
    Directory(params.etc_prefix_dir, mode=0755, recursive=True)
    Directory(params.falcon_conf_dir, owner=params.falcon_user, recursive=True)
    File(params.falcon_conf_dir + '/falcon-env.sh',
         content=InlineTemplate(params.falcon_env_sh_template),
         owner=params.falcon_user)
    File(params.falcon_conf_dir + '/client.properties',
         content=Template('client.properties.j2'),
         mode=0644,
         owner=params.falcon_user)
    PropertiesFile(params.falcon_conf_dir + '/runtime.properties',
                   properties=params.falcon_runtime_properties,
                   mode=0644,
                   owner=params.falcon_user)
    PropertiesFile(params.falcon_conf_dir + '/startup.properties',
                   properties=params.falcon_startup_properties,
                   mode=0644,
                   owner=params.falcon_user)

    if params.falcon_graph_storage_directory:
      Directory(params.falcon_graph_storage_directory,
                owner=params.falcon_user,
                group=params.user_group,
                mode=0775,
                recursive=True,
                cd_access="a")

    if params.falcon_graph_serialize_path:
      Directory(params.falcon_graph_serialize_path,
                owner=params.falcon_user,
                group=params.user_group,
                mode=0775,
                recursive=True,
                cd_access="a")

  if type == 'server':
    if action == 'config':
      # store_uri may be an hdfs:// or file:// URI; handle each scheme.
      if params.store_uri[0:4] == "hdfs":
        params.HdfsResource(params.store_uri,
                            type="directory",
                            action="create_on_execute",
                            owner=params.falcon_user,
                            mode=0755)
      elif params.store_uri[0:4] == "file":
        # strip the "file://" prefix to get the local path
        Directory(params.store_uri[7:],
                  owner=params.falcon_user,
                  recursive=True)

      # NOTE(review): 'flacon_apps_dir' is misspelled, but it must match
      # the attribute name declared in params — do not rename one side only.
      params.HdfsResource(params.flacon_apps_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.falcon_user,
                          mode=0777  #TODO change to proper mode
      )
      if params.falcon_store_uri[0:4] == "hdfs":
        params.HdfsResource(params.falcon_store_uri,
                            type="directory",
                            action="create_on_execute",
                            owner=params.falcon_user,
                            mode=0755)
      elif params.falcon_store_uri[0:4] == "file":
        Directory(params.falcon_store_uri[7:],
                  owner=params.falcon_user,
                  recursive=True)
      if params.supports_hive_dr:
        # Mirroring workflows are uploaded from the local dir into HDFS.
        params.HdfsResource(params.dfs_data_mirroring_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.hdfs_user,
                            group=params.user_group,
                            recursive_chown=True,
                            recursive_chmod=True,
                            mode=0770,
                            source=params.local_data_mirroring_dir)
      # Flush all queued HDFS operations.
      params.HdfsResource(None, action="execute")
      Directory(params.falcon_local_dir,
                owner=params.falcon_user,
                recursive=True,
                cd_access="a")
      # NOTE(review): '== True' kept deliberately — params values may not
      # be strict booleans, so a truthiness test could change behavior.
      if params.falcon_embeddedmq_enabled == True:
        Directory(os.path.abspath(os.path.join(params.falcon_embeddedmq_data, "..")),
                  owner=params.falcon_user,
                  recursive=True)
        Directory(params.falcon_embeddedmq_data,
                  owner=params.falcon_user,
                  recursive=True)

    if action == 'start':
      Execute(format('{falcon_home}/bin/falcon-start -port {falcon_port}'),
              user=params.falcon_user,
              path=params.hadoop_bin_dir)
    if action == 'stop':
      Execute(format('{falcon_home}/bin/falcon-stop'),
              user=params.falcon_user,
              path=params.hadoop_bin_dir)
      File(params.server_pid_file, action='delete')
def setup_ranger_kafka():
  """Set up the Ranger plugin for Kafka: pre-creates HDFS audit dirs
  (best-effort), configures the plugin, installs the ranger env script,
  and regenerates core-site.xml for the plugin on kerberized clusters."""
  import params

  if params.enable_ranger_kafka:

    if params.retryAble:
      Logger.info(
          "Kafka: Setup ranger: command retry enables thus retrying if ranger admin is down !"
      )
    else:
      Logger.info(
          "Kafka: Setup ranger: command retry not enabled thus skipping if ranger admin is down !"
      )

    if params.has_namenode and params.xa_audit_hdfs_is_enabled:
      # Best-effort: audit-dir creation failures are logged, not fatal —
      # plugin setup below still proceeds.
      try:
        params.HdfsResource("/ranger/audit",
                            type="directory",
                            action="create_on_execute",
                            owner=params.hdfs_user,
                            group=params.hdfs_user,
                            mode=0755,
                            recursive_chmod=True)
        params.HdfsResource("/ranger/audit/kafka",
                            type="directory",
                            action="create_on_execute",
                            owner=params.kafka_user,
                            group=params.kafka_user,
                            mode=0700,
                            recursive_chmod=True)
        params.HdfsResource(None, action="execute")

        if params.is_ranger_kms_ssl_enabled:
          Logger.info(
              'Ranger KMS is ssl enabled, configuring ssl-client for hdfs audits.'
          )
          setup_configuration_file_for_required_plugins(
              component_user=params.kafka_user,
              component_group=params.user_group,
              create_core_site_path=params.conf_dir,
              configurations=params.config['configurations']['ssl-client'],
              configuration_attributes=params.config['configurationAttributes']['ssl-client'],
              file_name='ssl-client.xml')
        else:
          Logger.info(
              'Ranger KMS is not ssl enabled, skipping ssl-client for hdfs audits.'
          )
      except Exception, err:  # Python 2 except syntax (file-wide convention)
        Logger.exception(
            "Audit directory creation in HDFS for KAFKA Ranger plugin failed with error:\n{0}"
            .format(err))

    # The first six positional args (jar/connector/driver paths) are None:
    # this call passes no JDBC artifacts for the Kafka plugin.
    setup_ranger_plugin(
        'kafka-broker', 'kafka', None,
        None, None, None,
        params.java64_home,
        params.repo_name,
        params.kafka_ranger_plugin_repo,
        params.ranger_env,
        params.ranger_plugin_properties,
        params.policy_user,
        params.policymgr_mgr_url,
        params.enable_ranger_kafka,
        conf_dict=params.conf_dir,
        component_user=params.kafka_user,
        component_group=params.user_group,
        cache_service_list=['kafka'],
        plugin_audit_properties=params.ranger_kafka_audit,
        plugin_audit_attributes=params.ranger_kafka_audit_attrs,
        plugin_security_properties=params.ranger_kafka_security,
        plugin_security_attributes=params.ranger_kafka_security_attrs,
        plugin_policymgr_ssl_properties=params.ranger_kafka_policymgr_ssl,
        plugin_policymgr_ssl_attributes=params.ranger_kafka_policymgr_ssl_attrs,
        component_list=['kafka-broker'],
        audit_db_is_enabled=params.xa_audit_db_is_enabled,
        credential_file=params.credential_file,
        xa_audit_db_password=params.xa_audit_db_password,
        ssl_truststore_password=params.ssl_truststore_password,
        ssl_keystore_password=params.ssl_keystore_password,
        api_version='v2',
        skip_if_rangeradmin_down=not params.retryAble,
        is_security_enabled=params.kerberos_security_enabled,
        is_stack_supports_ranger_kerberos=params.stack_supports_ranger_kerberos,
        component_user_principal=params.kafka_jaas_principal if params.kerberos_security_enabled else None,
        component_user_keytab=params.kafka_keytab_path if params.kerberos_security_enabled else None)

  if params.enable_ranger_kafka:
    # Install the ranger env script once (not_if guards re-copy).
    Execute(('cp', '--remove-destination',
             params.setup_ranger_env_sh_source,
             params.setup_ranger_env_sh_target),
            not_if=format("test -f {setup_ranger_env_sh_target}"),
            sudo=True)
    File(params.setup_ranger_env_sh_target,
         owner=params.kafka_user,
         group=params.user_group,
         mode=0755)

  if params.enable_ranger_kafka and params.kerberos_security_enabled:
    # Remove any stale core-site.xml before regenerating it.
    Execute(('rm', '-f', os.path.join(params.conf_dir, "core-site.xml")),
            sudo=True)
    if params.has_namenode:
      Logger.info(
          "Stack supports core-site.xml creation for Ranger plugin and Namenode is installed, creating create core-site.xml from namenode configurations"
      )
      setup_configuration_file_for_required_plugins(
          component_user=params.kafka_user,
          component_group=params.user_group,
          create_core_site_path=params.conf_dir,
          configurations=params.config['configurations']['core-site'],
          configuration_attributes=params.config['configurationAttributes']['core-site'],
          file_name='core-site.xml',
          xml_include_file=params.mount_table_xml_inclusion_file_full_path,
          xml_include_file_content=params.mount_table_content)
    else:
      Logger.info(
          "Stack supports core-site.xml creation for Ranger plugin and Namenode is not installed, creating create core-site.xml from default configurations"
      )
      setup_configuration_file_for_required_plugins(
          component_user=params.kafka_user,
          component_group=params.user_group,
          create_core_site_path=params.conf_dir,
          configurations={
              'hadoop.security.authentication':
                  'kerberos' if params.kerberos_security_enabled else 'simple'
          },
          configuration_attributes={},
          file_name='core-site.xml')
  else:
    Logger.info(
        "Stack does not support core-site.xml creation for Ranger plugin, skipping core-site.xml configurations"
    )
def service_check(self, env): import params env.set_params(params) input_file = format('/user/{smokeuser}/passwd') output_dir = format('/user/{smokeuser}/pigsmoke.out') # cleanup output params.HdfsResource( output_dir, type="directory", action="delete_on_execute", owner=params.smokeuser, ) # re-create input. Be able to delete it if it already exists params.HdfsResource( input_file, type="file", source="/etc/passwd", action="create_on_execute", owner=params.smokeuser, ) params.HdfsResource(None, action="execute") File(format("{tmp_dir}/pigSmoke.sh"), content=StaticFile("pigSmoke.sh"), mode=0755) # check for Pig-on-M/R Execute( format("pig {tmp_dir}/pigSmoke.sh"), tries=3, try_sleep=5, path=format( '{pig_bin_dir}:/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin'), user=params.smokeuser, logoutput=True) test_cmd = format("fs -test -e {output_dir}") ExecuteHadoop(test_cmd, user=params.smokeuser, conf_dir=params.hadoop_conf_dir, bin_dir=params.hadoop_bin_dir) if params.iop_stack_version != "" and compare_versions( params.iop_stack_version, '4.0') >= 0: # cleanup results from previous test # cleanup output params.HdfsResource( output_dir, type="directory", action="delete_on_execute", owner=params.smokeuser, ) # re-create input. Be able to delete it firstly if it already exists params.HdfsResource( input_file, type="file", source="/etc/passwd", action="create_on_execute", owner=params.smokeuser, ) params.HdfsResource(None, action="execute") if params.security_enabled: kinit_cmd = format( "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};" ) Execute(kinit_cmd, user=params.smokeuser) Execute( format("pig {tmp_dir}/pigSmoke.sh"), tries=3, try_sleep=5, path=format( '{pig_bin_dir}:/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin' ), user=params.smokeuser, logoutput=True) ExecuteHadoop(test_cmd, user=params.smokeuser, conf_dir=params.hadoop_conf_dir, bin_dir=params.hadoop_bin_dir)
def setup_ranger_solr(): import params if params.has_ranger_admin and params.security_enabled: from resource_management.libraries.functions.setup_ranger_plugin_xml import setup_ranger_plugin if params.retryAble: Logger.info("Solr: Setup ranger: command retry enables thus retrying if ranger admin is down !") else: Logger.info("Solr: Setup ranger: command retry not enabled thus skipping if ranger admin is down !") if params.xml_configurations_supported and params.enable_ranger_solr and params.xa_audit_hdfs_is_enabled: if params.has_namenode: params.HdfsResource("/ranger/audit", type="directory", action="create_on_execute", owner=params.hdfs_user, group=params.hdfs_user, mode=0755, recursive_chmod=True ) params.HdfsResource("/ranger/audit/solr", type="directory", action="create_on_execute", owner=params.solr_user, group=params.solr_user, mode=0700, recursive_chmod=True ) params.HdfsResource(None, action="execute") File(os.path.join(params.stack_root, params.stack_version, "ranger-solr-plugin", "ranger_credential_helper.py"), mode=0755) jar_files = os.listdir(os.path.join(params.stack_root, params.stack_version, "ranger-solr-plugin/lib")) for jar_file in jar_files: plugin_dir = os.path.join(params.stack_root, params.stack_version, "ranger-solr-plugin/lib", jar_file) Execute(('ln','-sf', plugin_dir, format("{solr_dir}/server/solr-webapp/webapp/WEB-INF/lib/{jar_file}")), only_if=format('ls {plugin_dir}'), sudo=True ) setup_ranger_plugin('hadoop-hdfs-datanode', 'solr', params.previous_jdbc_jar, params.downloaded_custom_connector, params.driver_curl_source, params.driver_curl_target, params.java64_home, params.repo_name, params.solr_ranger_plugin_repo, params.ranger_env, params.ranger_plugin_properties, params.policy_user, params.policymgr_mgr_url, params.enable_ranger_solr, conf_dict=params.solr_conf, component_user=params.solr_user, component_group=params.user_group, cache_service_list=['solr'], plugin_audit_properties=params.ranger_solr_audit, 
plugin_audit_attributes=params.ranger_solr_audit_attrs, plugin_security_properties=params.ranger_solr_security, plugin_security_attributes=params.ranger_solr_security_attrs, plugin_policymgr_ssl_properties=params.ranger_solr_policymgr_ssl, plugin_policymgr_ssl_attributes=params.ranger_solr_policymgr_ssl_attrs, component_list=['solr'], audit_db_is_enabled=params.xa_audit_db_is_enabled, credential_file=params.credential_file, xa_audit_db_password=params.xa_audit_db_password, ssl_truststore_password=params.ssl_truststore_password, ssl_keystore_password=params.ssl_keystore_password, api_version = 'v2', skip_if_rangeradmin_down= not params.retryAble, is_security_enabled = params.security_enabled, is_stack_supports_ranger_kerberos = params.stack_supports_ranger_kerberos, component_user_principal=params.solr_kerberos_principal if params.security_enabled else None, component_user_keytab=params.solr_kerberos_keytab if params.security_enabled else None) properties_files = os.listdir(format('/etc/iop-solr/conf')) if params.security_enabled and params.enable_ranger_solr: solr_classes_dir = format("{solr_dir}/server/solr-webapp/webapp/WEB-INF/classes") Directory(solr_classes_dir, owner=params.solr_user, group=params.user_group, ignore_failures=True ) Execute(format('mkdir {solr_classes_dir}'), not_if=format('ls {solr_classes_dir}')) for properties_file in properties_files: Execute(('ln','-sf',format('/etc/iop-solr/conf/{properties_file}'), format("{solr_dir}/server/solr-webapp/webapp/WEB-INF/classes/{properties_file}")), only_if=format('ls /etc/iop-solr/conf/{properties_file}'), sudo=True) if params.enable_ranger_solr: set_solr_ranger_authorizer = params.zkcli_prefix + format('put {solr_znode}/security.json \'{{\"authentication":{{\"class\":\"org.apache.solr.security.KerberosPlugin\"}},\"authorization\":{{\"class\": ' + '\"org.apache.ranger.authorization.solr.authorizer.RangerSolrAuthorizer\"}}}}\'') Execute(set_solr_ranger_authorizer) elif params.security_enabled: 
Logger.info('Ranger Solr plugin is not enabled.') setup_kerberos_security_json = params.zkcli_prefix + format('put {solr_znode}/security.json \'{{\"authentication":{{\"class\":\"org.apache.solr.security.KerberosPlugin\"}}}\'') Execute(setup_kerberos_security_json) else: Logger.info('Security is disabled.') setup_kerberos_security_json = params.zkcli_prefix + format('put {solr_znode}/security.json \'{}\'') Execute(setup_kerberos_security_json)
# The destination directory must already exist if not os.path.exists(dest_path): Logger.error( "Cannot copy {0} because destination directory {1} does not exist." .format(source_file, dest_path)) return False filename = os.path.basename(source_file) dest_file = os.path.join(dest_path, filename) params.HdfsResource( dest_file, type=download_type, action="download_on_execute", source=source_file, group=user_group, owner=owner, mode=file_mode, replace_existing_files=replace_existing_files, ) Logger.info( "Will attempt to copy from DFS at {0} to local file system {1}.". format(source_file, dest_file)) # For improved performance, force_execute should be False so that it is delayed and combined with other calls. if force_execute: params.HdfsResource(None, action="execute") return True
def setup_hiveserver2(): import params File(params.start_hiveserver2_path, mode=0755, content=Template(format('{start_hiveserver2_script}'))) File(os.path.join(params.hive_server_conf_dir, "hadoop-metrics2-hiveserver2.properties"), owner=params.hive_user, group=params.user_group, content=Template("hadoop-metrics2-hiveserver2.properties.j2"), mode=0600) XmlConfig( "hiveserver2-site.xml", conf_dir=params.hive_server_conf_dir, configurations=params.config['configurations']['hiveserver2-site'], configuration_attributes=params.config['configuration_attributes'] ['hiveserver2-site'], owner=params.hive_user, group=params.user_group, mode=0600) # copy tarball to HDFS feature not supported if not (params.stack_version_formatted_major and check_stack_feature(StackFeature.COPY_TARBALL_TO_HDFS, params.stack_version_formatted_major)): params.HdfsResource(params.webhcat_apps_dir, type="directory", action="create_on_execute", owner=params.webhcat_user, mode=0755) # Create webhcat dirs. if params.hcat_hdfs_user_dir != params.webhcat_hdfs_user_dir: params.HdfsResource(params.hcat_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.webhcat_user, mode=params.hcat_hdfs_user_mode) params.HdfsResource(params.webhcat_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.webhcat_user, mode=params.webhcat_hdfs_user_mode) # ****** Begin Copy Tarballs ****** # ********************************* # if copy tarball to HDFS feature supported copy mapreduce.tar.gz and tez.tar.gz to HDFS if params.stack_version_formatted_major and check_stack_feature( StackFeature.COPY_TARBALL_TO_HDFS, params.stack_version_formatted_major): copy_to_hdfs("mapreduce", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs) copy_to_hdfs("tez", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs) # Always copy pig.tar.gz and hive.tar.gz using the appropriate mode. 
# This can use a different source and dest location to account copy_to_hdfs("pig", params.user_group, params.hdfs_user, file_mode=params.tarballs_mode, custom_source_file=params.pig_tar_source, custom_dest_file=params.pig_tar_dest_file, skip=params.sysprep_skip_copy_tarballs_hdfs) copy_to_hdfs("hive", params.user_group, params.hdfs_user, file_mode=params.tarballs_mode, custom_source_file=params.hive_tar_source, custom_dest_file=params.hive_tar_dest_file, skip=params.sysprep_skip_copy_tarballs_hdfs) wildcard_tarballs = ["sqoop", "hadoop_streaming"] for tarball_name in wildcard_tarballs: source_file_pattern = eval("params." + tarball_name + "_tar_source") dest_dir = eval("params." + tarball_name + "_tar_dest_dir") if source_file_pattern is None or dest_dir is None: continue source_files = glob.glob( source_file_pattern) if "*" in source_file_pattern else [ source_file_pattern ] for source_file in source_files: src_filename = os.path.basename(source_file) dest_file = os.path.join(dest_dir, src_filename) copy_to_hdfs(tarball_name, params.user_group, params.hdfs_user, file_mode=params.tarballs_mode, custom_source_file=source_file, custom_dest_file=dest_file, skip=params.sysprep_skip_copy_tarballs_hdfs) # ******* End Copy Tarballs ******* # ********************************* # if warehouse directory is in DFS if not params.whs_dir_protocol or params.whs_dir_protocol == urlparse( params.default_fs).scheme: # Create Hive Metastore Warehouse Dir params.HdfsResource(params.hive_apps_whs_dir, type="directory", action="create_on_execute", owner=params.hive_user, group=params.user_group, mode=params.hive_apps_whs_mode) else: Logger.info( format( "Not creating warehouse directory '{hive_apps_whs_dir}', as the location is not in DFS." 
)) # Create Hive User Dir params.HdfsResource(params.hive_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.hive_user, mode=params.hive_hdfs_user_mode) if not is_empty(params.hive_exec_scratchdir) and not urlparse( params.hive_exec_scratchdir).path.startswith("/tmp"): params.HdfsResource( params.hive_exec_scratchdir, type="directory", action="create_on_execute", owner=params.hive_user, group=params.hdfs_user, mode=0777 ) # Hive expects this dir to be writeable by everyone as it is used as a temp dir params.HdfsResource(None, action="execute")
def setup_ranger_hive_interactive(upgrade_type=None):
  """Wire the Ranger authorization plugin into Hive Server2 Interactive.

  Creates the HDFS audit directories (when auditing to HDFS), then loads
  setup_ranger_plugin from the before-INSTALL hook scripts via a sys.path
  hack and registers the hive-server2-hive2 component with Ranger.

  :param upgrade_type: non-None while a stack upgrade is in progress; in that
                       case params.version overrides the stack version passed
                       to the plugin setup.
  """
  import params

  if params.enable_ranger_hive:
    stack_version = None
    if upgrade_type is not None:
      stack_version = params.version

    if params.retryAble:
      Logger.info("Hive2: Setup ranger: command retry enabled thus retrying if ranger admin is down !")
    else:
      Logger.info("Hive2: Setup ranger: command retry not enabled thus skipping if ranger admin is down !")

    # Create the HDFS audit destination dirs before configuring the plugin.
    if params.xml_configurations_supported and params.xa_audit_hdfs_is_enabled:
      params.HdfsResource("/ranger/audit",
                          type="directory",
                          action="create_on_execute",
                          owner=params.hdfs_user,
                          group=params.hdfs_user,
                          mode=0755,
                          recursive_chmod=True)
      params.HdfsResource("/ranger/audit/hive2",
                          type="directory",
                          action="create_on_execute",
                          owner=params.hive_user,
                          group=params.hive_user,
                          mode=0700,
                          recursive_chmod=True)
      params.HdfsResource(None, action="execute")

    # setup_ranger_plugin_xml lives under the before-INSTALL hook, not on the
    # normal import path, hence the sys.path manipulation.
    import sys, os
    script_path = os.path.realpath(__file__).split('/services')[0] + \
        '/hooks/before-INSTALL/scripts/ranger'
    sys.path.append(script_path)
    from setup_ranger_plugin_xml import setup_ranger_plugin

    setup_ranger_plugin('hive-server2-hive2', 'hive',
                        params.ranger_previous_jdbc_jar,
                        params.ranger_downloaded_custom_connector,
                        params.ranger_driver_curl_source,
                        params.ranger_driver_curl_target,
                        params.java64_home,
                        params.repo_name,
                        params.hive_ranger_plugin_repo,
                        params.ranger_env,
                        params.ranger_plugin_properties,
                        params.policy_user,
                        params.policymgr_mgr_url,
                        params.enable_ranger_hive,
                        conf_dict=params.hive_server_interactive_conf_dir,
                        component_user=params.hive_user,
                        component_group=params.user_group,
                        cache_service_list=['hive-server2-hive2'],
                        plugin_audit_properties=params.config['configurations']['ranger-hive-audit'],
                        plugin_audit_attributes=params.config['configuration_attributes']['ranger-hive-audit'],
                        plugin_security_properties=params.config['configurations']['ranger-hive-security'],
                        plugin_security_attributes=params.config['configuration_attributes']['ranger-hive-security'],
                        plugin_policymgr_ssl_properties=params.config['configurations']['ranger-hive-policymgr-ssl'],
                        plugin_policymgr_ssl_attributes=params.config['configuration_attributes']['ranger-hive-policymgr-ssl'],
                        component_list=['hive-client', 'hive-metastore', 'hive-server2', 'hive-server2-hive2'],
                        audit_db_is_enabled=False,
                        credential_file=params.credential_file,
                        xa_audit_db_password=None,
                        ssl_truststore_password=params.ssl_truststore_password,
                        ssl_keystore_password=params.ssl_keystore_password,
                        stack_version_override=stack_version,
                        skip_if_rangeradmin_down=not params.retryAble,
                        api_version='v2',
                        is_security_enabled=params.security_enabled,
                        is_stack_supports_ranger_kerberos=params.stack_supports_ranger_kerberos,
                        component_user_principal=params.hive_principal if params.security_enabled else None,
                        component_user_keytab=params.hive_server2_keytab if params.security_enabled else None)
  else:
    Logger.info('Ranger Hive plugin is not enabled')
def setup_spark(env, type, upgrade_type=None, action=None): import params Directory([params.spark_pid_dir, params.spark_log_dir], owner=params.spark_user, group=params.user_group, mode=0775, create_parents=True) if type == 'server' and action == 'config': params.HdfsResource(params.spark_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.spark_user, mode=0775) params.HdfsResource(None, action="execute") PropertiesFile( format("{spark_conf}/spark-defaults.conf"), properties=params.config['configurations']['spark2-defaults'], key_value_delimiter=" ", owner=params.spark_user, group=params.spark_group, ) # create spark-env.sh in etc/conf dir File( os.path.join(params.spark_conf, 'spark-env.sh'), owner=params.spark_user, group=params.spark_group, content=InlineTemplate(params.spark_env_sh), mode=0644, ) #create log4j.properties in etc/conf dir File( os.path.join(params.spark_conf, 'log4j.properties'), owner=params.spark_user, group=params.spark_group, content=params.spark_log4j_properties, mode=0644, ) #create metrics.properties in etc/conf dir File(os.path.join(params.spark_conf, 'metrics.properties'), owner=params.spark_user, group=params.spark_group, content=InlineTemplate(params.spark_metrics_properties)) Directory( params.spark_logs_dir, owner=params.spark_user, group=params.spark_group, mode=0755, ) if params.is_hive_installed: XmlConfig("hive-site.xml", conf_dir=params.spark_conf, configurations=params.spark_hive_properties, owner=params.spark_user, group=params.spark_group, mode=0644) if params.has_spark_thriftserver: PropertiesFile( params.spark_thrift_server_conf_file, properties=params.config['configurations'] ['spark2-thrift-sparkconf'], owner=params.hive_user, group=params.user_group, key_value_delimiter=" ", ) effective_version = params.version if upgrade_type is not None else params.stack_version_formatted if effective_version: effective_version = format_stack_version(effective_version) if params.spark_thrift_fairscheduler_content 
and effective_version and check_stack_feature( StackFeature.SPARK_16PLUS, effective_version): # create spark-thrift-fairscheduler.xml File(os.path.join(params.spark_conf, "spark-thrift-fairscheduler.xml"), owner=params.spark_user, group=params.spark_group, mode=0755, content=InlineTemplate(params.spark_thrift_fairscheduler_content))
def oozie(is_server=False, upgrade_type=None): import params if is_server: params.HdfsResource(params.oozie_hdfs_user_dir, type="directory", action="create_on_execute", owner=params.oozie_user, mode=params.oozie_hdfs_user_mode) params.HdfsResource(None, action="execute") Directory(params.conf_dir, create_parents=True, owner=params.oozie_user, group=params.user_group) params.oozie_site = update_credential_provider_path( params.oozie_site, 'oozie-site', os.path.join(params.conf_dir, 'oozie-site.jceks'), params.oozie_user, params.user_group) XmlConfig( "oozie-site.xml", conf_dir=params.conf_dir, configurations=params.oozie_site, configuration_attributes=params.config['configuration_attributes'] ['oozie-site'], owner=params.oozie_user, group=params.user_group, mode=0664) File( format("{conf_dir}/oozie-env.sh"), owner=params.oozie_user, content=InlineTemplate(params.oozie_env_sh_template), group=params.user_group, ) # On some OS this folder could be not exists, so we will create it before pushing there files Directory(params.limits_conf_dir, create_parents=True, owner='root', group='root') File(os.path.join(params.limits_conf_dir, 'oozie.conf'), owner='root', group='root', mode=0644, content=Template("oozie.conf.j2")) if (params.log4j_props != None): File(format("{params.conf_dir}/oozie-log4j.properties"), mode=0644, group=params.user_group, owner=params.oozie_user, content=InlineTemplate(params.log4j_props)) elif (os.path.exists(format("{params.conf_dir}/oozie-log4j.properties"))): File(format("{params.conf_dir}/oozie-log4j.properties"), mode=0644, group=params.user_group, owner=params.oozie_user) if params.stack_version_formatted and check_stack_feature( StackFeature.OOZIE_ADMIN_USER, params.stack_version_formatted): File(format("{params.conf_dir}/adminusers.txt"), mode=0644, group=params.user_group, owner=params.oozie_user, content=Template('adminusers.txt.j2', oozie_admin_users=params.oozie_admin_users)) else: File(format("{params.conf_dir}/adminusers.txt"), 
owner=params.oozie_user, group=params.user_group) if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \ params.jdbc_driver_name == "com.microsoft.sqlserver.jdbc.SQLServerDriver" or \ params.jdbc_driver_name == "org.postgresql.Driver" or \ params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver": File( format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"), content=DownloadSource( format("{jdk_location}{check_db_connection_jar_name}")), ) pass oozie_ownership() if is_server: oozie_server_specific(upgrade_type)
def hbase(
    name=None  # 'master' or 'regionserver' or 'client'
):
  """Configure HBase for the given component: limits, conf dir, tmp parent
  dir permissions, hbase-site/core-site/hdfs-site/hbase-policy XML configs,
  env/metrics/jaas templates, pid/log dirs, and (for master/regionserver)
  the HBase directories in HDFS.
  """
  import params

  #Directory( params.hbase_conf_dir_prefix,
  #   mode=0755
  #)

  # On some OS this folder could be not exists, so we will create it before pushing there files
  Directory(params.limits_conf_dir,
            create_parents=True,
            owner='root',
            group='root')

  File(os.path.join(params.limits_conf_dir, 'hbase.conf'),
       owner='root',
       group='root',
       mode=0644,
       content=Template("hbase.conf.j2"))

  Directory(params.hbase_conf_dir,
            owner=params.hbase_user,
            group=params.user_group,
            create_parents=True)

  # NOTE(review): disabled legacy directory creation kept as a no-op string.
  '''Directory (params.tmp_dir,
             owner = params.hbase_user,
             mode=0775,
             create_parents = True,
             cd_access="a",
  )
  Directory (params.local_dir,
             owner = params.hbase_user,
             group = params.user_group,
             mode=0775,
             create_parents = True
  )
  Directory (os.path.join(params.local_dir, "jars"),
             owner = params.hbase_user,
             group = params.user_group,
             mode=0775,
             create_parents = True
  )'''

  parent_dir = os.path.dirname(params.tmp_dir)
  # In case if we have several placeholders in path
  while ("${" in parent_dir):
    parent_dir = os.path.dirname(parent_dir)
  # Only create/chmod when the resolved parent is not the filesystem root.
  if parent_dir != os.path.abspath(os.sep):
    Directory(parent_dir,
              create_parents=True,
              cd_access="a",
    )
    # Sticky bit so every HBase process can write below the shared parent.
    Execute(("chmod", "1777", parent_dir), sudo=True)

  XmlConfig("hbase-site.xml",
            conf_dir=params.hbase_conf_dir,
            configurations=params.config['configurations']['hbase-site'],
            configuration_attributes=params.config['configuration_attributes']['hbase-site'],
            owner=params.hbase_user,
            group=params.user_group)

  XmlConfig("core-site.xml",
            conf_dir=params.hbase_conf_dir,
            configurations=params.config['configurations']['core-site'],
            configuration_attributes=params.config['configuration_attributes']['core-site'],
            owner=params.hbase_user,
            group=params.user_group)

  if 'hdfs-site' in params.config['configurations']:
    XmlConfig("hdfs-site.xml",
              conf_dir=params.hbase_conf_dir,
              configurations=params.config['configurations']['hdfs-site'],
              configuration_attributes=params.config['configuration_attributes']['hdfs-site'],
              owner=params.hbase_user,
              group=params.user_group)

  #XmlConfig("hdfs-site.xml",
  #        conf_dir=params.hadoop_conf_dir,
  #        configurations=params.config['configurations']['hdfs-site'],
  #        configuration_attributes=params.config['configuration_attributes']['hdfs-site'],
  #        owner=params.hdfs_user,
  #        group=params.user_group
  #)

  if 'hbase-policy' in params.config['configurations']:
    XmlConfig("hbase-policy.xml",
              conf_dir=params.hbase_conf_dir,
              configurations=params.config['configurations']['hbase-policy'],
              configuration_attributes=params.config['configuration_attributes']['hbase-policy'],
              owner=params.hbase_user,
              group=params.user_group)
  # Manually overriding ownership of file installed by hadoop package
  else:
    File(format("{params.hbase_conf_dir}/hbase-policy.xml"),
         owner=params.hbase_user,
         group=params.user_group)

  File(format("{hbase_conf_dir}/hbase-env.sh"),
       owner=params.hbase_user,
       content=InlineTemplate(params.hbase_env_sh_template))

  # Metrics template tagged per component role.
  hbase_TemplateConfig(params.metric_prop_file_name,
                       tag='GANGLIA-MASTER' if name == 'master' else 'GANGLIA-RS')

  hbase_TemplateConfig('regionservers')

  if params.security_enabled:
    # Per-component JAAS config (hbase_master_jaas.conf etc.).
    hbase_TemplateConfig(format("hbase_{name}_jaas.conf"))

  if name != "client":
    Directory(params.pid_dir, owner=params.hbase_user, create_parents=True)
    Directory(params.log_dir, owner=params.hbase_user, create_parents=True)

  if (params.log4j_props != None):
    File(format("{params.hbase_conf_dir}/log4j.properties"),
         mode=0644,
         group=params.user_group,
         owner=params.hbase_user,
         content=params.log4j_props)
  elif (os.path.exists(format("{params.hbase_conf_dir}/log4j.properties"))):
    File(format("{params.hbase_conf_dir}/log4j.properties"),
         mode=0644,
         group=params.user_group,
         owner=params.hbase_user)

  # HDFS-side directories are only needed by daemons, not by clients.
  if name in ["master", "regionserver"]:
    params.HdfsResource(params.hbase_hdfs_root_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.hbase_user)
    params.HdfsResource(params.hbase_staging_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.hbase_user,
                        mode=0711)
    params.HdfsResource(params.hbase_hdfs_user_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.hbase_user,
                        mode=params.hbase_hdfs_user_mode)
    params.HdfsResource(None, action="execute")

  # create java-opts in etc/hbase/conf dir for iop.version
  File(format("{params.hbase_conf_dir}/java-opts"),
       mode=0644,
       group=params.user_group,
       owner=params.hbase_user,
       content=params.hbase_javaopts_properties)
def copy_atlas_hive_hook_to_dfs_share_lib(upgrade_type=None, upgrade_direction=None):
  """
  If the Atlas Hive Hook directory is present, Atlas is installed, and this is the first Oozie Server,
  then copy the entire contents of that directory to the Oozie Sharelib in DFS, e.g.,
  /usr/$stack/$current_version/atlas/hook/hive/ -> hdfs:///user/oozie/share/lib/lib_$timestamp/hive

  :param upgrade_type: If in the middle of a stack upgrade, the type as UPGRADE_TYPE_ROLLING or UPGRADE_TYPE_NON_ROLLING
  :param upgrade_direction: If in the middle of a stack upgrade, the direction as Direction.UPGRADE or Direction.DOWNGRADE.
  """
  import params

  # Calculate the effective version since this code can also be called during EU/RU in the upgrade direction.
  effective_version = params.stack_version_formatted if upgrade_type is None \
      else format_stack_version(params.version)
  if not check_stack_feature(StackFeature.ATLAS_HOOK_SUPPORT, effective_version):
    return

  # Important that oozie_server_hostnames is sorted by name so that this only runs on a single Oozie server.
  if not (len(params.oozie_server_hostnames) > 0 and
          params.hostname == params.oozie_server_hostnames[0]):
    Logger.debug(
        "Will not attempt to copy Atlas Hive hook to DFS since this is not the first Oozie Server "
        "sorted by hostname.")
    return

  if not has_atlas_in_cluster():
    Logger.debug(
        "Will not attempt to copy Atlas Hve hook to DFS since Atlas is not installed on the cluster."
    )
    return

  if upgrade_type is not None and upgrade_direction == Direction.DOWNGRADE:
    Logger.debug(
        "Will not attempt to copy Atlas Hve hook to DFS since in the middle of Rolling/Express upgrade "
        "and performing a Downgrade.")
    return

  # Re-resolve against the currently installed ATLAS version for the hook path.
  effective_version = get_current_version(service="ATLAS")
  atlas_hive_hook_dir = format(
      "{stack_root}/{effective_version}/atlas/hook/hive/")
  if not os.path.exists(atlas_hive_hook_dir):
    Logger.error(
        format(
            "ERROR. Atlas is installed in cluster but this Oozie server doesn't "
            "contain directory {atlas_hive_hook_dir}"))
    return

  atlas_hive_hook_impl_dir = os.path.join(atlas_hive_hook_dir,
                                          "atlas-hive-plugin-impl")

  # NOTE(review): os.path.exists on a fresh listdir entry is almost always
  # true — this count is effectively len(os.listdir(...)); confirm intent.
  num_files = len([
      name for name in os.listdir(atlas_hive_hook_impl_dir)
      if os.path.exists(os.path.join(atlas_hive_hook_impl_dir, name))
  ])
  Logger.info(
      "Found %d files/directories inside Atlas Hive hook impl directory %s" %
      (num_files, atlas_hive_hook_impl_dir))

  # This can return over 100 files, so take the first 5 lines after "Available ShareLib"
  # Use -oozie http(s):localhost:{oozie_server_admin_port}/oozie as oozie-env does not export OOZIE_URL
  command = format(
      r'source {conf_dir}/oozie-env.sh ; oozie admin -oozie {oozie_base_url} -shareliblist hive | grep "\[Available ShareLib\]" -A 5'
  )
  code, out = checked_call(command,
                           user=params.oozie_user,
                           tries=10,
                           try_sleep=5,
                           logoutput=True)

  hive_sharelib_dir = __parse_sharelib_from_output(out)
  if hive_sharelib_dir is None:
    raise Fail("Could not parse Hive sharelib from output.")

  Logger.info(
      format(
          "Parsed Hive sharelib = {hive_sharelib_dir} and will attempt to copy/replace {num_files} files to it from {atlas_hive_hook_impl_dir}"
      ))

  # Recursive copy of the plugin impl dir into the sharelib (batched).
  params.HdfsResource(hive_sharelib_dir,
                      type="directory",
                      action="create_on_execute",
                      source=atlas_hive_hook_impl_dir,
                      user=params.hdfs_user,
                      owner=params.oozie_user,
                      group=params.hdfs_user,
                      mode=0755,
                      recursive_chown=True,
                      recursive_chmod=True,
                      replace_existing_files=True)

  Logger.info(
      "Copying Atlas Hive hook properties file to Oozie Sharelib in DFS.")
  atlas_hook_filepath_source = os.path.join(params.hive_conf_dir,
                                            params.atlas_hook_filename)
  atlas_hook_file_path_dest_in_dfs = os.path.join(hive_sharelib_dir,
                                                  params.atlas_hook_filename)
  params.HdfsResource(atlas_hook_file_path_dest_in_dfs,
                      type="file",
                      source=atlas_hook_filepath_source,
                      action="create_on_execute",
                      owner=params.oozie_user,
                      group=params.hdfs_user,
                      mode=0755,
                      replace_existing_files=True)
  params.HdfsResource(None,
                      action="execute")

  # Update the sharelib after making any changes
  # Use -oozie http(s):localhost:{oozie_server_admin_port}/oozie as oozie-env does not export OOZIE_URL
  Execute(
      format(
          "source {conf_dir}/oozie-env.sh ; oozie admin -oozie {oozie_base_url} -sharelibupdate"
      ),
      user=params.oozie_user,
      tries=5,
      try_sleep=5,
      logoutput=True,
  )
def service_check(self, env): import params env.set_params(params) unique = functions.get_unique_id_and_date() dir = params.hdfs_tmp_dir tmp_file = format("{dir}/{unique}") """ Ignore checking safemode, because this command is unable to get safemode state when 1 namenode is down in an HA setup (see more in HDFS-8277). Directly test HDFS availability by file system operations is consistent in both HA and non-HA environment. """ # safemode_command = format("dfsadmin -fs {namenode_address} -safemode get | grep OFF") if params.security_enabled: Execute(format( "{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}" ), user=params.hdfs_user) #ExecuteHadoop(safemode_command, # user=params.hdfs_user, # logoutput=True, # conf_dir=params.hadoop_conf_dir, # try_sleep=3, # tries=20, # bin_dir=params.hadoop_bin_dir #) params.HdfsResource(dir, type="directory", action="create_on_execute", mode=0777) params.HdfsResource( tmp_file, type="file", action="delete_on_execute", ) params.HdfsResource(tmp_file, type="file", source="/etc/passwd", action="create_on_execute") params.HdfsResource(None, action="execute") if params.has_journalnode_hosts: if params.security_enabled: for host in params.journalnode_hosts: if params.https_only: uri = format("https://{host}:{journalnode_port}") else: uri = format("http://{host}:{journalnode_port}") response, errmsg, time_millis = curl_krb_request( params.tmp_dir, params.smoke_user_keytab, params.smokeuser_principal, uri, "jn_service_check", params.kinit_path_local, False, None, params.smoke_user) if not response: Logger.error( "Cannot access WEB UI on: {0}. 
Error : {1}", uri, errmsg) return 1 else: journalnode_port = params.journalnode_port checkWebUIFileName = "checkWebUI.py" checkWebUIFilePath = format("{tmp_dir}/{checkWebUIFileName}") comma_sep_jn_hosts = ",".join(params.journalnode_hosts) checkWebUICmd = format( "ambari-python-wrap {checkWebUIFilePath} -m {comma_sep_jn_hosts} -p {journalnode_port} -s {https_only} -o {script_https_protocol}" ) File(checkWebUIFilePath, content=StaticFile(checkWebUIFileName), mode=0775) Execute(checkWebUICmd, logoutput=True, try_sleep=3, tries=5, user=params.smoke_user) if params.is_namenode_master: if params.has_zkfc_hosts: pid_dir = format("{hadoop_pid_dir_prefix}/{hdfs_user}") pid_file = format("{pid_dir}/hadoop-{hdfs_user}-zkfc.pid") check_zkfc_process_cmd = as_user(format( "ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1" ), user=params.hdfs_user) Execute(check_zkfc_process_cmd, logoutput=True, try_sleep=3, tries=5)
def setup_ranger_hbase(upgrade_type=None, service_name="hbase-master"):
  """
  Set up the Ranger authorization plugin for HBase.

  Pre-creates the Ranger HDFS audit directories (master only), then invokes
  the stack's setup_ranger_plugin helper -- the XML-configuration variant
  when the stack supports it, the legacy variant otherwise. Logs and exits
  when no Ranger admin is installed.

  :param upgrade_type: non-None while a stack upgrade is in progress; makes
                       plugin setup target params.version instead of the
                       currently installed stack version.
  :param service_name: component being configured; HDFS audit directories
                       are created only for 'hbase-master'.
  """
  import params

  if params.has_ranger_admin:
    # During an upgrade, set the plugin up against the new stack version.
    stack_version = None
    if upgrade_type is not None:
      stack_version = params.version

    if params.retryAble:
      Logger.info("HBase: Setup ranger: command retry enables thus retrying if ranger admin is down !")
    else:
      Logger.info("HBase: Setup ranger: command retry not enabled thus skipping if ranger admin is down !")

    # Pre-create the HDFS audit destinations; only the master does this so
    # regionservers don't race on the same directories.
    if params.xml_configurations_supported and params.enable_ranger_hbase and params.xa_audit_hdfs_is_enabled and service_name == 'hbase-master':
      params.HdfsResource("/ranger/audit",
                          type="directory",
                          action="create_on_execute",
                          owner=params.hdfs_user,
                          group=params.hdfs_user,
                          mode=0755,
                          recursive_chmod=True
      )
      params.HdfsResource("/ranger/audit/hbaseMaster",
                          type="directory",
                          action="create_on_execute",
                          owner=params.hbase_user,
                          group=params.hbase_user,
                          mode=0700,
                          recursive_chmod=True
      )
      params.HdfsResource("/ranger/audit/hbaseRegional",
                          type="directory",
                          action="create_on_execute",
                          owner=params.hbase_user,
                          group=params.hbase_user,
                          mode=0700,
                          recursive_chmod=True
      )
      # Flush the queued HDFS operations in a single batch.
      params.HdfsResource(None, action="execute")

    if params.xml_configurations_supported:
      # v2 of the Ranger REST API is required for kerberized Ranger.
      api_version = None
      if params.stack_supports_ranger_kerberos:
        api_version = 'v2'
      from resource_management.libraries.functions.setup_ranger_plugin_xml import setup_ranger_plugin
      setup_ranger_plugin('hbase-client', 'hbase', params.previous_jdbc_jar,
                          params.downloaded_custom_connector, params.driver_curl_source,
                          params.driver_curl_target, params.java64_home,
                          params.repo_name, params.hbase_ranger_plugin_repo,
                          params.ranger_env, params.ranger_plugin_properties,
                          params.policy_user, params.policymgr_mgr_url,
                          params.enable_ranger_hbase,
                          conf_dict=params.hbase_conf_dir,
                          component_user=params.hbase_user,
                          component_group=params.user_group,
                          cache_service_list=['hbaseMaster', 'hbaseRegional'],
                          plugin_audit_properties=params.config['configurations']['ranger-hbase-audit'],
                          plugin_audit_attributes=params.config['configuration_attributes']['ranger-hbase-audit'],
                          plugin_security_properties=params.config['configurations']['ranger-hbase-security'],
                          plugin_security_attributes=params.config['configuration_attributes']['ranger-hbase-security'],
                          plugin_policymgr_ssl_properties=params.config['configurations']['ranger-hbase-policymgr-ssl'],
                          plugin_policymgr_ssl_attributes=params.config['configuration_attributes']['ranger-hbase-policymgr-ssl'],
                          component_list=['hbase-client', 'hbase-master', 'hbase-regionserver'],
                          audit_db_is_enabled=params.xa_audit_db_is_enabled,
                          credential_file=params.credential_file,
                          xa_audit_db_password=params.xa_audit_db_password,
                          ssl_truststore_password=params.ssl_truststore_password,
                          ssl_keystore_password=params.ssl_keystore_password,
                          stack_version_override=stack_version,
                          skip_if_rangeradmin_down=not params.retryAble,
                          api_version=api_version,
                          is_security_enabled=params.security_enabled,
                          is_stack_supports_ranger_kerberos=params.stack_supports_ranger_kerberos if params.security_enabled else None,
                          component_user_principal=params.ranger_hbase_principal if params.security_enabled else None,
                          component_user_keytab=params.ranger_hbase_keytab if params.security_enabled else None)
    else:
      # Legacy (non-XML) plugin setup path for older stacks.
      from resource_management.libraries.functions.setup_ranger_plugin import setup_ranger_plugin
      setup_ranger_plugin('hbase-client', 'hbase', params.previous_jdbc_jar,
                          params.downloaded_custom_connector, params.driver_curl_source,
                          params.driver_curl_target, params.java64_home,
                          params.repo_name, params.hbase_ranger_plugin_repo,
                          params.ranger_env, params.ranger_plugin_properties,
                          params.policy_user, params.policymgr_mgr_url,
                          params.enable_ranger_hbase,
                          conf_dict=params.hbase_conf_dir,
                          component_user=params.hbase_user,
                          component_group=params.user_group,
                          cache_service_list=['hbaseMaster', 'hbaseRegional'],
                          plugin_audit_properties=params.config['configurations']['ranger-hbase-audit'],
                          plugin_audit_attributes=params.config['configuration_attributes']['ranger-hbase-audit'],
                          plugin_security_properties=params.config['configurations']['ranger-hbase-security'],
                          plugin_security_attributes=params.config['configuration_attributes']['ranger-hbase-security'],
                          plugin_policymgr_ssl_properties=params.config['configurations']['ranger-hbase-policymgr-ssl'],
                          plugin_policymgr_ssl_attributes=params.config['configuration_attributes']['ranger-hbase-policymgr-ssl'],
                          component_list=['hbase-client', 'hbase-master', 'hbase-regionserver'],
                          audit_db_is_enabled=params.xa_audit_db_is_enabled,
                          credential_file=params.credential_file,
                          xa_audit_db_password=params.xa_audit_db_password,
                          ssl_truststore_password=params.ssl_truststore_password,
                          ssl_keystore_password=params.ssl_keystore_password,
                          stack_version_override=stack_version,
                          skip_if_rangeradmin_down=not params.retryAble)
  else:
    Logger.info('Ranger admin not installed')
def falcon(type, action=None, upgrade_type=None):
  """
  Configure, start or stop Falcon.

  :param type: 'server' enables the server-only work (DFS directories,
               start/stop); other values get client configuration only.
               NOTE: the name shadows the builtin but is part of the public
               signature, so it is kept.
  :param action: 'config', 'start' or 'stop'.
  :param upgrade_type: non-None while a stack upgrade is in progress; changes
                       which stack version is used for feature checks.
  """
  import params

  if action == 'config':
    # Local directory and config layout shared by client and server.
    Directory(params.falcon_pid_dir,
              owner=params.falcon_user,
              create_parents=True,
              mode=0755,
              cd_access="a",
    )
    Directory(params.falcon_log_dir,
              owner=params.falcon_user,
              create_parents=True,
              mode=0755,
              cd_access="a",
    )
    Directory(params.falcon_webapp_dir,
              owner=params.falcon_user,
              create_parents=True)
    Directory(params.falcon_home,
              owner=params.falcon_user,
              create_parents=True)
    Directory(params.etc_prefix_dir,
              mode=0755,
              create_parents=True)
    Directory(params.falcon_conf_dir,
              owner=params.falcon_user,
              create_parents=True)

    File(params.falcon_conf_dir + '/falcon-env.sh',
         content=InlineTemplate(params.falcon_env_sh_template),
         owner=params.falcon_user,
         group=params.user_group,
    )
    PropertiesFile(params.falcon_conf_dir + '/client.properties',
                   properties=params.falcon_client_properties,
                   mode=0644,
                   owner=params.falcon_user)
    PropertiesFile(params.falcon_conf_dir + '/runtime.properties',
                   properties=params.falcon_runtime_properties,
                   mode=0644,
                   owner=params.falcon_user)
    PropertiesFile(params.falcon_conf_dir + '/startup.properties',
                   properties=params.falcon_startup_properties,
                   mode=0644,
                   owner=params.falcon_user)
    File(params.falcon_conf_dir + '/log4j.properties',
         content=InlineTemplate(params.falcon_log4j),
         group=params.user_group,
         mode=0644,
         owner=params.falcon_user)

    # Local storage for the graph DB, when configured.
    if params.falcon_graph_storage_directory:
      Directory(params.falcon_graph_storage_directory,
                owner=params.falcon_user,
                group=params.user_group,
                mode=0775,
                create_parents=True,
                cd_access="a")

    if params.falcon_graph_serialize_path:
      Directory(params.falcon_graph_serialize_path,
                owner=params.falcon_user,
                group=params.user_group,
                mode=0775,
                create_parents=True,
                cd_access="a")

    # Generate atlas-application.properties.xml file
    if params.falcon_atlas_support and params.enable_atlas_hook:
      # If Atlas is added later than Falcon, this package will be absent.
      if check_stack_feature(StackFeature.ATLAS_INSTALL_HOOK_PACKAGE_SUPPORT, params.current_version_formatted):
        install_atlas_hook_packages(params.atlas_plugin_package,
                                    params.atlas_ubuntu_plugin_package,
                                    params.host_sys_prepped,
                                    params.agent_stack_retry_on_unavailability,
                                    params.agent_stack_retry_count)

      atlas_hook_filepath = os.path.join(params.falcon_conf_dir, params.atlas_hook_filename)
      setup_atlas_hook(SERVICE.FALCON,
                       params.falcon_atlas_application_properties,
                       atlas_hook_filepath,
                       params.falcon_user,
                       params.user_group)

      # Falcon 0.10 uses FALCON_EXTRA_CLASS_PATH.
      # Setup symlinks for older versions.
      if params.current_version_formatted and check_stack_feature(StackFeature.FALCON_ATLAS_SUPPORT_2_3, params.current_version_formatted):
        setup_atlas_jar_symlinks("falcon", params.falcon_webinf_lib)

  if type == 'server':
    if action == 'config':
      # Entity store may live in DFS or on the local FS (file:// scheme).
      if params.store_uri[0:4] == "hdfs":
        params.HdfsResource(params.store_uri,
                            type="directory",
                            action="create_on_execute",
                            owner=params.falcon_user,
                            mode=0755)
      elif params.store_uri[0:4] == "file":
        # [7:] strips the leading "file://" scheme prefix.
        Directory(params.store_uri[7:],
                  owner=params.falcon_user,
                  create_parents=True)

      # TODO change to proper mode
      params.HdfsResource(params.falcon_apps_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.falcon_user,
                          mode=0777)

      # In HDP 2.4 and earlier, the data-mirroring directory was copied to HDFS.
      if params.supports_data_mirroring:
        params.HdfsResource(params.dfs_data_mirroring_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.falcon_user,
                            group=params.proxyuser_group,
                            recursive_chown=True,
                            recursive_chmod=True,
                            mode=0770,
                            source=params.local_data_mirroring_dir)

      # Falcon Extensions were supported in HDP 2.5 and higher.
      effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(params.version)
      supports_falcon_extensions = effective_version and check_stack_feature(StackFeature.FALCON_EXTENSIONS, effective_version)

      if supports_falcon_extensions:
        params.HdfsResource(params.falcon_extensions_dest_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.falcon_user,
                            group=params.proxyuser_group,
                            recursive_chown=True,
                            recursive_chmod=True,
                            mode=0755,
                            source=params.falcon_extensions_source_dir)
        # Create the extensons HiveDR store
        params.HdfsResource(os.path.join(params.falcon_extensions_dest_dir, "mirroring"),
                            type="directory",
                            action="create_on_execute",
                            owner=params.falcon_user,
                            group=params.proxyuser_group,
                            mode=0770)

      # At least one HDFS Dir should be created, so execute the change now.
      params.HdfsResource(None, action="execute")

      Directory(params.falcon_local_dir,
                owner=params.falcon_user,
                create_parents=True,
                cd_access="a")

      if params.falcon_embeddedmq_enabled == True:
        # Create the parent of the embedded-MQ data dir first.
        Directory(os.path.abspath(os.path.join(params.falcon_embeddedmq_data, "..")),
                  owner=params.falcon_user,
                  create_parents=True)
        Directory(params.falcon_embeddedmq_data,
                  owner=params.falcon_user,
                  create_parents=True)

    # although Falcon's falcon-config.sh will use 'which hadoop' to figure
    # this out, in an upgraded cluster, it's possible that 'which hadoop'
    # still points to older binaries; it's safer to just pass in the
    # hadoop home directory to use
    environment_dictionary = {"HADOOP_HOME": params.hadoop_home_dir}

    # 'pid' is read here because format() resolves "{pid}" from locals below.
    pid = get_user_call_output.get_user_call_output(format("cat {server_pid_file}"),
                                                    user=params.falcon_user,
                                                    is_checked_call=False)[1]
    process_exists = format("ls {server_pid_file} && ps -p {pid}")

    if action == 'start':
      try:
        Execute(format('{falcon_home}/bin/falcon-config.sh server falcon'),
                user=params.falcon_user,
                path=params.hadoop_bin_dir,
                environment=environment_dictionary,
                not_if=process_exists,
        )
      except:
        show_logs(params.falcon_log_dir, params.falcon_user)
        raise

      # Best-effort download of the BerkeleyDB jar used by the bdb graph store.
      if not os.path.exists(params.target_jar_file):
        try:
          File(params.target_jar_file,
               content=DownloadSource(params.bdb_resource_name),
               mode=0755)
        except:
          exc_msg = traceback.format_exc()
          exception_message = format("Caught Exception while downloading {bdb_resource_name}:\n{exc_msg}")
          Logger.error(exception_message)

        if not os.path.isfile(params.target_jar_file):
          error_message = """
If you are using bdb as the Falcon graph db store, please run
ambari-server setup --jdbc-db=bdb --jdbc-driver=<path to je5.0.73.jar>
on the ambari server host.  Otherwise falcon startup will fail.
Otherwise please configure Falcon to use HBase as the backend as described
in the Falcon documentation.
"""
          Logger.error(error_message)

      try:
        Execute(format('{falcon_home}/bin/falcon-start -port {falcon_port}'),
                user=params.falcon_user,
                path=params.hadoop_bin_dir,
                environment=environment_dictionary,
                not_if=process_exists,
        )
      except:
        show_logs(params.falcon_log_dir, params.falcon_user)
        raise

    if action == 'stop':
      try:
        Execute(format('{falcon_home}/bin/falcon-stop'),
                user=params.falcon_user,
                path=params.hadoop_bin_dir,
                environment=environment_dictionary)
      except:
        show_logs(params.falcon_log_dir, params.falcon_user)
        raise

      File(params.server_pid_file, action='delete')
def oozie_smoke_shell_file(file_name, prepare_hdfs_file_name): import params File(format("{tmp_dir}/{file_name}"), content=StaticFile(file_name), mode=0755) File(format("{tmp_dir}/{prepare_hdfs_file_name}"), content=StaticFile(prepare_hdfs_file_name), mode=0755) os_family = System.get_instance().os_family oozie_examples_dir = glob.glob(params.oozie_examples_regex)[0] Execute(format( "{tmp_dir}/{prepare_hdfs_file_name} {conf_dir} {oozie_examples_dir} {hadoop_conf_dir} {service_check_queue_name}" ), tries=3, try_sleep=5, logoutput=True) params.HdfsResource( format("/user/{smokeuser}"), type="directory", action="create_on_execute", owner=params.smokeuser, mode=params.smoke_hdfs_user_mode, ) examples_dir = format('/user/{smokeuser}/examples') params.HdfsResource(examples_dir, action="delete_on_execute", type="directory") params.HdfsResource(examples_dir, action="create_on_execute", type="directory", source=format("{oozie_examples_dir}/examples"), owner=params.smokeuser, group=params.user_group) input_data_dir = format('/user/{smokeuser}/input-data') params.HdfsResource(input_data_dir, action="delete_on_execute", type="directory") params.HdfsResource( input_data_dir, action="create_on_execute", type="directory", source=format("{oozie_examples_dir}/examples/input-data"), owner=params.smokeuser, group=params.user_group) params.HdfsResource(None, action="execute") if params.security_enabled: sh_cmd = format( "{tmp_dir}/{file_name} {os_family} {oozie_lib_dir} {conf_dir} {oozie_bin_dir} {oozie_base_url} {oozie_examples_dir} {hadoop_conf_dir} {hadoop_bin_dir} {smokeuser} {security_enabled} {smokeuser_keytab} {kinit_path_local} {smokeuser_principal}" ) else: sh_cmd = format( "{tmp_dir}/{file_name} {os_family} {oozie_lib_dir} {conf_dir} {oozie_bin_dir} {oozie_base_url} {oozie_examples_dir} {hadoop_conf_dir} {hadoop_bin_dir} {smokeuser} {security_enabled}" ) Execute(sh_cmd, path=params.execute_path, tries=3, try_sleep=5, logoutput=True)
def setup_ranger_atlas(upgrade_type=None):
  """
  Set up the Ranger authorization plugin for Atlas.

  Pre-creates the Ranger HDFS audit directories (best effort, when a
  NameNode is present and HDFS auditing is enabled), optionally writes
  ssl-client.xml for SSL-enabled Ranger KMS, then runs the XML variant of
  setup_ranger_plugin for the atlas-server component. No-op when
  enable_ranger_atlas is false.

  :param upgrade_type: accepted for interface symmetry with the other
                       setup_ranger_* helpers; not used in this body.
  """
  import params

  if params.enable_ranger_atlas:
    if params.retry_enabled:
      Logger.info("ATLAS: Setup ranger: command retry enables thus retrying if ranger admin is down !")
    else:
      Logger.info("ATLAS: Setup ranger: command retry not enabled thus skipping if ranger admin is down !")

    if params.has_namenode and params.xa_audit_hdfs_is_enabled:
      try:
        params.HdfsResource("/ranger/audit",
                            type="directory",
                            action="create_on_execute",
                            owner=params.metadata_user,
                            group=params.user_group,
                            mode=0755,
                            recursive_chmod=True)
        params.HdfsResource("/ranger/audit/atlas",
                            type="directory",
                            action="create_on_execute",
                            owner=params.metadata_user,
                            group=params.user_group,
                            mode=0700,
                            recursive_chmod=True)
        params.HdfsResource(None, action="execute")

        if params.is_ranger_kms_ssl_enabled:
          Logger.info('Ranger KMS is ssl enabled, configuring ssl-client for hdfs audits.')
          setup_configuration_file_for_required_plugins(
              component_user=params.metadata_user,
              component_group=params.user_group,
              create_core_site_path=params.conf_dir,
              configurations=params.config['configurations']['ssl-client'],
              configuration_attributes=params.config['configurationAttributes']['ssl-client'],
              file_name='ssl-client.xml')
        else:
          Logger.info('Ranger KMS is not ssl enabled, skipping ssl-client for hdfs audits.')
      except Exception, err:
        # Audit-dir creation is best effort: log and continue with the
        # plugin setup itself.
        Logger.exception("Audit directory creation in HDFS for ATLAS Ranger plugin failed with error:\n{0}".format(err))

    setup_ranger_plugin('atlas-server', 'atlas', None,
                        params.downloaded_custom_connector,
                        params.driver_curl_source,
                        params.driver_curl_target,
                        params.java64_home,
                        params.repo_name,
                        params.atlas_ranger_plugin_repo,
                        params.ranger_env,
                        params.ranger_plugin_properties,
                        params.policy_user,
                        params.policymgr_mgr_url,
                        params.enable_ranger_atlas,
                        conf_dict=params.conf_dir,
                        component_user=params.metadata_user,
                        component_group=params.user_group,
                        cache_service_list=['atlas'],
                        plugin_audit_properties=params.config['configurations']['ranger-atlas-audit'],
                        plugin_audit_attributes=params.config['configurationAttributes']['ranger-atlas-audit'],
                        plugin_security_properties=params.config['configurations']['ranger-atlas-security'],
                        plugin_security_attributes=params.config['configurationAttributes']['ranger-atlas-security'],
                        plugin_policymgr_ssl_properties=params.config['configurations']['ranger-atlas-policymgr-ssl'],
                        plugin_policymgr_ssl_attributes=params.config['configurationAttributes']['ranger-atlas-policymgr-ssl'],
                        component_list=['atlas-server'],
                        audit_db_is_enabled=False,
                        credential_file=params.credential_file,
                        xa_audit_db_password=None,
                        ssl_truststore_password=params.ssl_truststore_password,
                        ssl_keystore_password=params.ssl_keystore_password,
                        api_version='v2',
                        skip_if_rangeradmin_down=not params.retry_enabled,
                        is_security_enabled=params.security_enabled,
                        is_stack_supports_ranger_kerberos=params.stack_supports_ranger_kerberos,
                        component_user_principal=params.atlas_jaas_principal if params.security_enabled else None,
                        component_user_keytab=params.atlas_keytab_path if params.security_enabled else None)
def service_check(self, env):
  """
  Smoke test for YARN.

  Runs the DistributedShell example application as the smoke user, extracts
  the application id from the client output, then queries the active
  ResourceManager's REST API (/ws/v1/cluster/apps) and fails unless the
  application finished with state FINISHED and final status SUCCEEDED.

  :param env: execution environment; used to publish the params module.
  :raises Fail: when the app id cannot be parsed from the client output,
                the RM response is not valid JSON, or the app did not
                finish successfully.
  """
  import params
  env.set_params(params)

  params.HdfsResource(format("/user/{smokeuser}"),
                      type="directory",
                      action="create_on_execute",
                      owner=params.smokeuser,
                      mode=params.smoke_hdfs_user_mode,
  )

  path_to_distributed_shell_jar = format("{params.install_dir}/share/hadoop/yarn/hadoop-yarn-applications-distributedshell*.jar")

  # The {placeholders} below are expanded by format() against local/params
  # scope after the list is joined into a single command line; the local
  # variable names must therefore stay exactly as spelled.
  yarn_distrubuted_shell_check_params = [
      params.install_dir + "/bin/yarn org.apache.hadoop.yarn.applications.distributedshell.Client",
      "-shell_command", "ls", "-num_containers", "{number_of_nm}",
      "-jar", "{path_to_distributed_shell_jar}", "-timeout", "300000",
      "--queue", "{service_check_queue_name}"
  ]
  yarn_distrubuted_shell_check_cmd = format(" ".join(yarn_distrubuted_shell_check_params))

  if params.security_enabled:
    kinit_cmd = format("{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};")
    smoke_cmd = format("{kinit_cmd} {yarn_distrubuted_shell_check_cmd}")
  else:
    smoke_cmd = yarn_distrubuted_shell_check_cmd

  return_code, out = shell.checked_call(smoke_cmd,
                                        path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
                                        user=params.smokeuser,
  )

  # BUG FIX: re.search() returns None when the client output carries no
  # tracking URL; previously this crashed with AttributeError on m.group().
  # The pattern is now also a raw string so "\s" is not a stray escape.
  m = re.search(r"appTrackingUrl=(.*),\s", out)
  if m is None:
    raise Fail("Could not find appTrackingUrl in the DistributedShell client output.")
  app_url = m.group(1)
  splitted_app_url = str(app_url).split('/')

  # BUG FIX: application_name could previously be referenced while unbound
  # when no URL segment contained "application"; fail explicitly instead.
  application_name = None
  for item in splitted_app_url:
    if "application" in item:
      application_name = item
  if application_name is None:
    raise Fail("Could not parse an application id from tracking URL: " + app_url)

  # Find out the active RM from RM list
  # Raise an exception if the active rm cannot be determined
  active_rm_webapp_address = self.get_active_rm_webapp_address()
  Logger.info("Active Resource Manager web app address is : " + active_rm_webapp_address)

  # Verify job state from active resource manager via rest api
  info_app_url = params.scheme + "://" + active_rm_webapp_address + "/ws/v1/cluster/apps/" + application_name
  get_app_info_cmd = "curl --negotiate -u : -ks --location-trusted --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + info_app_url

  return_code, stdout, _ = get_user_call_output(get_app_info_cmd,
                                                user=params.smokeuser,
                                                path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
  )

  try:
    json_response = json.loads(stdout)
  except Exception:
    raise Fail(format("Response from YARN API was not a valid JSON. Response: {stdout}"))

  if json_response is None or 'app' not in json_response or \
      'state' not in json_response['app'] or 'finalStatus' not in json_response['app']:
    raise Fail("Application " + app_url + " returns invalid data.")

  if json_response['app']['state'] != "FINISHED" or json_response['app']['finalStatus'] != "SUCCEEDED":
    raise Fail("Application " + app_url + " state/status is not valid. Should be FINISHED/SUCCEEDED.")
def oozie_service(action='start', upgrade_type=None):
  """
  Starts or stops the Oozie service

  :param action: 'start' or 'stop'
  :param upgrade_type: type of upgrade, either "rolling" or "non_rolling"
  skipped since a variation of them was performed during the rolling upgrade
  :return:
  """
  import params

  environment = {'OOZIE_CONFIG': params.conf_dir}

  if params.security_enabled:
    if params.oozie_principal is None:
      oozie_principal_with_host = 'missing_principal'
    else:
      oozie_principal_with_host = params.oozie_principal.replace("_HOST", params.hostname)
    kinit_if_needed = format("{kinit_path_local} -kt {oozie_keytab} {oozie_principal_with_host};")
  else:
    kinit_if_needed = ""

  # Succeeds only when the pid file exists and the process is alive; used as
  # not_if/only_if guard for start/stop.
  no_op_test = as_user(format("ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"),
                       user=params.oozie_user)

  if action == 'start':
    start_cmd = format("cd {oozie_tmp_dir} && {oozie_home}/bin/oozie-start.sh")
    path_to_jdbc = params.target

    if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \
       params.jdbc_driver_name == "com.microsoft.sqlserver.jdbc.SQLServerDriver" or \
       params.jdbc_driver_name == "org.postgresql.Driver" or \
       params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver":
      if not params.jdbc_driver_jar:
        # NOTE(review): the conditional expression binds looser than '+', so
        # path_to_jdbc becomes None when the driver has no default connector
        # mapping -- os.path.isfile(None) below would then raise TypeError.
        path_to_jdbc = format("{oozie_libext_dir}/") + \
            params.default_connectors_map[params.jdbc_driver_name] if params.jdbc_driver_name in params.default_connectors_map else None
        if not os.path.isfile(path_to_jdbc):
          path_to_jdbc = format("{oozie_libext_dir}/") + "*"
          # NOTE(review): the {db_name}/{path_to_jdbc} placeholders in this
          # message are never expanded by format(); they are shown literally
          # as guidance to the operator.
          error_message = "Error! Sorry, but we can't find jdbc driver with default name " + params.default_connectors_map[params.jdbc_driver_name] + \
              " in oozie lib dir. So, db connection check can fail. Please run 'ambari-server setup --jdbc-db={db_name} --jdbc-driver={path_to_jdbc} on server host.'"
          Logger.error(error_message)

      # {oozie_metastore_user_passwd!p} uses Ambari's password conversion so
      # the secret is masked in command logs.
      db_connection_check_command = format("{java_home}/bin/java -cp {check_db_connection_jar}:{path_to_jdbc} org.apache.ambari.server.DBConnectionVerification '{oozie_jdbc_connection_url}' {oozie_metastore_user_name} {oozie_metastore_user_passwd!p} {jdbc_driver_name}")
    else:
      db_connection_check_command = None

    # DB bootstrap and sharelib upload happen only on a regular start, not
    # during an upgrade (a variation already ran as part of the upgrade).
    if upgrade_type is None:
      if not os.path.isfile(path_to_jdbc) and params.jdbc_driver_name == "org.postgresql.Driver":
        print format("ERROR: jdbc file {target} is unavailable. Please, follow next steps:\n" \
          "1) Download postgresql-9.0-801.jdbc4.jar.\n2) Create needed directory: mkdir -p {oozie_home}/libserver/\n" \
          "3) Copy postgresql-9.0-801.jdbc4.jar to newly created dir: cp /path/to/jdbc/postgresql-9.0-801.jdbc4.jar " \
          "{oozie_home}/libserver/\n4) Copy postgresql-9.0-801.jdbc4.jar to libext: cp " \
          "/path/to/jdbc/postgresql-9.0-801.jdbc4.jar {oozie_home}/libext/\n")
        exit(1)

      if db_connection_check_command:
        Execute(db_connection_check_command,
                tries=5,
                try_sleep=10,
                user=params.oozie_user,
        )

      # Create the Oozie DB schema; ignore_failures because the schema may
      # already exist.
      Execute(format("cd {oozie_tmp_dir} && {oozie_home}/bin/ooziedb.sh create -sqlfile oozie.sql -run"),
              user=params.oozie_user,
              not_if=no_op_test,
              ignore_failures=True)

      if params.security_enabled:
        Execute(kinit_if_needed,
                user=params.oozie_user,
        )

      if params.host_sys_prepped:
        print "Skipping creation of oozie sharelib as host is sys prepped"
        hdfs_share_dir_exists = True  # skip time-expensive hadoop fs -ls check
      elif WebHDFSUtil.is_webhdfs_available(params.is_webhdfs_enabled, params.default_fs):
        # check with webhdfs is much faster than executing hadoop fs -ls.
        util = WebHDFSUtil(params.hdfs_site, params.oozie_user, params.security_enabled)
        list_status = util.run_command(params.hdfs_share_dir, 'GETFILESTATUS',
                                       method='GET',
                                       ignore_status_codes=['404'],
                                       assertable_result=False)
        hdfs_share_dir_exists = ('FileStatus' in list_status)
      else:
        # have to do time expensive hadoop fs -ls check.
        hdfs_share_dir_exists = shell.call(format("{kinit_if_needed} hadoop --config {hadoop_conf_dir} dfs -ls {hdfs_share_dir} | awk 'BEGIN {{count=0;}} /share/ {{count++}} END {{if (count > 0) {{exit 0}} else {{exit 1}}}}'"),
                                           user=params.oozie_user)[0]

      if not hdfs_share_dir_exists:
        Execute(params.put_shared_lib_to_hdfs_cmd,
                user=params.oozie_user,
                path=params.execute_path)
        params.HdfsResource(format("{oozie_hdfs_user_dir}/share"),
                            type="directory",
                            action="create_on_execute",
                            mode=0755,
                            recursive_chmod=True,
        )
        params.HdfsResource(None, action="execute")

    try:
      # start oozie
      Execute(start_cmd,
              environment=environment,
              user=params.oozie_user,
              not_if=no_op_test)
    except:
      show_logs(params.oozie_log_dir, params.oozie_user)
      raise

  elif action == 'stop':
    stop_cmd = format("cd {oozie_tmp_dir} && {oozie_home}/bin/oozie-stop.sh")

    try:
      # stop oozie
      Execute(stop_cmd,
              environment=environment,
              only_if=no_op_test,
              user=params.oozie_user)
    except:
      show_logs(params.oozie_log_dir, params.oozie_user)
      raise

    File(params.pid_file, action="delete")
# cannot finish the copy to the final destination, and both fail! # For this reason, the file name on the destination must be unique, and we then rename it to the intended value. # The rename operation is synchronized by the Namenode. #unique_string = str(uuid.uuid4())[:8] #temp_dest_file = dest_file + "." + unique_string # The logic above cannot be used until fast-hdfs-resource.jar supports the mv command, or it switches # to WebHDFS. # If the directory already exists, it is a NO-OP dest_dir = os.path.dirname(dest_file) params.HdfsResource(dest_dir, type="directory", action="create_on_execute", owner=owner, mode=0555 ) # If the file already exists, it is a NO-OP params.HdfsResource(dest_file, type="file", action="create_on_execute", source=source_file, group=user_group, owner=owner, mode=0444, replace_existing_files=replace_existing_files, ) Logger.info("Will attempt to copy {0} tarball from {1} to DFS at {2}.".format(name, source_file, dest_file))
def yarn(name=None):
  """
  Lay down YARN/MapReduce directories and configuration for the given
  component.

  :param name: one of "historyserver", "nodemanager", "resourcemanager",
               "apptimelineserver", or None for client-only configuration.
               Component-independent directories and *-site.xml files are
               written on every invocation.
  """
  import params

  if name == "historyserver":
    if params.yarn_log_aggregation_enabled:
      # Destination for aggregated NodeManager application logs.
      params.HdfsResource(params.yarn_nm_app_log_dir,
                          action="create_on_execute",
                          type="directory",
                          owner=params.yarn_user,
                          group=params.user_group,
                          mode=0777,
                          recursive_chmod=True)
    params.HdfsResource("/mapred",
                        type="directory",
                        action="create_on_execute",
                        owner=params.mapred_user)
    params.HdfsResource("/mapred/system",
                        type="directory",
                        action="create_on_execute",
                        owner=params.hdfs_user)
    params.HdfsResource(params.mapreduce_jobhistory_done_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=params.mapred_user,
                        group=params.user_group,
                        change_permissions_for_parents=True,
                        mode=0777)
    # Execute all queued DFS operations in one batch.
    params.HdfsResource(None, action="execute")

    Directory(params.jhs_leveldb_state_store_dir,
              owner=params.mapred_user,
              group=params.user_group,
              recursive=True,
              cd_access="a",
    )
    # Explicit recursive chown of the leveldb store (Directory() above does
    # not recurse into pre-existing content).
    Execute(("chown", "-R", format("{mapred_user}:{user_group}"), params.jhs_leveldb_state_store_dir),
            sudo=True,
    )

  if name == "nodemanager":
    # First start after enabling/disabling security
    if params.toggle_nm_security:
      Directory(params.nm_local_dirs.split(',') + params.nm_log_dirs.split(','),
                action='delete')

      # If yarn.nodemanager.recovery.dir exists, remove this dir
      if params.yarn_nodemanager_recovery_dir:
        Directory(InlineTemplate(params.yarn_nodemanager_recovery_dir).get_content(),
                  action='delete')

      # Setting NM marker file
      if params.security_enabled:
        File(params.nm_security_marker,
             content="Marker file to track first start after enabling/disabling security. "
                     "During first start yarn local, log dirs are removed and recreated")
      elif not params.security_enabled:
        File(params.nm_security_marker, action="delete")

    if not params.security_enabled or params.toggle_nm_security:
      Directory(params.nm_local_dirs.split(',') + params.nm_log_dirs.split(','),
                owner=params.yarn_user,
                group=params.user_group,
                recursive=True,
                cd_access="a",
                ignore_failures=True,
                mode=0775)

    if params.yarn_nodemanager_recovery_dir:
      Directory(InlineTemplate(params.yarn_nodemanager_recovery_dir).get_content(),
                owner=params.yarn_user,
                group=params.user_group,
                recursive=True,
                mode=0755,
                cd_access='a',
      )

  # Directories needed by every component.
  Directory([params.yarn_pid_dir_prefix, params.yarn_pid_dir, params.yarn_log_dir],
            owner=params.yarn_user,
            group=params.user_group,
            recursive=True,
            cd_access='a',
  )
  Directory([params.mapred_pid_dir_prefix, params.mapred_pid_dir, params.mapred_log_dir_prefix, params.mapred_log_dir],
            owner=params.mapred_user,
            group=params.user_group,
            recursive=True,
            cd_access='a',
  )
  Directory([params.yarn_log_dir_prefix],
            owner=params.yarn_user,
            recursive=True,
            ignore_failures=True,
            cd_access='a',
  )

  XmlConfig("core-site.xml",
            conf_dir=params.hadoop_conf_dir,
            configurations=params.config['configurations']['core-site'],
            configuration_attributes=params.config['configuration_attributes']['core-site'],
            owner=params.hdfs_user,
            group=params.user_group,
            mode=0644)

  # During RU, Core Masters and Slaves need hdfs-site.xml
  # TODO, instead of specifying individual configs, which is susceptible to breaking when new configs are added,
  # RU should rely on all available in /usr/hdp/<version>/hadoop/conf
  if 'hdfs-site' in params.config['configurations']:
    XmlConfig("hdfs-site.xml",
              conf_dir=params.hadoop_conf_dir,
              configurations=params.config['configurations']['hdfs-site'],
              configuration_attributes=params.config['configuration_attributes']['hdfs-site'],
              owner=params.hdfs_user,
              group=params.user_group,
              mode=0644)

  XmlConfig("mapred-site.xml",
            conf_dir=params.hadoop_conf_dir,
            configurations=params.config['configurations']['mapred-site'],
            configuration_attributes=params.config['configuration_attributes']['mapred-site'],
            owner=params.yarn_user,
            group=params.user_group,
            mode=0644)

  XmlConfig("yarn-site.xml",
            conf_dir=params.hadoop_conf_dir,
            configurations=params.config['configurations']['yarn-site'],
            configuration_attributes=params.config['configuration_attributes']['yarn-site'],
            owner=params.yarn_user,
            group=params.user_group,
            mode=0644)

  XmlConfig("capacity-scheduler.xml",
            conf_dir=params.hadoop_conf_dir,
            configurations=params.config['configurations']['capacity-scheduler'],
            configuration_attributes=params.config['configuration_attributes']['capacity-scheduler'],
            owner=params.yarn_user,
            group=params.user_group,
            mode=0644)

  if name == 'resourcemanager':
    File(params.yarn_job_summary_log,
         owner=params.yarn_user,
         group=params.user_group)

    # NOTE(review): 'and' binds tighter than 'or' here -- the node labels dir
    # is created when node_label_enable is set true, or when it is unset but
    # node_labels_dir is configured.
    if not is_empty(params.node_label_enable) and params.node_label_enable or is_empty(params.node_label_enable) and params.node_labels_dir:
      params.HdfsResource(params.node_labels_dir,
                          type="directory",
                          action="create_on_execute",
                          change_permissions_for_parents=True,
                          owner=params.yarn_user,
                          group=params.user_group,
                          mode=0700)
      params.HdfsResource(None, action="execute")

    if params.toggle_rm_security:
      # First start after enabling/disabling security: reset the RM state store.
      Execute('yarn resourcemanager -format-state-store',
              user=params.yarn_user,
      )

      # Setting RM marker file
      if params.security_enabled:
        File(params.rm_security_marker,
             content="Marker file to track first start after enabling/disabling security. "
                     "During first start ResourceManager state store is formatted")
      elif not params.security_enabled:
        File(params.rm_security_marker, action="delete")
  elif name == 'apptimelineserver':
    Directory(params.ats_leveldb_dir,
              owner=params.yarn_user,
              group=params.user_group,
              recursive=True,
              cd_access="a",
    )

    # if HDP stack is greater than/equal to 2.2, mkdir for state store property (added in 2.2)
    if (Script.is_hdp_stack_greater_or_equal("2.2")):
      Directory(params.ats_leveldb_state_store_dir,
                owner=params.yarn_user,
                group=params.user_group,
                recursive=True,
                cd_access="a",
      )

  File(params.rm_nodes_exclude_path,
       owner=params.yarn_user,
       group=params.user_group)

  File(format("{limits_conf_dir}/yarn.conf"),
       mode=0644,
       content=Template('yarn.conf.j2'))

  File(format("{limits_conf_dir}/mapreduce.conf"),
       mode=0644,
       content=Template('mapreduce.conf.j2'))

  File(format("{hadoop_conf_dir}/yarn-env.sh"),
       owner=params.yarn_user,
       group=params.user_group,
       mode=0755,
       content=InlineTemplate(params.yarn_env_sh_template))

  # container-executor binary and its config; the mode comes from params.
  container_executor = format("{yarn_container_bin}/container-executor")
  File(container_executor,
       group=params.yarn_executor_container_group,
       mode=params.container_executor_mode)

  File(format("{hadoop_conf_dir}/container-executor.cfg"),
       group=params.user_group,
       mode=0644,
       content=Template('container-executor.cfg.j2'))

  Directory(params.cgroups_dir,
            group=params.user_group,
            recursive=True,
            mode=0755,
            cd_access="a")

  # taskcontroller.cfg ownership/mode differ between secure and plain clusters.
  if params.security_enabled:
    tc_mode = 0644
    tc_owner = "root"
  else:
    tc_mode = None
    tc_owner = params.hdfs_user

  File(format("{hadoop_conf_dir}/mapred-env.sh"),
       owner=tc_owner,
       content=InlineTemplate(params.mapred_env_sh_template))

  if params.security_enabled:
    # setuid/setgid bits (06050) are required for the task-controller binary.
    File(os.path.join(params.hadoop_bin, "task-controller"),
         owner="root",
         group=params.mapred_tt_group,
         mode=06050)
    File(os.path.join(params.hadoop_conf_dir, 'taskcontroller.cfg'),
         owner=tc_owner,
         mode=tc_mode,
         group=params.mapred_tt_group,
         content=Template("taskcontroller.cfg.j2"))
  else:
    File(os.path.join(params.hadoop_conf_dir, 'taskcontroller.cfg'),
         owner=tc_owner,
         content=Template("taskcontroller.cfg.j2"))

  if "mapred-site" in params.config['configurations']:
    XmlConfig("mapred-site.xml",
              conf_dir=params.hadoop_conf_dir,
              configurations=params.config['configurations']['mapred-site'],
              configuration_attributes=params.config['configuration_attributes']['mapred-site'],
              owner=params.mapred_user,
              group=params.user_group)

  if "capacity-scheduler" in params.config['configurations']:
    XmlConfig("capacity-scheduler.xml",
              conf_dir=params.hadoop_conf_dir,
              configurations=params.config['configurations']['capacity-scheduler'],
              configuration_attributes=params.config['configuration_attributes']['capacity-scheduler'],
              owner=params.hdfs_user,
              group=params.user_group)

  if "ssl-client" in params.config['configurations']:
    XmlConfig("ssl-client.xml",
              conf_dir=params.hadoop_conf_dir,
              configurations=params.config['configurations']['ssl-client'],
              configuration_attributes=params.config['configuration_attributes']['ssl-client'],
              owner=params.hdfs_user,
              group=params.user_group)

    Directory(params.hadoop_conf_secure_dir,
              recursive=True,
              owner='root',
              group=params.user_group,
              cd_access='a',
    )

    # Same ssl-client.xml is also written into the secure conf dir.
    XmlConfig("ssl-client.xml",
              conf_dir=params.hadoop_conf_secure_dir,
              configurations=params.config['configurations']['ssl-client'],
              configuration_attributes=params.config['configuration_attributes']['ssl-client'],
              owner=params.hdfs_user,
              group=params.user_group)

  if "ssl-server" in params.config['configurations']:
    XmlConfig("ssl-server.xml",
              conf_dir=params.hadoop_conf_dir,
              configurations=params.config['configurations']['ssl-server'],
              configuration_attributes=params.config['configuration_attributes']['ssl-server'],
              owner=params.hdfs_user,
              group=params.user_group)

  # Fix ownership of pre-existing optional config files.
  if os.path.exists(os.path.join(params.hadoop_conf_dir, 'fair-scheduler.xml')):
    File(os.path.join(params.hadoop_conf_dir, 'fair-scheduler.xml'),
         owner=params.mapred_user,
         group=params.user_group)

  if os.path.exists(os.path.join(params.hadoop_conf_dir, 'ssl-client.xml.example')):
    File(os.path.join(params.hadoop_conf_dir, 'ssl-client.xml.example'),
         owner=params.mapred_user,
         group=params.user_group)

  if os.path.exists(os.path.join(params.hadoop_conf_dir, 'ssl-server.xml.example')):
    File(os.path.join(params.hadoop_conf_dir, 'ssl-server.xml.example'),
         owner=params.mapred_user,
         group=params.user_group)
def spark_service(name, upgrade_type=None, action=None):
  """Start or stop a Spark daemon.

  :param name: which daemon to manage: 'jobhistoryserver' or 'sparkthriftserver'.
  :param upgrade_type: non-None while a stack upgrade is in progress; in that
      case params.version (the upgrade target) is used for stack-feature
      checks instead of the currently installed formatted version.
  :param action: 'start' or 'stop'; any other value is a no-op.
  """
  import params

  if action == 'start':

    # Pick the stack version to evaluate feature flags against: the upgrade
    # target version during an upgrade, the installed version otherwise.
    effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
    if effective_version:
      effective_version = format_stack_version(effective_version)

    if effective_version and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version):
      # create & copy spark2-hdp-yarn-archive.tar.gz to hdfs
      source_dir = params.spark_home + "/jars"
      # NOTE(review): fixed local staging path; assumes /tmp/spark2 is usable
      # on this host -- confirm make_tarfile creates the parent directory.
      tmp_archive_file = "/tmp/spark2/spark2-hdp-yarn-archive.tar.gz"
      make_tarfile(tmp_archive_file, source_dir)
      copy_to_hdfs("spark2", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)
      # create spark history directory
      params.HdfsResource(params.spark_history_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.spark_user,
                          group=params.user_group,
                          mode=0777,
                          recursive_chmod=True)
      # Flush the HDFS operations queued above.
      params.HdfsResource(None, action="execute")

    if params.security_enabled:
      # Obtain a Kerberos ticket for the spark user before starting a daemon.
      spark_kinit_cmd = format("{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ")
      Execute(spark_kinit_cmd, user=params.spark_user)

    # Spark 1.3.1.2.3, and higher, which was included in HDP 2.3, does not have a dependency on Tez, so it does not
    # need to copy the tarball, otherwise, copy it.
    if params.stack_version_formatted and check_stack_feature(StackFeature.TEZ_FOR_SPARK, params.stack_version_formatted):
      resource_created = copy_to_hdfs("tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)
      if resource_created:
        params.HdfsResource(None, action="execute")

    if name == 'jobhistoryserver':
      # Skip the start command when a live process already owns the pid file.
      historyserver_no_op_test = format(
        'ls {spark_history_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_history_server_pid_file}` >/dev/null 2>&1')
      try:
        Execute(format('{spark_history_server_start}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=historyserver_no_op_test)
      except:
        # Surface daemon logs in the command output, then re-raise.
        # NOTE(review): bare except (re-raised) -- matches file convention.
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise

    elif name == 'sparkthriftserver':
      if params.security_enabled:
        # The thrift server runs as the hive user with the hive keytab;
        # _HOST in the principal is expanded to this host's FQDN.
        hive_principal = params.hive_kerberos_principal.replace('_HOST', socket.getfqdn().lower())
        hive_kinit_cmd = format("{kinit_path_local} -kt {hive_kerberos_keytab} {hive_principal}; ")
        Execute(hive_kinit_cmd, user=params.hive_user)

      # Skip the start command when a live process already owns the pid file.
      thriftserver_no_op_test = format(
        'ls {spark_thrift_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_thrift_server_pid_file}` >/dev/null 2>&1')
      try:
        Execute(format(
          '{spark_thrift_server_start} --properties-file {spark_thrift_server_conf_file} {spark_thrift_cmd_opts_properties}'),
                user=params.hive_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=thriftserver_no_op_test)
      except:
        show_logs(params.spark_log_dir, user=params.hive_user)
        raise

  elif action == 'stop':
    if name == 'jobhistoryserver':
      try:
        Execute(format('{spark_history_server_stop}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home})
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise
      # Remove the stale pid file once the daemon is down.
      File(params.spark_history_server_pid_file, action="delete")

    elif name == 'sparkthriftserver':
      try:
        Execute(format('{spark_thrift_server_stop}'),
                user=params.hive_user,
                environment={'JAVA_HOME': params.java_home})
      except:
        show_logs(params.spark_log_dir, user=params.hive_user)
        raise
      # Remove the stale pid file once the daemon is down.
      File(params.spark_thrift_server_pid_file, action="delete")
def hive_interactive(name=None):
  """Lay down configuration for Hive Server Interactive (LLAP).

  Merges the base hive-site/tez-site configs with their *-interactive
  overrides, writes the merged XML plus env/log4j2/metrics files into every
  directory in params.hive_conf_dirs_list, and creates the pid/log/var-lib
  directories the server needs.

  :param name: unused in this body; presumably kept for signature parity
      with sibling setup functions -- TODO confirm against callers.
  """
  import params

  # list of properties that should be excluded from the config
  # this approach is a compromise against adding a dedicated config
  # type for hive_server_interactive or needed config groups on a
  # per component basis
  exclude_list = ['hive.enforce.bucketing', 'hive.enforce.sorting']

  # Copy Tarballs in HDFS.
  if params.stack_version_formatted_major and check_stack_feature(StackFeature.ROLLING_UPGRADE, params.stack_version_formatted_major):
    resource_created = copy_to_hdfs(
      "tez_hive2",
      params.user_group,
      params.hdfs_user,
      file_mode=params.tarballs_mode,
      host_sys_prepped=params.host_sys_prepped)

    if resource_created:
      # Flush the queued HDFS copy declared above.
      params.HdfsResource(None, action="execute")

  Directory(params.hive_interactive_etc_dir_prefix, mode=0755)

  Logger.info("Directories to fill with configs: %s" % str(params.hive_conf_dirs_list))
  for conf_dir in params.hive_conf_dirs_list:
    fill_conf_dir(conf_dir)

  '''
  As hive2/hive-site.xml only contains the new + the changed props compared to hive/hive-site.xml,
  we need to merge hive/hive-site.xml and hive2/hive-site.xml and store it in hive2/hive-site.xml.
  '''
  merged_hive_interactive_site = {}
  merged_hive_interactive_site.update(params.config['configurations']['hive-site'])
  # Interactive-site values win over the base hive-site values.
  merged_hive_interactive_site.update(params.config['configurations']['hive-interactive-site'])
  for item in exclude_list:
    if item in merged_hive_interactive_site.keys():
      del merged_hive_interactive_site[item]

  '''
  Hive2 doesn't have support for Atlas, we need to remove the Hook 'org.apache.atlas.hive.hook.HiveHook',
  which would have come in config 'hive.exec.post.hooks' during the site merge logic, if Atlas is installed.
  '''
  remove_atlas_hook_if_exists(merged_hive_interactive_site)

  '''
  As tez_hive2/tez-site.xml only contains the new + the changed props compared to tez/tez-site.xml,
  we need to merge tez/tez-site.xml and tez_hive2/tez-site.xml and store it in tez_hive2/tez-site.xml.
  '''
  merged_tez_interactive_site = {}
  if 'tez-site' in params.config['configurations']:
    merged_tez_interactive_site.update(params.config['configurations']['tez-site'])
    Logger.info("Retrieved 'tez/tez-site' for merging with 'tez_hive2/tez-interactive-site'.")
  else:
    # Deliberately continue with only the interactive overrides when the
    # base tez-site is absent -- logged as an error, not raised.
    Logger.error("Tez's 'tez-site' couldn't be retrieved from passed-in configurations.")

  merged_tez_interactive_site.update(params.config['configurations']['tez-interactive-site'])
  XmlConfig("tez-site.xml",
            conf_dir=params.tez_interactive_config_dir,
            configurations=merged_tez_interactive_site,
            configuration_attributes=params.config['configuration_attributes']['tez-interactive-site'],
            owner=params.tez_interactive_user,
            group=params.user_group,
            mode=0664)

  # Create config files under /etc/hive2/conf and /etc/hive2/conf/conf.server:
  #   hive-site.xml
  #   hive-env.sh
  #   llap-daemon-log4j2.properties
  #   llap-cli-log4j2.properties
  #   hive-log4j2.properties
  #   hive-exec-log4j2.properties
  #   beeline-log4j2.properties
  for conf_dir in params.hive_conf_dirs_list:
    XmlConfig("hive-site.xml",
              conf_dir=conf_dir,
              configurations=merged_hive_interactive_site,
              configuration_attributes=params.config['configuration_attributes']['hive-interactive-site'],
              owner=params.hive_user,
              group=params.user_group,
              mode=0644)

    # Alias used by the format() calls below.
    hive_server_interactive_conf_dir = conf_dir

    File(format("{hive_server_interactive_conf_dir}/hive-env.sh"),
         owner=params.hive_user,
         group=params.user_group,
         content=InlineTemplate(params.hive_interactive_env_sh_template))

    llap_daemon_log4j_filename = 'llap-daemon-log4j2.properties'
    File(format("{hive_server_interactive_conf_dir}/{llap_daemon_log4j_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.hive_user,
         content=params.llap_daemon_log4j)

    llap_cli_log4j2_filename = 'llap-cli-log4j2.properties'
    File(format("{hive_server_interactive_conf_dir}/{llap_cli_log4j2_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.hive_user,
         content=params.llap_cli_log4j2)

    hive_log4j2_filename = 'hive-log4j2.properties'
    File(format("{hive_server_interactive_conf_dir}/{hive_log4j2_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.hive_user,
         content=params.hive_log4j2)

    hive_exec_log4j2_filename = 'hive-exec-log4j2.properties'
    File(format("{hive_server_interactive_conf_dir}/{hive_exec_log4j2_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.hive_user,
         content=params.hive_exec_log4j2)

    beeline_log4j2_filename = 'beeline-log4j2.properties'
    File(format("{hive_server_interactive_conf_dir}/{beeline_log4j2_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.hive_user,
         content=params.beeline_log4j2)

    File(format("{hive_server_interactive_conf_dir}/hadoop-metrics2-llapdaemon.properties"),
         owner=params.hive_user,
         group=params.user_group,
         content=Template("hadoop-metrics2-llapdaemon.j2"))

    File(format("{hive_server_interactive_conf_dir}/hadoop-metrics2-llaptaskscheduler.properties"),
         owner=params.hive_user,
         group=params.user_group,
         content=Template("hadoop-metrics2-llaptaskscheduler.j2"))

  # On some OS this folder could be not exists, so we will create it before pushing there files
  Directory(params.limits_conf_dir,
            create_parents=True,
            owner='root',
            group='root')

  File(os.path.join(params.limits_conf_dir, 'hive.conf'),
       owner='root',
       group='root',
       mode=0644,
       content=Template("hive.conf.j2"))

  if not os.path.exists(params.target_hive_interactive):
    # NOTE(review): 'intaractive' is misspelled but matches the attribute
    # name declared in the params module -- renaming must start there.
    jdbc_connector(params.target_hive_interactive, params.hive_intaractive_previous_jdbc_jar)

  # Jar used by the DB connection check, downloaded from the Ambari server.
  File(format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"),
       content=DownloadSource(format("{jdk_location}{check_db_connection_jar_name}")),
       mode=0644)

  File(params.start_hiveserver2_interactive_path,
       mode=0755,
       content=Template(format('{start_hiveserver2_interactive_script}')))

  Directory(params.hive_pid_dir,
            create_parents=True,
            cd_access='a',
            owner=params.hive_user,
            group=params.user_group,
            mode=0755)

  Directory(params.hive_log_dir,
            create_parents=True,
            cd_access='a',
            owner=params.hive_user,
            group=params.user_group,
            mode=0755)

  Directory(params.hive_interactive_var_lib,
            create_parents=True,
            cd_access='a',
            owner=params.hive_user,
            group=params.user_group,
            mode=0755)
def setup_conf_dir(name=None): # 'master' or 'tserver' or 'monitor' or 'gc' or 'tracer' or 'client'
  """Create and populate the Accumulo configuration directory for a role.

  Clients get a site file with secrets stripped (mode 0644); server roles
  get a 0600 site file containing the instance secret and trace password,
  plus pid/log directories. The 'master' role additionally initializes the
  Accumulo instance in HDFS; 'tracer' sets up the trace user and its
  permissions on the trace table.

  :param name: role being configured -- 'master', 'tserver', 'monitor',
      'gc', 'tracer' or 'client'.
  """
  import params

  # create the conf directory
  Directory(params.conf_dir,
            mode=0755,
            owner=params.accumulo_user,
            group=params.user_group,
            create_parents=True)

  if name == 'client':
    dest_conf_dir = params.conf_dir

    # create a site file for client processes
    configs = {}
    configs.update(params.config['configurations']['accumulo-site'])
    # Never ship server-side secrets in the client config.
    if "instance.secret" in configs:
      configs.pop("instance.secret")
    if "trace.token.property.password" in configs:
      configs.pop("trace.token.property.password")
    XmlConfig("accumulo-site.xml",
              conf_dir=dest_conf_dir,
              configurations=configs,
              configuration_attributes=params.config['configuration_attributes']['accumulo-site'],
              owner=params.accumulo_user,
              group=params.user_group,
              mode=0644)

    # create env file
    File(format("{dest_conf_dir}/accumulo-env.sh"),
         mode=0644,
         group=params.user_group,
         owner=params.accumulo_user,
         content=InlineTemplate(params.env_sh_template))
  else:
    dest_conf_dir = params.server_conf_dir

    # create server conf directory
    Directory(params.server_conf_dir,
              mode=0700,
              owner=params.accumulo_user,
              group=params.user_group,
              create_parents=True)

    # create a site file for server processes
    configs = {}
    configs.update(params.config['configurations']['accumulo-site'])
    configs["instance.secret"] = str(params.config['configurations']['accumulo-env']['instance_secret'])
    configs["trace.token.property.password"] = str(params.trace_password)
    # mode 0600: the server site file carries secrets.
    XmlConfig("accumulo-site.xml",
              conf_dir=dest_conf_dir,
              configurations=configs,
              configuration_attributes=params.config['configuration_attributes']['accumulo-site'],
              owner=params.accumulo_user,
              group=params.user_group,
              mode=0600)

    # create pid dir
    Directory(params.pid_dir,
              owner=params.accumulo_user,
              group=params.user_group,
              create_parents=True,
              cd_access="a",
              mode=0755,
              )

    # create log dir
    Directory(params.log_dir,
              owner=params.accumulo_user,
              group=params.user_group,
              create_parents=True,
              cd_access="a",
              mode=0755,
              )

    # create env file
    File(format("{dest_conf_dir}/accumulo-env.sh"),
         mode=0644,
         group=params.user_group,
         owner=params.accumulo_user,
         content=InlineTemplate(params.server_env_sh_template))

    if params.security_enabled:
      accumulo_TemplateConfig("accumulo_jaas.conf", dest_conf_dir)

  # create client.conf file
  configs = {}
  if 'client' in params.config['configurations']:
    configs.update(params.config['configurations']['client'])
  configs["instance.name"] = params.instance_name
  configs["instance.zookeeper.host"] = params.config['configurations']['accumulo-site']['instance.zookeeper.host']
  # Mirror the transport/security-related site properties into client.conf.
  copy_site_property(configs, 'instance.rpc.sasl.enabled')
  copy_site_property(configs, 'rpc.sasl.qop')
  copy_site_property(configs, 'rpc.useJsse')
  copy_site_property(configs, 'instance.rpc.ssl.clientAuth')
  copy_site_property(configs, 'instance.rpc.ssl.enabled')
  copy_site_property(configs, 'instance.zookeeper.timeout')
  copy_site_property(configs, 'trace.span.receivers')
  copy_site_property(configs, 'trace.zookeeper.path')
  # Carry over every per-receiver trace property as well.
  for key, value in params.config['configurations']['accumulo-site'].iteritems():
    if key.startswith("trace.span.receiver."):
      configs[key] = value
  PropertiesFile(format("{dest_conf_dir}/client.conf"),
                 properties=configs,
                 owner=params.accumulo_user,
                 group=params.user_group)

  # create log4j.properties files
  if (params.log4j_props != None):
    File(format("{dest_conf_dir}/log4j.properties"),
         mode=0644,
         group=params.user_group,
         owner=params.accumulo_user,
         content=params.log4j_props)
  else:
    # NOTE(review): this fallback sets owner to hbase_user (not
    # accumulo_user) and writes no content -- looks like a copy/paste from
    # an HBase script; verify before relying on this branch.
    File(format("{dest_conf_dir}/log4j.properties"),
         mode=0644,
         group=params.user_group,
         owner=params.hbase_user)

  # create logging configuration files
  accumulo_TemplateConfig("auditLog.xml", dest_conf_dir)
  accumulo_TemplateConfig("generic_logger.xml", dest_conf_dir)
  accumulo_TemplateConfig("monitor_logger.xml", dest_conf_dir)
  accumulo_StaticFile("accumulo-metrics.xml", dest_conf_dir)

  # create host files
  accumulo_TemplateConfig("tracers", dest_conf_dir)
  accumulo_TemplateConfig("gc", dest_conf_dir)
  accumulo_TemplateConfig("monitor", dest_conf_dir)
  accumulo_TemplateConfig("slaves", dest_conf_dir)
  accumulo_TemplateConfig("masters", dest_conf_dir)

  # metrics configuration
  if params.has_metric_collector:
    accumulo_TemplateConfig("hadoop-metrics2-accumulo.properties", dest_conf_dir)

  # other server setup
  if name == 'master':
    # HDFS home dir for the accumulo user and the instance parent dir.
    params.HdfsResource(format("/user/{params.accumulo_user}"),
                        type="directory",
                        action="create_on_execute",
                        owner=params.accumulo_user,
                        mode=0700)
    params.HdfsResource(format("{params.parent_dir}"),
                        type="directory",
                        action="create_on_execute",
                        owner=params.accumulo_user,
                        mode=0700)
    params.HdfsResource(None, action="execute")
    if params.security_enabled and params.has_secure_user_auth:
      # Initialize the instance; skipped (not_if) when the instance volumes
      # already exist in HDFS, so re-runs are safe.
      Execute(format("{params.kinit_cmd} "
                     "{params.daemon_script} init "
                     "--user {params.accumulo_principal_name} "
                     "--instance-name {params.instance_name} "
                     "--clear-instance-name "
                     ">{params.log_dir}/accumulo-init.out "
                     "2>{params.log_dir}/accumulo-init.err"),
              not_if=as_user(format("{params.kinit_cmd} "
                                    "{params.hadoop_bin_dir}/hadoop --config "
                                    "{params.hadoop_conf_dir} fs -stat "
                                    "{params.instance_volumes}"),
                             params.accumulo_user),
              logoutput=True,
              user=params.accumulo_user)
    else:
      # Non-secure init: feed the root password on stdin via a throwaway
      # 0600 file that is deleted even if init fails.
      passfile = format("{params.exec_tmp_dir}/pass")
      try:
        File(passfile,
             mode=0600,
             group=params.user_group,
             owner=params.accumulo_user,
             content=InlineTemplate('{{root_password}}\n'
                                    '{{root_password}}\n\n'))
        Execute(format("cat {passfile} | {params.daemon_script} init "
                       "--instance-name {params.instance_name} "
                       "--clear-instance-name "
                       ">{params.log_dir}/accumulo-init.out "
                       "2>{params.log_dir}/accumulo-init.err"),
                not_if=as_user(format("{params.kinit_cmd} "
                                      "{params.hadoop_bin_dir}/hadoop --config "
                                      "{params.hadoop_conf_dir} fs -stat "
                                      "{params.instance_volumes}"),
                               params.accumulo_user),
                logoutput=True,
                user=params.accumulo_user)
      finally:
        File(passfile, action="delete")

  if name == 'tracer':
    if params.security_enabled and params.has_secure_user_auth:
      # Reset security for the principal; skipped when it already holds
      # System.CREATE_TABLE.
      Execute(format("{params.kinit_cmd} "
                     "{params.daemon_script} init --reset-security "
                     "--user {params.accumulo_principal_name} "
                     "--password NA "
                     ">{params.log_dir}/accumulo-reset.out "
                     "2>{params.log_dir}/accumulo-reset.err"),
              not_if=as_user(format("{params.kinit_cmd} "
                                    "{params.daemon_script} shell -e "
                                    "\"userpermissions -u "
                                    "{params.accumulo_principal_name}\" | "
                                    "grep System.CREATE_TABLE"),
                             params.accumulo_user),
              user=params.accumulo_user)
      create_user(params.smokeuser_principal, params.smoke_test_password)
    else:
      # do not try to reset security in nonsecure mode, for now
      # Execute( format("{params.daemon_script} init --reset-security "
      #                 "--user root "
      #                 ">{params.log_dir}/accumulo-reset.out "
      #                 "2>{params.log_dir}/accumulo-reset.err"),
      #          not_if=as_user(format("cat {rpassfile} | "
      #                                "{params.daemon_script} shell -e "
      #                                "\"userpermissions -u root\" | "
      #                                "grep System.CREATE_TABLE"),
      #                         params.accumulo_user),
      #          user=params.accumulo_user)
      create_user(params.smoke_test_user, params.smoke_test_password)
    create_user(params.trace_user, params.trace_password)
    rpassfile = format("{params.exec_tmp_dir}/pass0")
    cmdfile = format("{params.exec_tmp_dir}/resetcmds")
    try:
      # Shell script granting the trace user rights on the trace table.
      File(cmdfile,
           mode=0600,
           group=params.user_group,
           owner=params.accumulo_user,
           content=InlineTemplate('grant -t trace -u {{trace_user}} Table.ALTER_TABLE\n'
                                  'grant -t trace -u {{trace_user}} Table.READ\n'
                                  'grant -t trace -u {{trace_user}} Table.WRITE\n\n'))
      if params.security_enabled and params.has_secure_user_auth:
        # only_if: the trace table exists; not_if: grants already applied.
        # NOTE(review): the escaped \" opened before 'userpermissions' in the
        # not_if command is never closed -- verify the shell quoting.
        Execute(format("{params.kinit_cmd} {params.daemon_script} shell -f "
                       "{cmdfile}"),
                only_if=as_user(format("{params.kinit_cmd} "
                                       "{params.daemon_script} shell "
                                       "-e \"table trace\""),
                                params.accumulo_user),
                not_if=as_user(format("{params.kinit_cmd} "
                                      "{params.daemon_script} shell "
                                      "-e \"userpermissions -u "
                                      "{params.trace_user} | "
                                      "grep Table.READ | grep trace"),
                               params.accumulo_user),
                user=params.accumulo_user)
      else:
        File(rpassfile,
             mode=0600,
             group=params.user_group,
             owner=params.accumulo_user,
             content=InlineTemplate('{{root_password}}\n\n'))
        # NOTE(review): same unbalanced \" in the not_if command as above.
        Execute(format("cat {rpassfile} | {params.daemon_script} shell -f "
                       "{cmdfile} -u root"),
                only_if=as_user(format("cat {rpassfile} | "
                                       "{params.daemon_script} shell -u root "
                                       "-e \"table trace\""),
                                params.accumulo_user),
                not_if=as_user(format("cat {rpassfile} | "
                                      "{params.daemon_script} shell -u root "
                                      "-e \"userpermissions -u "
                                      "{params.trace_user} | "
                                      "grep Table.READ | grep trace"),
                               params.accumulo_user),
                user=params.accumulo_user)
    finally:
      # Best-effort cleanup of the temporary password/command files.
      try_remove(rpassfile)
      try_remove(cmdfile)