def get_check_command(oozie_url, host_name, configurations):
  """
  Builds the shell command this alert uses to query Oozie's status.

  On a secured cluster a Kerberos ticket is first obtained into a dedicated
  credentials cache so the status command can authenticate.

  Returns a tuple of (command, kerberos_env, oozie_user).
  """
  if OOZIE_USER not in configurations:
    raise Exception("Oozie user is required")

  oozie_user = configurations[OOZIE_USER]

  security_enabled = SECURITY_ENABLED in configurations and \
    str(configurations[SECURITY_ENABLED]).upper() == 'TRUE'

  kerberos_env = None

  if security_enabled:
    if OOZIE_KEYTAB not in configurations or OOZIE_PRINCIPAL not in configurations:
      raise KerberosPropertiesNotFound('The Oozie keytab and principal are required configurations when security is enabled.')

    oozie_keytab = configurations[OOZIE_KEYTAB]

    # substitute _HOST in kerberos principal with actual fqdn
    oozie_principal = configurations[OOZIE_PRINCIPAL].replace('_HOST', host_name)

    # Create the kerberos credentials cache (ccache) file and set it in the
    # environment to use when executing curl
    env = Environment.get_instance()
    ccache_file = "{0}{1}oozie_alert_cc_{2}".format(env.tmp_dir, os.sep, os.getpid())
    kerberos_env = {'KRB5CCNAME': ccache_file}

    # Get the configured Kerberos executable search paths, if any
    kerberos_executable_search_paths = configurations.get(KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)

    klist_path_local = get_klist_path(kerberos_executable_search_paths)
    klist_command = format("{klist_path_local} -s {ccache_file}")

    # Determine if we need to kinit by testing to see if the relevant cache
    # exists and has non-expired tickets. Tickets are marked to expire after
    # 5 minutes to help reduce the number of kinits we do but recover quickly
    # when keytabs are regenerated.
    return_code, _ = call(klist_command, user=oozie_user)
    if return_code != 0:
      kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
      kinit_command = format("{kinit_path_local} -l 5m -kt {oozie_keytab} {oozie_principal}; ")

      # kinit
      Execute(kinit_command, environment=kerberos_env, user=oozie_user)

  # oozie configuration directory uses a symlink when > HDP 2.2
  oozie_config_directory = OOZIE_CONF_DIR if os.path.exists(OOZIE_CONF_DIR) else OOZIE_CONF_DIR_LEGACY

  command = "source {0}/oozie-env.sh ; oozie admin -oozie {1} -status".format(
    oozie_config_directory, oozie_url)

  return (command, kerberos_env, oozie_user)
def test_service_check_secured(self):
  # Runs the Sqoop service check against the secured (Kerberos) configuration
  # and verifies the exact resources the script is expected to create.
  self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/service_check.py",
                     classname = "SqoopServiceCheck",
                     command = "service_check",
                     config_file="secured.json",
                     hdp_stack_version = self.STACK_VERSION,
                     target = RMFTestCase.TARGET_COMMON_SERVICES
  )
  kinit_path_local = get_kinit_path()
  # when security is enabled a kinit must happen before the sqoop command
  self.assertResourceCalled('Execute', kinit_path_local + ' -kt /etc/security/keytabs/smokeuser.headless.keytab [email protected]',
                            user = '******'
  )
  # the check itself is a simple 'sqoop version' invocation as the smoke user
  self.assertResourceCalled('Execute', 'sqoop version',
                            logoutput = True,
                            path = ['/usr/bin'],
                            user = '******',)
  # no other resources may be created by the service check
  self.assertNoMoreResources()
def execute(configurations={}, parameters={}, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label.

  Checks that the Hive Metastore port on this host accepts TCP connections,
  performing a kinit first when security is enabled.

  Keyword arguments:
  configurations (dictionary): a mapping of configuration key to value
  parameters (dictionary): a mapping of script parameter key to value
  host_name (string): the name of this host where the alert is running
  """
  if configurations is None:
    return (('UNKNOWN', ['There were no configurations supplied to the script.']))

  if not HIVE_METASTORE_URIS_KEY in configurations:
    return (('UNKNOWN', ['Hive metastore uris were not supplied to the script.']))

  metastore_uris = configurations[HIVE_METASTORE_URIS_KEY].split(',')

  security_enabled = False
  if SECURITY_ENABLED_KEY in configurations:
    security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

  # NOTE(review): parsed but not applied to the socket below -- confirm
  # whether a sock.settimeout(check_command_timeout) was intended
  check_command_timeout = CHECK_COMMAND_TIMEOUT_DEFAULT
  if CHECK_COMMAND_TIMEOUT_KEY in parameters:
    check_command_timeout = float(parameters[CHECK_COMMAND_TIMEOUT_KEY])

  # defaults
  smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
  smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
  smokeuser = SMOKEUSER_DEFAULT

  # check script params
  if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters:
    smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY]

  if SMOKEUSER_SCRIPT_PARAM_KEY in parameters:
    smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY]

  if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
    smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY]

  # check configurations last as they should always take precedence
  if SMOKEUSER_PRINCIPAL_KEY in configurations:
    smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]

  if SMOKEUSER_KEY in configurations:
    smokeuser = configurations[SMOKEUSER_KEY]

  result_code = None

  try:
    if security_enabled:
      if SMOKEUSER_KEYTAB_KEY in configurations:
        smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY]

      # Get the configured Kerberos executable search paths, if any
      if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
        kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
      else:
        kerberos_executable_search_paths = None

      kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
      kinitcmd = format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}; ")

      # prevent concurrent kinit
      kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
      kinit_lock.acquire()
      try:
        Execute(kinitcmd, user=smokeuser,
          path=["/bin/", "/usr/bin/", "/usr/lib/hive/bin/", "/usr/sbin/"],
          timeout=10)
      finally:
        kinit_lock.release()

    if host_name is None:
      host_name = socket.getfqdn()

    # find the port of the metastore URI that lives on this host; stays None
    # (and fails into the CRITICAL handler) when no URI matches
    port = None
    for uri in metastore_uris:
      if host_name in uri:
        parts = urlparse(uri)
        port = parts.port

    start_time = time.time()

    try:
      sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
      # BUGFIX: close the socket in all cases instead of leaking the
      # file descriptor on every alert run
      try:
        result = sock.connect_ex((host_name, port))
        total_time = time.time() - start_time
      finally:
        sock.close()

      if result == 0:
        result_code = 'OK'
        label = OK_MESSAGE.format(total_time)
      else:
        result_code = 'CRITICAL'
        label = NOT_LISTENING_MESSAGE.format(host_name, port)
    except Exception:
      # BUGFIX: narrowed from a bare except so SystemExit/KeyboardInterrupt
      # propagate; every real connection error still lands here
      result_code = 'CRITICAL'
      label = CRITICAL_MESSAGE.format(host_name, traceback.format_exc())
  except Exception:
    label = traceback.format_exc()
    result_code = 'UNKNOWN'

  return ((result_code, [label]))
def execute(parameters=None, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label

  Keyword arguments:
  parameters (dictionary): a mapping of parameter key to value
  host_name (string): the name of this host where the alert is running
  """
  if parameters is None:
    return (('UNKNOWN', ['There were no parameters supplied to the script.']))

  if not HIVE_METASTORE_URIS_KEY in parameters:
    return (('UNKNOWN', ['Hive metastore uris were not supplied to the script.']))

  metastore_uris = parameters[HIVE_METASTORE_URIS_KEY].split(',')

  security_enabled = False
  if SECURITY_ENABLED_KEY in parameters:
    security_enabled = str(parameters[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

  smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
  if SMOKEUSER_PRINCIPAL_KEY in parameters:
    smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_KEY]

  smokeuser = SMOKEUSER_DEFAULT
  if SMOKEUSER_KEY in parameters:
    smokeuser = parameters[SMOKEUSER_KEY]

  result_code = None

  try:
    if security_enabled:
      smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
      if SMOKEUSER_KEYTAB_KEY in parameters:
        smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_KEY]

      # kinit as the smoke user so the hive CLI below can authenticate
      kinit_path_local = get_kinit_path()
      kinitcmd = format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}; ")
      Execute(kinitcmd, user=smokeuser,
        path=["/bin/", "/usr/bin/", "/usr/lib/hive/bin/", "/usr/sbin/"],
        timeout=10)

    if host_name is None:
      host_name = socket.getfqdn()

    # pick the metastore URI that refers to this host; NOTE(review): if no
    # URI matches, metastore_uri is never bound and the format() call below
    # fails, which the outer handler reports as UNKNOWN -- confirm intended
    for uri in metastore_uris:
      if host_name in uri:
        metastore_uri = uri

    # run a trivial metastore query through the hive CLI (mr engine so no
    # tez session is spun up for the check)
    cmd = format("export HIVE_CONF_DIR='/etc/hive/conf.server/' ; "
                 "hive --hiveconf hive.metastore.uris={metastore_uri} --hiveconf hive.execution.engine=mr -e 'show databases;'")

    start_time = time.time()

    try:
      Execute(cmd, user=smokeuser,
        path=["/bin/", "/usr/bin/", "/usr/lib/hive/bin/", "/usr/sbin/"],
        timeout=30 )

      total_time = time.time() - start_time

      result_code = 'OK'
      label = OK_MESSAGE.format(total_time)
    except Exception, exception:
      result_code = 'CRITICAL'
      label = CRITICAL_MESSAGE.format(host_name, exception.message)
  except Exception, e:
    label = str(e)
    result_code = 'UNKNOWN'
  # NOTE(review): no return statement is visible here although the alert
  # framework expects (result_code, [label]); the tail of this function may
  # have been lost in extraction -- verify against the original file
def get_check_command(oozie_url, host_name, configurations, parameters, only_kinit):
  """
  Assembles the Oozie admin status command for this alert, obtaining a
  Kerberos ticket first when the cluster is secured.

  only_kinit forces an unconditional kinit instead of first probing the
  credentials cache with klist.

  Returns a tuple of (command, kerberos_env, user).
  """
  kerberos_env = None

  user = configurations[USER_KEY] if USER_KEY in configurations else USER_DEFAULT

  if is_security_enabled(configurations):
    # start from the defaults
    user_keytab = USER_KEYTAB_DEFAULT
    user_principal = USER_PRINCIPAL_DEFAULT

    # script parameters are applied next
    if USER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters:
      user_principal = parameters[USER_PRINCIPAL_SCRIPT_PARAM_KEY]
      user_principal = user_principal.replace('_HOST', host_name.lower())

    if USER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
      user_keytab = parameters[USER_KEYTAB_SCRIPT_PARAM_KEY]

    # configurations always win over script parameters
    if USER_PRINCIPAL_KEY in configurations:
      user_principal = configurations[USER_PRINCIPAL_KEY]
      user_principal = user_principal.replace('_HOST', host_name.lower())

    if USER_KEYTAB_KEY in configurations:
      user_keytab = configurations[USER_KEYTAB_KEY]

    # a dedicated credentials cache (ccache) keeps this alert's tickets
    # isolated; it is exported through KRB5CCNAME when executing curl
    env = Environment.get_instance()
    ccache_file = "{0}{1}oozie_alert_cc_{2}".format(env.tmp_dir, os.sep, os.getpid())
    kerberos_env = {'KRB5CCNAME': ccache_file}

    # optional search paths for the kerberos executables
    kerberos_executable_search_paths = None
    if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
      kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]

    klist_path_local = get_klist_path(kerberos_executable_search_paths)
    kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
    kinit_part_command = format("{kinit_path_local} -l 5m20s -c {ccache_file} -kt {user_keytab} {user_principal}; ")

    # Tickets are issued for just over 5 minutes to limit how often we kinit
    # while still recovering quickly when keytabs are regenerated. Unless a
    # kinit was explicitly requested, probe the cache with klist first and
    # only kinit when it is missing or its tickets have expired.
    if only_kinit:
      kinit_command = kinit_part_command
    else:
      kinit_command = "{0} -s {1} || ".format(klist_path_local, ccache_file) + kinit_part_command

    Execute(kinit_command, environment=kerberos_env, user=user)

  # oozie configuration directory uses a symlink when > HDP 2.2
  oozie_config_directory = OOZIE_CONF_DIR if os.path.exists(OOZIE_CONF_DIR) else OOZIE_CONF_DIR_LEGACY

  command = "source {0}/oozie-env.sh ; oozie admin -oozie {1} -status".format(
    oozie_config_directory, oozie_url)

  return (command, kerberos_env, user)
def execute(configurations={}, parameters={}, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label

  Keyword arguments:
  configurations (dictionary): a mapping of configuration key to value
  parameters (dictionary): a mapping of script parameter key to value
  host_name (string): the name of this host where the alert is running
  """
  result_code = RESULT_CODE_UNKNOWN

  if configurations is None:
    return (result_code, ['There were no configurations supplied to the script.'])

  webhcat_port = WEBHCAT_PORT_DEFAULT
  if TEMPLETON_PORT_KEY in configurations:
    webhcat_port = int(configurations[TEMPLETON_PORT_KEY])

  security_enabled = False
  if SECURITY_ENABLED_KEY in configurations:
    security_enabled = configurations[SECURITY_ENABLED_KEY].lower() == 'true'

  # parse script arguments; the curl timeout is derived from the overall
  # connection timeout parameter when one is supplied
  connection_timeout = CONNECTION_TIMEOUT_DEFAULT
  curl_connection_timeout = CURL_CONNECTION_TIMEOUT_DEFAULT
  if CONNECTION_TIMEOUT_KEY in parameters:
    connection_timeout = float(parameters[CONNECTION_TIMEOUT_KEY])
    curl_connection_timeout = str(int(connection_timeout))

  # the alert will always run on the webhcat host
  if host_name is None:
    host_name = socket.getfqdn()

  # webhcat always uses http, never SSL
  query_url = "http://{0}:{1}/templeton/v1/status".format(host_name, webhcat_port)

  # initialize
  total_time = 0
  json_response = {}

  if security_enabled:
    if WEBHCAT_KEYTAB_KEY not in configurations or WEBHCAT_PRINCIPAL_KEY not in configurations:
      return (RESULT_CODE_UNKNOWN, [str(configurations)])

    try:
      webhcat_keytab = configurations[WEBHCAT_KEYTAB_KEY]
      webhcat_principal = configurations[WEBHCAT_PRINCIPAL_KEY]

      # substitute _HOST in kerberos principal with actual fqdn
      webhcat_principal = webhcat_principal.replace('_HOST', host_name)

      # Create the kerberos credentials cache (ccache) file and set it in the
      # environment to use when executing curl
      env = Environment.get_instance()
      ccache_file = "{0}{1}webhcat_alert_cc_{2}".format(env.tmp_dir, sep, getpid())
      kerberos_env = {'KRB5CCNAME': ccache_file}

      # Get the configured Kerberos executable search paths, if any
      if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
        kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
      else:
        kerberos_executable_search_paths = None

      klist_path_local = get_klist_path(kerberos_executable_search_paths)
      klist_command = format("{klist_path_local} -s {ccache_file}")

      # Determine if we need to kinit by testing to see if the relevant cache
      # exists and has non-expired tickets. Tickets are marked to expire after
      # 5 minutes to help reduce the number of kinits we do but recover
      # quickly when keytabs are regenerated
      return_code, _ = call(klist_command)
      if return_code != 0:
        kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
        kinit_command = format("{kinit_path_local} -l 5m -c {ccache_file} -kt {webhcat_keytab} {webhcat_principal}; ")

        # kinit so that curl will work with --negotiate
        Execute(kinit_command)

      # make a single curl call to get just the http code
      curl = subprocess.Popen(['curl', '--negotiate', '-u', ':', '-sL', '-w',
        '%{http_code}', '--connect-timeout', curl_connection_timeout,
        '-o', '/dev/null', query_url],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=kerberos_env)

      stdout, stderr = curl.communicate()

      if stderr != '':
        raise Exception(stderr)

      # check the response code
      response_code = int(stdout)

      # 0 indicates no connection
      if response_code == 0:
        label = CRITICAL_CONNECTION_MESSAGE.format(query_url)
        return (RESULT_CODE_CRITICAL, [label])

      # any other response aside from 200 is a problem
      if response_code != 200:
        label = CRITICAL_HTTP_MESSAGE.format(response_code, query_url)
        return (RESULT_CODE_CRITICAL, [label])

      # now that we have the http status and it was 200, get the content
      start_time = time.time()
      curl = subprocess.Popen(['curl', '--negotiate', '-u', ':', '-sL',
        '--connect-timeout', curl_connection_timeout, query_url, ],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=kerberos_env)

      stdout, stderr = curl.communicate()
      total_time = time.time() - start_time

      if stderr != '':
        raise Exception(stderr)

      json_response = json.loads(stdout)
    except Exception, exception:
      return (RESULT_CODE_CRITICAL, [str(exception)])
  # NOTE(review): the visible source ends here with json_response populated
  # but never evaluated and no non-secure path or final return -- the rest of
  # this function appears to have been lost in extraction; verify against the
  # original file before relying on this block
"""
from resource_management.libraries.script import Script
from resource_management.libraries.functions import get_kinit_path
from resource_management.libraries.functions import default, format

# module-level parameters shared by the Storm status/service scripts
config = Script.get_config()

# each Storm daemon writes its pid file under the configured pid directory
pid_dir = config['configurations']['storm-env']['storm_pid_dir']
pid_nimbus = format("{pid_dir}/nimbus.pid")
pid_supervisor = format("{pid_dir}/supervisor.pid")
pid_drpc = format("{pid_dir}/drpc.pid")
pid_ui = format("{pid_dir}/ui.pid")
pid_logviewer = format("{pid_dir}/logviewer.pid")
pid_rest_api = format("{pid_dir}/restapi.pid")

# component name -> pid file, used when checking daemon status
pid_files = {"logviewer":pid_logviewer,
             "ui": pid_ui,
             "nimbus": pid_nimbus,
             "supervisor": pid_supervisor,
             "drpc": pid_drpc,
             "rest_api": pid_rest_api}

# Security related/required params
hostname = config['hostname']
security_enabled = config['configurations']['cluster-env']['security_enabled']
kinit_path_local = get_kinit_path()
tmp_dir = Script.get_tmp_dir()
conf_dir = "/etc/storm/conf"
storm_user = config['configurations']['storm-env']['storm_user']

# UI principal/keytab are absent on unsecured clusters; default() returns None
storm_ui_principal = default('/configurations/storm-env/storm_ui_principal_name', None)
storm_ui_keytab = default('/configurations/storm-env/storm_ui_keytab', None)
def execute(parameters=None, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label

  Keyword arguments:
  parameters (dictionary): a mapping of parameter key to value
  host_name (string): the name of this host where the alert is running
  """
  if parameters is None:
    return (('UNKNOWN', ['There were no parameters supplied to the script.']))

  transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT
  if HIVE_SERVER_TRANSPORT_MODE_KEY in parameters:
    transport_mode = parameters[HIVE_SERVER_TRANSPORT_MODE_KEY]

  # the thrift port to probe depends on the configured transport mode
  port = THRIFT_PORT_DEFAULT
  if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in parameters:
    port = int(parameters[HIVE_SERVER_THRIFT_PORT_KEY])
  elif transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in parameters:
    port = int(parameters[HIVE_SERVER_THRIFT_HTTP_PORT_KEY])

  security_enabled = False
  if SECURITY_ENABLED_KEY in parameters:
    security_enabled = str(parameters[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

  hive_server2_authentication = HIVE_SERVER2_AUTHENTICATION_DEFAULT
  if HIVE_SERVER2_AUTHENTICATION_KEY in parameters:
    hive_server2_authentication = parameters[HIVE_SERVER2_AUTHENTICATION_KEY]

  smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
  if SMOKEUSER_PRINCIPAL_KEY in parameters:
    smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_KEY]

  smokeuser = SMOKEUSER_DEFAULT
  if SMOKEUSER_KEY in parameters:
    smokeuser = parameters[SMOKEUSER_KEY]

  result_code = None

  if security_enabled:
    hive_server_principal = HIVE_SERVER_PRINCIPAL_DEFAULT
    if HIVE_SERVER_PRINCIPAL_KEY in parameters:
      hive_server_principal = parameters[HIVE_SERVER_PRINCIPAL_KEY]

    smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
    if SMOKEUSER_KEYTAB_KEY in parameters:
      smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_KEY]

    # the kinit command is handed to the port check below so it can
    # authenticate before opening the thrift connection
    kinit_path_local = get_kinit_path()
    kinitcmd = format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}; ")
  else:
    hive_server_principal = None
    kinitcmd = None

  try:
    if host_name is None:
      host_name = socket.getfqdn()

    start_time = time.time()

    try:
      hive_check.check_thrift_port_sasl(host_name, port, hive_server2_authentication,
        hive_server_principal, kinitcmd, smokeuser,
        transport_mode = transport_mode)
      is_thrift_port_ok = True
    except:
      # any failure of the port check is treated as CRITICAL below
      is_thrift_port_ok = False

    if is_thrift_port_ok == True:
      result_code = 'OK'
      total_time = time.time() - start_time
      label = OK_MESSAGE % (total_time, port)
    else:
      result_code = 'CRITICAL'
      label = CRITICAL_MESSAGE.format(host_name,port)
  except Exception, e:
    label = str(e)
    result_code = 'UNKNOWN'
  # NOTE(review): no return statement is visible here although the alert
  # framework expects (result_code, [label]); the tail of this function may
  # have been lost in extraction -- verify against the original file
def execute(parameters=None, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label

  Keyword arguments:
  parameters (dictionary): a mapping of parameter key to value
  host_name (string): the name of this host where the alert is running
  """
  result_code = RESULT_CODE_UNKNOWN

  if parameters is None:
    return (result_code, ['There were no parameters supplied to the script.'])

  webhcat_port = WEBHCAT_PORT_DEFAULT
  if TEMPLETON_PORT_KEY in parameters:
    webhcat_port = int(parameters[TEMPLETON_PORT_KEY])

  security_enabled = False
  if SECURITY_ENABLED_KEY in parameters:
    security_enabled = parameters[SECURITY_ENABLED_KEY].lower() == 'true'

  # the alert will always run on the webhcat host
  if host_name is None:
    host_name = socket.getfqdn()

  # webhcat always uses http, never SSL
  query_url = "http://{0}:{1}/templeton/v1/status".format(
    host_name, webhcat_port)

  # initialize
  total_time = 0
  json_response = {}

  if security_enabled:
    if WEBHCAT_KEYTAB_KEY not in parameters or WEBHCAT_PRINCIPAL_KEY not in parameters:
      return (RESULT_CODE_UNKNOWN, [str(parameters)])

    try:
      webhcat_keytab = parameters[WEBHCAT_KEYTAB_KEY]
      webhcat_principal = parameters[WEBHCAT_PRINCIPAL_KEY]

      # substitute _HOST in kerberos principal with actual fqdn
      webhcat_principal = webhcat_principal.replace('_HOST', host_name)

      # Create the kerberos credentials cache (ccache) file and set it in the
      # environment to use when executing curl
      env = Environment.get_instance()
      ccache_file = "{0}{1}webhcat_alert_cc_{2}".format(
        env.tmp_dir, sep, getpid())
      kerberos_env = {'KRB5CCNAME': ccache_file}

      # Get the configured Kerberos executable search paths, if any
      if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in parameters:
        kerberos_executable_search_paths = parameters[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
      else:
        kerberos_executable_search_paths = None

      klist_path_local = get_klist_path(kerberos_executable_search_paths)
      klist_command = format("{klist_path_local} -s {ccache_file}")

      # Determine if we need to kinit by testing to see if the relevant cache
      # exists and has non-expired tickets. Tickets are marked to expire after
      # 5 minutes to help reduce the number of kinits we do but recover
      # quickly when keytabs are regenerated
      return_code, _ = call(klist_command)
      if return_code != 0:
        kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
        kinit_command = format(
          "{kinit_path_local} -l 5m -c {ccache_file} -kt {webhcat_keytab} {webhcat_principal}; ")

        # kinit so that curl will work with --negotiate
        Execute(kinit_command)

      # make a single curl call to get just the http code
      curl = subprocess.Popen([
        'curl', '--negotiate', '-u', ':', '-sL', '-w', '%{http_code}',
        '--connect-timeout', CURL_CONNECTION_TIMEOUT, '-o', '/dev/null',
        query_url
      ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=kerberos_env)

      stdout, stderr = curl.communicate()

      if stderr != '':
        raise Exception(stderr)

      # check the response code
      response_code = int(stdout)

      # 0 indicates no connection
      if response_code == 0:
        label = CRITICAL_CONNECTION_MESSAGE.format(query_url)
        return (RESULT_CODE_CRITICAL, [label])

      # any other response aside from 200 is a problem
      if response_code != 200:
        label = CRITICAL_HTTP_MESSAGE.format(response_code, query_url)
        return (RESULT_CODE_CRITICAL, [label])

      # now that we have the http status and it was 200, get the content
      start_time = time.time()
      curl = subprocess.Popen([
        'curl', '--negotiate', '-u', ':', '-sL',
        '--connect-timeout', CURL_CONNECTION_TIMEOUT, query_url,
      ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=kerberos_env)

      stdout, stderr = curl.communicate()
      total_time = time.time() - start_time

      if stderr != '':
        raise Exception(stderr)

      json_response = json.loads(stdout)
    except Exception, exception:
      return (RESULT_CODE_CRITICAL, [str(exception)])
  # NOTE(review): the visible source ends here with json_response populated
  # but never evaluated and no non-secure path or final return -- the rest of
  # this function appears to have been lost in extraction; verify against the
  # original file before relying on this block
def execute(configurations={}, parameters={}, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label.

  Checks that the Livy server answers a /sessions request with HTTP 200,
  performing a kinit first when security is enabled.

  Keyword arguments:
  configurations (dictionary): a mapping of configuration key to value
  parameters (dictionary): a mapping of script parameter key to value
  host_name (string): the name of this host where the alert is running
  """
  if configurations is None:
    return ('UNKNOWN', ['There were no configurations supplied to the script.'])

  LIVY_PORT_DEFAULT = 8999

  port = LIVY_PORT_DEFAULT
  if LIVY_SERVER_PORT_KEY in configurations:
    port = int(configurations[LIVY_SERVER_PORT_KEY])

  if LIVY_SERVER_HOST_KEY in configurations:
    host_name = str(configurations[LIVY_SERVER_HOST_KEY])

  if host_name is None:
    host_name = socket.getfqdn()

  # BUGFIX: report UNKNOWN instead of raising an unhandled KeyError when the
  # smoke user configuration was not supplied
  if SMOKEUSER_KEY not in configurations:
    return ('UNKNOWN', ['The smoke user was not supplied to the script.'])

  livyuser = configurations[SMOKEUSER_KEY]

  security_enabled = False
  if SECURITY_ENABLED_KEY in configurations:
    security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

  smokeuser_kerberos_keytab = None
  if SMOKEUSER_KEYTAB_KEY in configurations:
    smokeuser_kerberos_keytab = configurations[SMOKEUSER_KEYTAB_KEY]

  smokeuser_principal = None
  if SMOKEUSER_PRINCIPAL_KEY in configurations:
    smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]
    # substitute _HOST in kerberos principal with actual fqdn
    smokeuser_principal = smokeuser_principal.replace('_HOST', host_name.lower())

  # Get the configured Kerberos executable search paths, if any
  if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
    kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
  else:
    kerberos_executable_search_paths = None

  kinit_path_local = get_kinit_path(kerberos_executable_search_paths)

  if security_enabled:
    kinitcmd = format("{kinit_path_local} -kt {smokeuser_kerberos_keytab} {smokeuser_principal}; ")

    # prevent concurrent kinit
    kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
    kinit_lock.acquire()
    try:
      Execute(kinitcmd, user=livyuser)
    finally:
      kinit_lock.release()

  # BUGFIX: choose https based on the configured VALUE rather than the mere
  # presence of the key -- the key may be present with a value of 'false'
  livy_ssl_enabled = False
  if LIVY_SSL_ENABLED_KEY in configurations:
    livy_ssl_enabled = str(configurations[LIVY_SSL_ENABLED_KEY]).upper() == 'TRUE'
  http_scheme = 'https' if livy_ssl_enabled else 'http'

  result_code = None

  try:
    start_time = time.time()
    try:
      livy2_livyserver_host = str(host_name)

      livy_cmd = format("curl -s -o /dev/null -w'%{{http_code}}' --negotiate -u: -k {http_scheme}://{livy2_livyserver_host}:{port}/sessions | grep 200 ")

      Execute(livy_cmd, tries=3, try_sleep=1, logoutput=True, user=livyuser)

      total_time = time.time() - start_time
      result_code = 'OK'
      label = OK_MESSAGE.format(total_time, port)
    except Exception:
      # narrowed from a bare except so SystemExit/KeyboardInterrupt propagate
      result_code = 'CRITICAL'
      label = CRITICAL_MESSAGE.format(host_name, port, traceback.format_exc())
  except Exception:
    label = traceback.format_exc()
    result_code = 'UNKNOWN'

  return (result_code, [label])
def execute(configurations={}, parameters={}, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label.

  Checks that HiveServer2 accepts Thrift connections on the configured
  transport (binary or http), performing a kinit first when secured.

  Keyword arguments:
  configurations (dictionary): a mapping of configuration key to value
  parameters (dictionary): a mapping of script parameter key to value
  host_name (string): the name of this host where the alert is running
  """
  if configurations is None:
    return ('UNKNOWN', ['There were no configurations supplied to the script.'])

  transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT
  if HIVE_SERVER_TRANSPORT_MODE_KEY in configurations:
    transport_mode = configurations[HIVE_SERVER_TRANSPORT_MODE_KEY]

  # the thrift port to probe depends on the configured transport mode
  port = THRIFT_PORT_DEFAULT
  if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in configurations:
    port = int(configurations[HIVE_SERVER_THRIFT_PORT_KEY])
  elif transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in configurations:
    port = int(configurations[HIVE_SERVER_THRIFT_HTTP_PORT_KEY])

  security_enabled = False
  if SECURITY_ENABLED_KEY in configurations:
    security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

  check_command_timeout = CHECK_COMMAND_TIMEOUT_DEFAULT
  if CHECK_COMMAND_TIMEOUT_KEY in parameters:
    check_command_timeout = float(parameters[CHECK_COMMAND_TIMEOUT_KEY])

  hive_server2_authentication = HIVE_SERVER2_AUTHENTICATION_DEFAULT
  if HIVE_SERVER2_AUTHENTICATION_KEY in configurations:
    hive_server2_authentication = configurations[HIVE_SERVER2_AUTHENTICATION_KEY]

  # SSL settings for the thrift connection, if configured
  hive_ssl = False
  if HIVE_SSL in configurations:
    hive_ssl = configurations[HIVE_SSL]

  hive_ssl_keystore_path = None
  if HIVE_SSL_KEYSTORE_PATH in configurations:
    hive_ssl_keystore_path = configurations[HIVE_SSL_KEYSTORE_PATH]

  hive_ssl_keystore_password = None
  if HIVE_SSL_KEYSTORE_PASSWORD in configurations:
    hive_ssl_keystore_password = configurations[HIVE_SSL_KEYSTORE_PASSWORD]

  # defaults
  smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
  smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
  smokeuser = SMOKEUSER_DEFAULT

  # check script params
  if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters:
    smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY]

  if SMOKEUSER_SCRIPT_PARAM_KEY in parameters:
    smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY]

  if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
    smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY]

  # check configurations last as they should always take precedence
  if SMOKEUSER_PRINCIPAL_KEY in configurations:
    smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]

  if SMOKEUSER_KEY in configurations:
    smokeuser = configurations[SMOKEUSER_KEY]

  hive_user = HIVE_USER_DEFAULT
  if HIVE_USER_KEY in configurations:
    hive_user = configurations[HIVE_USER_KEY]

  # LDAP credentials, used when HiveServer2 authentication is LDAP
  ldap_username = ""
  ldap_password = ""
  if HIVE_LDAP_USERNAME in configurations:
    ldap_username = configurations[HIVE_LDAP_USERNAME]
  if HIVE_LDAP_PASSWORD in configurations:
    ldap_password = configurations[HIVE_LDAP_PASSWORD]

  result_code = None

  if security_enabled:
    hive_server_principal = HIVE_SERVER_PRINCIPAL_DEFAULT
    if HIVE_SERVER_PRINCIPAL_KEY in configurations:
      hive_server_principal = configurations[HIVE_SERVER_PRINCIPAL_KEY]

    if SMOKEUSER_KEYTAB_KEY in configurations:
      smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY]

    # Get the configured Kerberos executable search paths, if any
    if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
      kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
    else:
      kerberos_executable_search_paths = None

    kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
    kinitcmd = format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}; ")
  else:
    hive_server_principal = None
    kinitcmd = None

  try:
    if host_name is None:
      host_name = socket.getfqdn()

    start_time = time.time()

    try:
      hive_check.check_thrift_port_sasl(host_name, port, hive_server2_authentication,
        hive_server_principal, kinitcmd, smokeuser, hive_user=hive_user,
        transport_mode=transport_mode, ssl=hive_ssl,
        ssl_keystore=hive_ssl_keystore_path,
        ssl_password=hive_ssl_keystore_password,
        check_command_timeout=int(check_command_timeout),
        ldap_username=ldap_username, ldap_password=ldap_password)

      result_code = 'OK'
      total_time = time.time() - start_time
      label = OK_MESSAGE.format(total_time, port)
    except Exception:
      # BUGFIX: narrowed from a bare except so SystemExit/KeyboardInterrupt
      # propagate; every real check failure still lands here
      result_code = 'CRITICAL'
      label = CRITICAL_MESSAGE.format(host_name, port, traceback.format_exc())
  except Exception:
    label = traceback.format_exc()
    result_code = 'UNKNOWN'

  return (result_code, [label])
def get_check_command(oozie_url, host_name, configurations):
  """
  Constructs the command used to probe the Oozie server's status.

  When security is enabled a Kerberos ticket is first obtained into a
  per-process credentials cache so the command can authenticate.

  Returns a tuple of (command, kerberos_env, oozie_user).
  """
  if OOZIE_USER not in configurations:
    raise Exception("Oozie user is required")

  oozie_user = configurations[OOZIE_USER]

  security_enabled = False
  if SECURITY_ENABLED in configurations:
    security_enabled = str(configurations[SECURITY_ENABLED]).upper() == 'TRUE'

  kerberos_env = None

  if security_enabled:
    if OOZIE_KEYTAB not in configurations or OOZIE_PRINCIPAL not in configurations:
      raise KerberosPropertiesNotFound(
        'The Oozie keytab and principal are required configurations when security is enabled.'
      )

    oozie_keytab = configurations[OOZIE_KEYTAB]

    # substitute _HOST in kerberos principal with actual fqdn
    oozie_principal = configurations[OOZIE_PRINCIPAL].replace('_HOST', host_name)

    # Create the kerberos credentials cache (ccache) file and set it in the
    # environment to use when executing curl
    env = Environment.get_instance()
    ccache_file = "{0}{1}oozie_alert_cc_{2}".format(env.tmp_dir, os.sep, os.getpid())
    kerberos_env = {'KRB5CCNAME': ccache_file}

    # Get the configured Kerberos executable search paths, if any
    if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
      kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
    else:
      kerberos_executable_search_paths = None

    klist_path_local = get_klist_path(kerberos_executable_search_paths)
    klist_command = format("{klist_path_local} -s {ccache_file}")

    # Only kinit when the relevant cache is missing or its tickets have
    # expired. Tickets are marked to expire after 5 minutes to help reduce
    # the number of kinits we do but recover quickly when keytabs are
    # regenerated.
    return_code, _ = call(klist_command, user=oozie_user)
    if return_code != 0:
      kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
      kinit_command = format(
        "{kinit_path_local} -l 5m -kt {oozie_keytab} {oozie_principal}; ")

      # kinit
      Execute(kinit_command, environment=kerberos_env, user=oozie_user)

  # oozie configuration directory uses a symlink when > HDP 2.2
  oozie_config_directory = OOZIE_CONF_DIR_LEGACY
  if os.path.exists(OOZIE_CONF_DIR):
    oozie_config_directory = OOZIE_CONF_DIR

  command = "source {0}/oozie-env.sh ; oozie admin -oozie {1} -status".format(
    oozie_config_directory, oozie_url)

  return (command, kerberos_env, oozie_user)
def configureResourcesCalledSecure(self):
  """
  Asserts the full, ordered sequence of resources that a secure (kerberized)
  Atlas configure is expected to create: directories, config files, the
  Ambari Infra Solr client setup, and the secured znodes/collections.
  """
  # Both server and client
  self.assertResourceCalled('Directory', self.conf_dir,
                            owner='atlas',
                            group='hadoop',
                            create_parents = True,
                            cd_access='a',
                            mode=0755)
  # Pid dir
  self.assertResourceCalled('Directory', '/var/run/atlas',
                            owner='atlas',
                            group='hadoop',
                            create_parents = True,
                            cd_access='a',
                            mode=0755)
  self.assertResourceCalled('Directory', self.conf_dir + "/solr",
                            owner='atlas',
                            group='hadoop',
                            create_parents = True,
                            cd_access='a',
                            mode=0755,
                            recursive_ownership = True)
  # Log dir
  self.assertResourceCalled('Directory', '/var/log/atlas',
                            owner='atlas',
                            group='hadoop',
                            create_parents = True,
                            cd_access='a',
                            mode=0755)
  # Data dir
  self.assertResourceCalled('Directory', self.stack_root+'/current/atlas-server/data',
                            owner='atlas',
                            group='hadoop',
                            create_parents = True,
                            cd_access='a',
                            mode=0644)
  # Expanded war dir
  self.assertResourceCalled('Directory', self.stack_root+'/current/atlas-server/server/webapp',
                            owner='atlas',
                            group='hadoop',
                            create_parents = True,
                            cd_access='a',
                            mode=0644)
  # NOTE(review): source and destination of this cp are the same path —
  # presumably mirroring what the production code issues; confirm intent.
  self.assertResourceCalled('Execute',
                            ('cp', self.stack_root+'/current/atlas-server/server/webapp/atlas.war',
                             self.stack_root+'/current/atlas-server/server/webapp/atlas.war'),
                            sudo = True,
                            not_if = True,)

  host_name = u"c6401.ambari.apache.org"
  # Build the expected application.properties content from the test config,
  # mirroring the substitutions the real code performs.
  app_props = dict(self.getConfig()['configurations']['application-properties'])
  app_props['atlas.server.bind.address'] = host_name

  metadata_protocol = "https" if app_props["atlas.enableTLS"] is True else "http"
  metadata_port = app_props["atlas.server.https.port"] if metadata_protocol == "https" else app_props["atlas.server.http.port"]
  app_props["atlas.rest.address"] = u'%s://%s:%s' % (metadata_protocol, host_name, metadata_port)
  app_props["atlas.server.ids"] = "id1"
  app_props["atlas.server.address.id1"] = u"%s:%s" % (host_name, metadata_port)
  app_props["atlas.server.ha.enabled"] = "false"

  self.assertResourceCalled('File', self.conf_dir + "/atlas-log4j.xml",
                            content=InlineTemplate(self.getConfig()['configurations']['atlas-log4j']['content']),
                            owner='atlas',
                            group='hadoop',
                            mode=0644,)
  self.assertResourceCalled('File', self.conf_dir + "/atlas-env.sh",
                            content=InlineTemplate(self.getConfig()['configurations']['atlas-env']['content']),
                            owner='atlas',
                            group='hadoop',
                            mode=0755,)
  self.assertResourceCalled('File', self.conf_dir+"/solr/solrconfig.xml",
                            content=InlineTemplate(self.getConfig()['configurations']['atlas-solrconfig']['content']),
                            owner='atlas',
                            group='hadoop',
                            mode=0644,)
  # application.properties file
  self.assertResourceCalled('PropertiesFile', self.conf_dir + "/application.properties",
                            properties=app_props,
                            owner=u'atlas',
                            group=u'hadoop',
                            mode=0600,)
  self.assertResourceCalled('TemplateConfig', self.conf_dir+"/atlas_jaas.conf",
                            owner = 'atlas',)
  self.assertResourceCalled('Directory', '/var/log/ambari-infra-solr-client',
                            create_parents = True,
                            cd_access='a',
                            mode=0755)
  self.assertResourceCalled('Directory', '/usr/lib/ambari-infra-solr-client',
                            create_parents = True,
                            recursive_ownership = True,
                            cd_access='a',
                            mode=0755)
  self.assertResourceCalled('File', '/usr/lib/ambari-infra-solr-client/solrCloudCli.sh',
                            content=StaticFile('/usr/lib/ambari-infra-solr-client/solrCloudCli.sh'),
                            mode=0755,)
  self.assertResourceCalled('File', '/usr/lib/ambari-infra-solr-client/log4j.properties',
                            content=self.getConfig()['configurations']['infra-solr-client-log4j']['content'],
                            mode=0644,)
  self.assertResourceCalled('File', '/var/log/ambari-infra-solr-client/solr-client.log',
                            mode=0664,
                            content='')

  # Regexp asserts are used below because the solr config staging directory
  # embeds a timestamp-like suffix (solr_config_atlas_configs_0.[0-9]*).
  self.assertResourceCalledRegexp('^Execute$', '^ambari-sudo.sh JAVA_HOME=/usr/jdk64/jdk1.7.0_45 /usr/lib/ambari-infra-solr-client/solrCloudCli.sh --zookeeper-connect-string c6401.ambari.apache.org:2181 --znode /infra-solr --check-znode --retry 5 --interval 10')
  self.assertResourceCalledRegexp('^Execute$', '^ambari-sudo.sh JAVA_HOME=/usr/jdk64/jdk1.7.0_45 /usr/lib/ambari-infra-solr-client/solrCloudCli.sh --zookeeper-connect-string c6401.ambari.apache.org:2181/infra-solr --jaas-file /usr/hdp/current/atlas-server/conf/atlas_jaas.conf --download-config --config-dir /tmp/solr_config_atlas_configs_0.[0-9]* --config-set atlas_configs --retry 30 --interval 5')
  self.assertResourceCalledRegexp('^File$', '^/tmp/solr_config_atlas_configs_0.[0-9]*',
                                  content=InlineTemplate(self.getConfig()['configurations']['atlas-solrconfig']['content']),
                                  only_if='test -d /tmp/solr_config_atlas_configs_0.[0-9]*')
  self.assertResourceCalledRegexp('^Execute$', '^ambari-sudo.sh JAVA_HOME=/usr/jdk64/jdk1.7.0_45 /usr/lib/ambari-infra-solr-client/solrCloudCli.sh --zookeeper-connect-string c6401.ambari.apache.org:2181/infra-solr --jaas-file /usr/hdp/current/atlas-server/conf/atlas_jaas.conf --upload-config --config-dir /tmp/solr_config_atlas_configs_0.[0-9]* --config-set atlas_configs --retry 30 --interval 5',
                                  only_if='test -d /tmp/solr_config_atlas_configs_0.[0-9]*')
  self.assertResourceCalledRegexp('^Execute$', '^ambari-sudo.sh JAVA_HOME=/usr/jdk64/jdk1.7.0_45 /usr/lib/ambari-infra-solr-client/solrCloudCli.sh --zookeeper-connect-string c6401.ambari.apache.org:2181/infra-solr --jaas-file /usr/hdp/current/atlas-server/conf/atlas_jaas.conf --upload-config --config-dir {0}/solr --config-set atlas_configs --retry 30 --interval 5'.format(self.conf_dir),
                                  not_if='test -d /tmp/solr_config_atlas_configs_0.[0-9]*')
  self.assertResourceCalledRegexp('^Directory$', '^/tmp/solr_config_atlas_configs_0.[0-9]*',
                                  action=['delete'],
                                  create_parents=True)

  kinit_path_local = get_kinit_path()
  # Two kinit+curl invocations chained with &&: the first checks whether solr
  # authorization is enabled, the second assigns solr roles to the atlas user.
  self.assertResourceCalled('Execute',
                            kinit_path_local + " -kt /etc/security/keytabs/ambari-infra-solr.keytab infra-solr/[email protected]; curl -k -s --negotiate -u : http://c6401.ambari.apache.org:8886/solr/admin/authorization | grep authorization.enabled && " + kinit_path_local + " -kt /etc/security/keytabs/ambari-infra-solr.keytab infra-solr/[email protected]; curl -H 'Content-type:application/json' -d '{\"set-user-role\": {\"[email protected]\": [\"atlas_user\", \"ranger_audit_user\", \"dev\"]}}' -s -o /dev/null -w'%{http_code}' --negotiate -u: -k http://c6401.ambari.apache.org:8886/solr/admin/authorization | grep 200",
                            logoutput = True,
                            tries = 30,
                            try_sleep = 10,
                            user='******')

  # The three Atlas solr collections are created against the secured znode.
  self.assertResourceCalledRegexp('^Execute$', '^ambari-sudo.sh JAVA_HOME=/usr/jdk64/jdk1.7.0_45 /usr/lib/ambari-infra-solr-client/solrCloudCli.sh --zookeeper-connect-string c6401.ambari.apache.org:2181/infra-solr --jaas-file /usr/hdp/current/atlas-server/conf/atlas_jaas.conf --create-collection --collection vertex_index --config-set atlas_configs --shards 1 --replication 1 --max-shards 1 --retry 5 --interval 10')
  self.assertResourceCalledRegexp('^Execute$', '^ambari-sudo.sh JAVA_HOME=/usr/jdk64/jdk1.7.0_45 /usr/lib/ambari-infra-solr-client/solrCloudCli.sh --zookeeper-connect-string c6401.ambari.apache.org:2181/infra-solr --jaas-file /usr/hdp/current/atlas-server/conf/atlas_jaas.conf --create-collection --collection edge_index --config-set atlas_configs --shards 1 --replication 1 --max-shards 1 --retry 5 --interval 10')
  self.assertResourceCalledRegexp('^Execute$', '^ambari-sudo.sh JAVA_HOME=/usr/jdk64/jdk1.7.0_45 /usr/lib/ambari-infra-solr-client/solrCloudCli.sh --zookeeper-connect-string c6401.ambari.apache.org:2181/infra-solr --jaas-file /usr/hdp/current/atlas-server/conf/atlas_jaas.conf --create-collection --collection fulltext_index --config-set atlas_configs --shards 1 --replication 1 --max-shards 1 --retry 5 --interval 10')

  # Finally, the config-set znode and each collection znode are secured for
  # the atlas and infra-solr SASL users.
  self.assertResourceCalled('Execute', "ambari-sudo.sh JAVA_HOME=/usr/jdk64/jdk1.7.0_45 /usr/lib/ambari-infra-solr-client/solrCloudCli.sh --zookeeper-connect-string c6401.ambari.apache.org:2181 --znode /infra-solr/configs/atlas_configs --jaas-file /usr/hdp/current/atlas-server/conf/atlas_jaas.conf --secure-znode --sasl-users atlas,infra-solr --retry 5 --interval 10")
  self.assertResourceCalled('Execute', "ambari-sudo.sh JAVA_HOME=/usr/jdk64/jdk1.7.0_45 /usr/lib/ambari-infra-solr-client/solrCloudCli.sh --zookeeper-connect-string c6401.ambari.apache.org:2181 --znode /infra-solr/collections/vertex_index --jaas-file /usr/hdp/current/atlas-server/conf/atlas_jaas.conf --secure-znode --sasl-users atlas,infra-solr --retry 5 --interval 10")
  self.assertResourceCalled('Execute', "ambari-sudo.sh JAVA_HOME=/usr/jdk64/jdk1.7.0_45 /usr/lib/ambari-infra-solr-client/solrCloudCli.sh --zookeeper-connect-string c6401.ambari.apache.org:2181 --znode /infra-solr/collections/edge_index --jaas-file /usr/hdp/current/atlas-server/conf/atlas_jaas.conf --secure-znode --sasl-users atlas,infra-solr --retry 5 --interval 10")
  self.assertResourceCalled('Execute', "ambari-sudo.sh JAVA_HOME=/usr/jdk64/jdk1.7.0_45 /usr/lib/ambari-infra-solr-client/solrCloudCli.sh --zookeeper-connect-string c6401.ambari.apache.org:2181 --znode /infra-solr/collections/fulltext_index --jaas-file /usr/hdp/current/atlas-server/conf/atlas_jaas.conf --secure-znode --sasl-users atlas,infra-solr --retry 5 --interval 10")
def execute(parameters=None, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label

  Keyword arguments:
  parameters (dictionary): a mapping of parameter key to value
  host_name (string): the name of this host where the alert is running
  """
  if parameters is None:
    return ('UNKNOWN', ['There were no parameters supplied to the script.'])

  transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT
  if HIVE_SERVER_TRANSPORT_MODE_KEY in parameters:
    transport_mode = parameters[HIVE_SERVER_TRANSPORT_MODE_KEY]

  # the port to probe depends on the configured thrift transport
  port = THRIFT_PORT_DEFAULT
  if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in parameters:
    port = int(parameters[HIVE_SERVER_THRIFT_PORT_KEY])
  elif transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in parameters:
    port = int(parameters[HIVE_SERVER_THRIFT_HTTP_PORT_KEY])

  security_enabled = False
  if SECURITY_ENABLED_KEY in parameters:
    security_enabled = str(parameters[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

  hive_server2_authentication = HIVE_SERVER2_AUTHENTICATION_DEFAULT
  if HIVE_SERVER2_AUTHENTICATION_KEY in parameters:
    hive_server2_authentication = parameters[HIVE_SERVER2_AUTHENTICATION_KEY]

  smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
  if SMOKEUSER_PRINCIPAL_KEY in parameters:
    smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_KEY]

  smokeuser = SMOKEUSER_DEFAULT
  if SMOKEUSER_KEY in parameters:
    smokeuser = parameters[SMOKEUSER_KEY]

  result_code = None

  if security_enabled:
    hive_server_principal = HIVE_SERVER_PRINCIPAL_DEFAULT
    if HIVE_SERVER_PRINCIPAL_KEY in parameters:
      hive_server_principal = parameters[HIVE_SERVER_PRINCIPAL_KEY]

    smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
    if SMOKEUSER_KEYTAB_KEY in parameters:
      smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_KEY]

    # format() interpolates {kinit_path_local} etc. from these local names
    kinit_path_local = get_kinit_path()
    kinitcmd = format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}; ")
  else:
    hive_server_principal = None
    kinitcmd = None

  try:
    if host_name is None:
      host_name = socket.getfqdn()

    start_time = time.time()

    try:
      hive_check.check_thrift_port_sasl(host_name, port,
          hive_server2_authentication, hive_server_principal, kinitcmd,
          smokeuser, transport_mode=transport_mode)
      is_thrift_port_ok = True
    except Exception:
      # the probe failing (for any reason) means the port is not OK; narrowed
      # from a bare except so Ctrl-C / SystemExit still propagate
      is_thrift_port_ok = False

    if is_thrift_port_ok:
      result_code = 'OK'
      total_time = time.time() - start_time
      label = OK_MESSAGE % (total_time, port)
    else:
      result_code = 'CRITICAL'
      label = CRITICAL_MESSAGE.format(host_name, port)
  except Exception as e:
    label = str(e)
    result_code = 'UNKNOWN'

  # BUG FIX: the original fell off the end and implicitly returned None;
  # return the (code, [label]) pair like the sibling alert scripts do
  return (result_code, [label])
def execute(parameters=None, host_name=None):
  """
  Runs the Oozie status alert and returns (result_code, [label]).

  Keyword arguments:
  parameters (dictionary): a mapping of parameter key to value
  host_name (string): the name of this host where the alert is running
  """
  if parameters is None:
    return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the script.'])

  if OOZIE_URL_KEY not in parameters:
    return (RESULT_CODE_UNKNOWN, ['The Oozie URL is a required parameter.'])

  # use localhost on Windows, 0.0.0.0 on others; 0.0.0.0 means bind to all
  # interfaces, which doesn't work on Windows
  if OSCheck.get_os_family() == OSConst.WINSRV_FAMILY:
    localhost_address = 'localhost'
  else:
    localhost_address = '0.0.0.0'

  # point the configured URL at the local interface before probing; the
  # oozie_url local name feeds the format() call below
  oozie_url = parameters[OOZIE_URL_KEY]
  oozie_url = oozie_url.replace(urlparse(oozie_url).hostname, localhost_address)

  security_enabled = str(parameters.get(SECURITY_ENABLED, '')).upper() == 'TRUE'

  command = format("source /etc/oozie/conf/oozie-env.sh ; oozie admin -oozie {oozie_url} -status")

  try:
    # kinit if security is enabled so that oozie-env.sh can make the web request
    kerberos_env = None

    if security_enabled:
      if OOZIE_KEYTAB not in parameters or OOZIE_PRINCIPAL not in parameters:
        return (RESULT_CODE_UNKNOWN, ['The Oozie keytab and principal are required parameters when security is enabled.'])

      oozie_keytab = parameters[OOZIE_KEYTAB]
      # substitute _HOST in kerberos principal with actual fqdn
      oozie_principal = parameters[OOZIE_PRINCIPAL].replace('_HOST', host_name)

      # Create the kerberos credentials cache (ccache) file and set it in the
      # environment to use when executing curl
      env = Environment.get_instance()
      ccache_file = "{0}{1}oozie_alert_cc_{2}".format(env.tmp_dir, sep, getpid())
      kerberos_env = {'KRB5CCNAME': ccache_file}

      klist_path_local = get_klist_path()
      klist_command = format("{klist_path_local} -s {ccache_file}")

      # Only kinit when the cache holds no non-expired tickets. Tickets are
      # marked to expire after 5 minutes to keep kinit frequency low while
      # recovering quickly when keytabs are regenerated.
      return_code, _ = call(klist_command)
      if return_code != 0:
        kinit_path_local = get_kinit_path()
        kinit_command = format("{kinit_path_local} -l 5m -kt {oozie_keytab} {oozie_principal}; ")
        Execute(kinit_command, environment=kerberos_env)

    # execute the command
    Execute(command, environment=kerberos_env)

    return (RESULT_CODE_OK, ["Successful connection to {0}".format(oozie_url)])
  except Exception as ex:
    return (RESULT_CODE_CRITICAL, [str(ex)])
# Parameter resolution for a YARN/MapReduce stack where paths come from the
# process environment (HADOOP_* variables) rather than stack-select tooling.
config = Script.get_config()
hadoop_user = config["configurations"]["cluster-env"]["hadoop.user.name"]
# all service users collapse to the single configured hadoop user here
yarn_user = hadoop_user
hdfs_user = hadoop_user
smokeuser = hadoop_user

config_dir = os.environ["HADOOP_CONF_DIR"]
hadoop_home = os.environ["HADOOP_HOME"]
yarn_home = os.environ["HADOOP_YARN_HOME"]

hadoop_ssl_enabled = default("/configurations/core-site/hadoop.ssl.enabled", False)
_authentication = config["configurations"]["core-site"]["hadoop.security.authentication"]
# security is on only when the authentication property is set to "kerberos"
security_enabled = not is_empty(_authentication) and _authentication == "kerberos"
smoke_user_keytab = config["configurations"]["hadoop-env"]["smokeuser_keytab"]
kinit_path_local = functions.get_kinit_path(default("/configurations/kerberos-env/executable_search_paths", None))

# ResourceManager web UI addresses; the port is taken from the tail of the
# configured webapp address, while the https port is hard-coded to 8090
rm_host = config["clusterHostInfo"]["rm_host"][0]
rm_port = config["configurations"]["yarn-site"]["yarn.resourcemanager.webapp.address"].split(":")[-1]
rm_https_port = "8090"
rm_webui_address = format("{rm_host}:{rm_port}")
rm_webui_https_address = format("{rm_host}:{rm_https_port}")

# History Server web UI address, derived the same way
hs_host = config["clusterHostInfo"]["hs_host"][0]
hs_port = config["configurations"]["mapred-site"]["mapreduce.jobhistory.webapp.address"].split(":")[-1]
hs_webui_address = format("{hs_host}:{hs_port}")

hadoop_mapred2_jar_location = os.path.join(os.environ["HADOOP_COMMON_HOME"], "share", "hadoop", "mapreduce")
# glob pattern, matched later against the mapreduce examples jar
hadoopMapredExamplesJarName = "hadoop-mapreduce-examples-2.*.jar"

exclude_hosts = default("/clusterHostInfo/decom_nm_hosts", [])
# NOTE(review): the statement below is truncated in this chunk — the call's
# arguments continue outside the visible region.
exclude_file_path = default(
def _make_web_request(self, url):
  """
  Makes an http(s) request to a web resource and returns the http code. If
  there was an error making the request, return 0 for the status code.
  """
  error_msg = None

  try:
    response_code = 0
    kerberos_keytab = None
    kerberos_principal = None

    # resolve the kerberos principal/keytab from configuration, if declared
    if self.uri_property_keys.kerberos_principal is not None:
      kerberos_principal = self._get_configuration_value(self.uri_property_keys.kerberos_principal)
      if kerberos_principal is not None:
        # substitute _HOST in kerberos principal with actual fqdn
        kerberos_principal = kerberos_principal.replace("_HOST", self.host_name)

    if self.uri_property_keys.kerberos_keytab is not None:
      kerberos_keytab = self._get_configuration_value(self.uri_property_keys.kerberos_keytab)

    # both principal AND keytab must be present to take the kerberized path;
    # otherwise fall through to the plain urllib request below
    if kerberos_principal is not None and kerberos_keytab is not None:
      # Create the kerberos credentials cache (ccache) file and set it in the environment to use
      # when executing curl. Use the md5 hash of the combination of the principal and keytab file
      # to generate a (relatively) unique cache filename so that we can use it as needed.
      tmp_dir = self.config.get("agent", "tmp_dir")
      if tmp_dir is None:
        tmp_dir = gettempdir()

      ccache_file_name = _md5("{0}|{1}".format(kerberos_principal, kerberos_keytab)).hexdigest()
      ccache_file_path = "{0}{1}web_alert_cc_{2}".format(tmp_dir, os.sep, ccache_file_name)
      kerberos_env = {"KRB5CCNAME": ccache_file_path}

      # If there are no tickets in the cache or they are expired, perform a kinit, else use what
      # is in the cache (klist -s exits non-zero when the cache is missing/expired)
      klist_path_local = get_klist_path()
      if os.system("{0} -s {1}".format(klist_path_local, ccache_file_path)) != 0:
        kinit_path_local = get_kinit_path()
        logger.debug(
            "[Alert][{0}] Enabling Kerberos authentication via GSSAPI using ccache at {1}.".format(
                self.get_name(), ccache_file_path
            )
        )
        # -l 5m limits the ticket lifetime; output is discarded
        os.system(
            "{0} -l 5m -c {1} -kt {2} {3} > /dev/null".format(
                kinit_path_local, ccache_file_path, kerberos_keytab, kerberos_principal
            )
        )
      else:
        logger.debug(
            "[Alert][{0}] Kerberos authentication via GSSAPI already enabled using ccache at {1}.".format(
                self.get_name(), ccache_file_path
            )
        )

      # check if cookies dir exists, if not then create it
      tmp_dir = self.config.get("agent", "tmp_dir")
      cookies_dir = os.path.join(tmp_dir, "cookies")

      if not os.path.exists(cookies_dir):
        os.makedirs(cookies_dir)

      # a per-request cookie jar, removed again in the finally below
      cookie_file_name = str(uuid.uuid4())
      cookie_file = os.path.join(cookies_dir, cookie_file_name)

      start_time = time.time()

      try:
        # curl does the SPNEGO negotiation; -w '%{http_code}' makes stdout the
        # numeric status and -o /dev/null discards the body
        curl = subprocess.Popen(
            [
                "curl", "--negotiate", "-u", ":", "-b", cookie_file, "-c", cookie_file,
                "-sL", "-w", "%{http_code}", url,
                "--connect-timeout", CURL_CONNECTION_TIMEOUT,
                "-o", "/dev/null",
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            env=kerberos_env,
        )

        curl_stdout, curl_stderr = curl.communicate()
      finally:
        if os.path.isfile(cookie_file):
          os.remove(cookie_file)

      # empty quotes evaluates to false
      if curl_stderr:
        error_msg = curl_stderr

      # empty quotes evaluates to false
      if curl_stdout:
        response_code = int(curl_stdout)

      # NOTE(review): despite the name, this is elapsed seconds — time.time()
      # deltas are in seconds, not milliseconds
      time_millis = time.time() - start_time
    else:
      # kerberos is not involved; use urllib2
      response_code, time_millis, error_msg = self._make_web_request_urllib(url)

    return WebResponse(status_code=response_code, time_millis=time_millis, error_msg=error_msg)
  except Exception, exception:
    if logger.isEnabledFor(logging.DEBUG):
      logger.exception("[Alert][{0}] Unable to make a web request.".format(self.get_name()))

    return WebResponse(status_code=0, time_millis=0, error_msg=str(exception))
def execute(configurations={}, parameters={}, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label

  Keyword arguments:
  configurations (dictionary): a mapping of configuration key to value
  parameters (dictionary): a mapping of script parameter key to value
  host_name (string): the name of this host where the alert is running
  """
  if configurations is None:
    return ('UNKNOWN', ['There were no configurations supplied to the script.'])

  if HIVE_METASTORE_URIS_KEY not in configurations:
    return ('UNKNOWN', ['Hive metastore uris were not supplied to the script.'])

  metastore_uris = configurations[HIVE_METASTORE_URIS_KEY].split(',')

  security_enabled = False
  if SECURITY_ENABLED_KEY in configurations:
    security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

  # defaults
  smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
  smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
  smokeuser = SMOKEUSER_DEFAULT

  # check script params
  if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters:
    smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY]

  if SMOKEUSER_SCRIPT_PARAM_KEY in parameters:
    smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY]

  if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
    smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY]

  # check configurations last as they should always take precedence
  if SMOKEUSER_PRINCIPAL_KEY in configurations:
    smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]

  if SMOKEUSER_KEY in configurations:
    smokeuser = configurations[SMOKEUSER_KEY]

  result_code = None

  try:
    if security_enabled:
      if SMOKEUSER_KEYTAB_KEY in configurations:
        smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY]

      # Get the configured Kerberos executable search paths, if any
      if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
        kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
      else:
        kerberos_executable_search_paths = None

      # format() interpolates {kinit_path_local} etc. from these local names
      kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
      kinitcmd = format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}; ")

      Execute(kinitcmd,
              user=smokeuser,
              path=["/bin/", "/usr/bin/", "/usr/lib/hive/bin/", "/usr/sbin/"],
              timeout=10)

    if host_name is None:
      host_name = socket.getfqdn()

    # BUG FIX: previously metastore_uri was only bound when some URI mentioned
    # this host; otherwise the format() below raised a NameError. Fall back to
    # the first configured URI (split(',') never yields an empty list).
    metastore_uri = metastore_uris[0]
    for uri in metastore_uris:
      if host_name in uri:
        metastore_uri = uri

    # hive conf/bin moved in newer stacks; prefer the new location when present
    conf_dir = HIVE_CONF_DIR_LEGACY
    bin_dir = HIVE_BIN_DIR_LEGACY

    if os.path.exists(HIVE_CONF_DIR):
      conf_dir = HIVE_CONF_DIR
      bin_dir = HIVE_BIN_DIR

    # low retry/timeout hiveconf overrides keep this check fast
    cmd = format("export HIVE_CONF_DIR='{conf_dir}' ; "
                 "hive --hiveconf hive.metastore.uris={metastore_uri}\
                 --hiveconf hive.metastore.client.connect.retry.delay=1\
                 --hiveconf hive.metastore.failure.retries=1\
                 --hiveconf hive.metastore.connect.retries=1\
                 --hiveconf hive.metastore.client.socket.timeout=14\
                 --hiveconf hive.execution.engine=mr -e 'show databases;'")

    start_time = time.time()

    try:
      Execute(cmd,
              user=smokeuser,
              path=["/bin/", "/usr/bin/", "/usr/sbin/", bin_dir],
              timeout=30)

      total_time = time.time() - start_time
      result_code = 'OK'
      label = OK_MESSAGE.format(total_time)
    except Exception:
      # narrowed from a bare except so Ctrl-C / SystemExit still propagate;
      # any hive failure is reported as CRITICAL with the traceback
      result_code = 'CRITICAL'
      label = CRITICAL_MESSAGE.format(host_name, traceback.format_exc())
  except Exception:
    label = traceback.format_exc()
    result_code = 'UNKNOWN'

  return (result_code, [label])
def _make_web_request(self, url):
  """
  Makes an http(s) request to a web resource and returns the http code. If
  there was an error making the request, return 0 for the status code.
  """
  error_msg = None

  try:
    response_code = 0
    kerberos_keytab = None
    kerberos_principal = None

    # resolve the kerberos principal/keytab from configuration, if declared
    if self.uri_property_keys.kerberos_principal is not None:
      kerberos_principal = self._get_configuration_value(
          self.uri_property_keys.kerberos_principal)
      if kerberos_principal is not None:
        # substitute _HOST in kerberos principal with actual fqdn
        kerberos_principal = kerberos_principal.replace('_HOST', self.host_name)

    if self.uri_property_keys.kerberos_keytab is not None:
      kerberos_keytab = self._get_configuration_value(self.uri_property_keys.kerberos_keytab)

    # both principal AND keytab must be present to take the kerberized path;
    # otherwise fall through to the plain urllib request below
    if kerberos_principal is not None and kerberos_keytab is not None:
      # Create the kerberos credentials cache (ccache) file and set it in the environment to use
      # when executing curl. Use the md5 hash of the combination of the principal and keytab file
      # to generate a (relatively) unique cache filename so that we can use it as needed.
      tmp_dir = self.config.get('agent', 'tmp_dir')
      if tmp_dir is None:
        tmp_dir = gettempdir()

      ccache_file_name = _md5("{0}|{1}".format(kerberos_principal, kerberos_keytab)).hexdigest()
      ccache_file_path = "{0}{1}web_alert_cc_{2}".format(tmp_dir, os.sep, ccache_file_name)
      kerberos_env = {'KRB5CCNAME': ccache_file_path}

      # If there are no tickets in the cache or they are expired, perform a kinit, else use what
      # is in the cache (klist -s exits non-zero when the cache is missing/expired)
      klist_path_local = get_klist_path()
      if os.system("{0} -s {1}".format(klist_path_local, ccache_file_path)) != 0:
        kinit_path_local = get_kinit_path()
        logger.debug("[Alert][{0}] Enabling Kerberos authentication via GSSAPI using ccache at {1}.".format(
            self.get_name(), ccache_file_path))
        # -l 5m limits the ticket lifetime; output is discarded
        os.system("{0} -l 5m -c {1} -kt {2} {3} > /dev/null".format(
            kinit_path_local, ccache_file_path, kerberos_keytab, kerberos_principal))
      else:
        logger.debug("[Alert][{0}] Kerberos authentication via GSSAPI already enabled using ccache at {1}.".format(
            self.get_name(), ccache_file_path))

      # check if cookies dir exists, if not then create it
      tmp_dir = self.config.get('agent', 'tmp_dir')
      cookies_dir = os.path.join(tmp_dir, "cookies")

      if not os.path.exists(cookies_dir):
        os.makedirs(cookies_dir)

      # a per-request cookie jar, removed again in the finally below
      cookie_file_name = str(uuid.uuid4())
      cookie_file = os.path.join(cookies_dir, cookie_file_name)

      start_time = time.time()

      try:
        # curl does the SPNEGO negotiation; -w '%{http_code}' makes stdout the
        # numeric status and -o /dev/null discards the body
        curl = subprocess.Popen(['curl', '--negotiate', '-u', ':', '-b', cookie_file,
                                 '-c', cookie_file, '-sL', '-w', '%{http_code}', url,
                                 '--connect-timeout', CURL_CONNECTION_TIMEOUT,
                                 '-o', '/dev/null'],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                env=kerberos_env)

        curl_stdout, curl_stderr = curl.communicate()
      finally:
        if os.path.isfile(cookie_file):
          os.remove(cookie_file)

      # empty quotes evaluates to false
      if curl_stderr:
        error_msg = curl_stderr

      # empty quotes evaluates to false
      if curl_stdout:
        response_code = int(curl_stdout)

      # NOTE(review): despite the name, this is elapsed seconds — time.time()
      # deltas are in seconds, not milliseconds
      time_millis = time.time() - start_time
    else:
      # kerberos is not involved; use urllib2
      response_code, time_millis, error_msg = self._make_web_request_urllib(url)

    return WebResponse(status_code=response_code, time_millis=time_millis, error_msg=error_msg)
  except Exception, exception:
    if logger.isEnabledFor(logging.DEBUG):
      logger.exception("[Alert][{0}] Unable to make a web request.".format(self.get_name()))

    return WebResponse(status_code=0, time_millis=0, error_msg=str(exception))
def execute(configurations={}, parameters={}, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label

  Keyword arguments:
  configurations (dictionary): a mapping of configuration key to value
  parameters (dictionary): a mapping of script parameter key to value
  host_name (string): the name of this host where the alert is running
  """
  if configurations is None:
    return ('UNKNOWN', ['There were no configurations supplied to the script.'])

  if HIVE_METASTORE_URIS_KEY not in configurations:
    return ('UNKNOWN', ['Hive metastore uris were not supplied to the script.'])

  metastore_uris = configurations[HIVE_METASTORE_URIS_KEY].split(',')

  security_enabled = False
  if SECURITY_ENABLED_KEY in configurations:
    security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

  # allow the check timeout to be tuned via script parameters
  check_command_timeout = CHECK_COMMAND_TIMEOUT_DEFAULT
  if CHECK_COMMAND_TIMEOUT_KEY in parameters:
    check_command_timeout = float(parameters[CHECK_COMMAND_TIMEOUT_KEY])

  # defaults
  smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
  smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
  smokeuser = SMOKEUSER_DEFAULT

  # check script params
  if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters:
    smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY]

  if SMOKEUSER_SCRIPT_PARAM_KEY in parameters:
    smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY]

  if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
    smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY]

  # check configurations last as they should always take precedence
  if SMOKEUSER_PRINCIPAL_KEY in configurations:
    smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]

  if SMOKEUSER_KEY in configurations:
    smokeuser = configurations[SMOKEUSER_KEY]

  result_code = None

  try:
    if security_enabled:
      if SMOKEUSER_KEYTAB_KEY in configurations:
        smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY]

      # Get the configured Kerberos executable search paths, if any
      if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
        kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
      else:
        kerberos_executable_search_paths = None

      # format() interpolates {kinit_path_local} etc. from these local names
      kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
      kinitcmd = format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}; ")

      Execute(kinitcmd,
              user=smokeuser,
              path=["/bin/", "/usr/bin/", "/usr/lib/hive/bin/", "/usr/sbin/"],
              timeout=10)

    if host_name is None:
      host_name = socket.getfqdn()

    # BUG FIX: previously metastore_uri was only bound when some URI mentioned
    # this host; otherwise the format() below raised a NameError. Fall back to
    # the first configured URI (split(',') never yields an empty list).
    metastore_uri = metastore_uris[0]
    for uri in metastore_uris:
      if host_name in uri:
        metastore_uri = uri

    # hive conf/bin moved in newer stacks; prefer the new location when present
    conf_dir = HIVE_CONF_DIR_LEGACY
    bin_dir = HIVE_BIN_DIR_LEGACY

    if os.path.exists(HIVE_CONF_DIR):
      conf_dir = HIVE_CONF_DIR
      bin_dir = HIVE_BIN_DIR

    # low retry/timeout hiveconf overrides keep this check fast
    cmd = format("export HIVE_CONF_DIR='{conf_dir}' ; "
                 "hive --hiveconf hive.metastore.uris={metastore_uri}\
                 --hiveconf hive.metastore.client.connect.retry.delay=1\
                 --hiveconf hive.metastore.failure.retries=1\
                 --hiveconf hive.metastore.connect.retries=1\
                 --hiveconf hive.metastore.client.socket.timeout=14\
                 --hiveconf hive.execution.engine=mr -e 'show databases;'")

    start_time = time.time()

    try:
      Execute(cmd,
              user=smokeuser,
              path=["/bin/", "/usr/bin/", "/usr/sbin/", bin_dir],
              timeout=int(check_command_timeout))

      total_time = time.time() - start_time
      result_code = 'OK'
      label = OK_MESSAGE.format(total_time)
    except Exception:
      # narrowed from a bare except so Ctrl-C / SystemExit still propagate;
      # any hive failure is reported as CRITICAL with the traceback
      result_code = 'CRITICAL'
      label = CRITICAL_MESSAGE.format(host_name, traceback.format_exc())
  except Exception:
    label = traceback.format_exc()
    result_code = 'UNKNOWN'

  return (result_code, [label])
# Cluster host topology from the command JSON (empty lists when absent)
all_hosts = default("/clusterHostInfo/all_hosts", [])
all_racks = default("/clusterHostInfo/all_racks", [])
all_ipv4_ips = default("/clusterHostInfo/all_ipv4_ips", [])
slave_hosts = default("/clusterHostInfo/slave_hosts", [])

#topology files
net_topology_script_file_path = "/etc/hadoop/conf/topology_script.py"
net_topology_script_dir = os.path.dirname(net_topology_script_file_path)
net_topology_mapping_data_file_name = 'topology_mappings.data'
# the mapping data file lives next to the topology script
net_topology_mapping_data_file_path = os.path.join(
    net_topology_script_dir, net_topology_mapping_data_file_name)

#Added logic to create /tmp and /user directory for HCFS stack.
has_core_site = 'core-site' in config['configurations']
hdfs_user_keytab = config['configurations']['hadoop-env']['hdfs_user_keytab']
kinit_path_local = get_kinit_path()
stack_version_unformatted = config['hostLevelParams']['stack_version']
stack_version_formatted = format_stack_version(stack_version_unformatted)
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")
hdfs_principal_name = default('/configurations/hadoop-env/hdfs_principal_name', None)
hdfs_site = config['configurations']['hdfs-site']
default_fs = config['configurations']['core-site']['fs.defaultFS']

# smoke-test user HDFS home directory; format() interpolates {smoke_user}
smoke_user = config['configurations']['cluster-env']['smokeuser']
smoke_hdfs_user_dir = format("/user/{smoke_user}")
smoke_hdfs_user_mode = 0770  # Python 2 octal literal: rwxrwx---

##### Namenode RPC ports - metrics config section start #####

# Figure out the rpc ports for current namenode
nn_rpc_client_port = None
def get_check_command(oozie_url, host_name, configurations, parameters):
  """
  Builds the shell command used to check the status of the Oozie server.

  Keyword arguments:
  oozie_url (string): the Oozie server URL to pass to "oozie admin -status"
  host_name (string): the name of this host where the alert is running
  configurations (dictionary): a mapping of configuration key to value
  parameters (dictionary): a mapping of script parameter key to value

  Returns a tuple of (command, kerberos_env, smokeuser). kerberos_env is
  None when security is disabled; otherwise it is a dict carrying the
  KRB5CCNAME pointing at the alert's private credentials cache.
  """
  kerberos_env = None

  # the smoke-test user executes the check; configuration wins over the default
  smokeuser = configurations.get(SMOKEUSER_KEY, SMOKEUSER_DEFAULT)

  security_enabled = False
  if SECURITY_ENABLED in configurations:
    security_enabled = str(configurations[SECURITY_ENABLED]).upper() == 'TRUE'

  if security_enabled:
    # defaults
    smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
    smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT

    # script params override the defaults...
    if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters:
      smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY]

    if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
      smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY]

    # ...but configurations are checked last as they always take precedence
    if SMOKEUSER_PRINCIPAL_KEY in configurations:
      smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]

    if SMOKEUSER_KEYTAB_KEY in configurations:
      smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY]

    # Create the kerberos credentials cache (ccache) file and set it in the
    # environment to use when executing curl
    env = Environment.get_instance()
    ccache_file = "{0}{1}oozie_alert_cc_{2}".format(env.tmp_dir, os.sep, os.getpid())
    kerberos_env = {'KRB5CCNAME': ccache_file}

    # Get the configured Kerberos executable search paths, if any
    kerberos_executable_search_paths = configurations.get(KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY)

    klist_path_local = get_klist_path(kerberos_executable_search_paths)
    klist_command = format("{klist_path_local} -s {ccache_file}")

    # Determine if we need to kinit by testing to see if the relevant cache
    # exists and has non-expired tickets. Tickets are marked to expire after
    # 5 minutes to help reduce the number of kinits we do but recover quickly
    # when keytabs are regenerated
    return_code, _ = call(klist_command, user=smokeuser)
    if return_code != 0:
      kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
      kinit_command = format("{kinit_path_local} -l 5m -kt {smokeuser_keytab} {smokeuser_principal}; ")

      # kinit
      Execute(kinit_command, environment=kerberos_env, user=smokeuser)

  # oozie configuration directory uses a symlink when > HDP 2.2
  oozie_config_directory = OOZIE_CONF_DIR_LEGACY
  if os.path.exists(OOZIE_CONF_DIR):
    oozie_config_directory = OOZIE_CONF_DIR

  command = "source {0}/oozie-env.sh ; oozie admin -oozie {1} -status".format(
    oozie_config_directory, oozie_url)

  return (command, kerberos_env, smokeuser)
# Spark2 History Server UI port; when SSL is enabled the SSL port is the
# plain port shifted up by 400 (stack convention).
spark_history_ui_port = config['configurations']['spark2-defaults'][
  'spark.history.ui.port']
if ui_ssl_enabled:
  spark_history_ui_port = str(int(spark_history_ui_port) + 400)

# raw template contents rendered into the spark2 conf directory
spark_env_sh = config['configurations']['spark2-env']['content']
spark_log4j_properties = config['configurations']['spark2-log4j-properties'][
  'content']
spark_metrics_properties = config['configurations'][
  'spark2-metrics-properties']['content']

hive_server_host = default("/clusterHostInfo/hive_server_host", [])
is_hive_installed = not len(hive_server_host) == 0

security_enabled = config['configurations']['cluster-env']['security_enabled']
kinit_path_local = get_kinit_path(
  default('/configurations/kerberos-env/executable_search_paths', None))
spark_kerberos_keytab = config['configurations']['spark2-defaults'][
  'spark.history.kerberos.keytab']
spark_kerberos_principal = config['configurations']['spark2-defaults'][
  'spark.history.kerberos.principal']

spark_thriftserver_hosts = default(
  "/clusterHostInfo/spark2_thriftserver_hosts", [])
has_spark_thriftserver = not len(spark_thriftserver_hosts) == 0

# hive-site params
spark_hive_properties = {
  'hive.metastore.uris':
  config['configurations']['hive-site']['hive.metastore.uris']
}
#For SQLA explicitly disable audit to DB for Ranger if xa_audit_db_flavor == 'sqla': xa_audit_db_is_enabled = False namenode_hosts = default("/clusterHostInfo/namenode_host", []) has_namenode = not len(namenode_hosts) == 0 hdfs_user = config['configurations']['hadoop-env']['hdfs_user'] if has_namenode else None hdfs_user_keytab = config['configurations']['hadoop-env']['hdfs_user_keytab'] if has_namenode else None hdfs_principal_name = config['configurations']['hadoop-env']['hdfs_principal_name'] if has_namenode else None hdfs_site = config['configurations']['hdfs-site'] if has_namenode else None default_fs = config['configurations']['core-site']['fs.defaultFS'] if has_namenode else None hadoop_bin_dir = hdp_select.get_hadoop_dir("bin") if has_namenode else None hadoop_conf_dir = conf_select.get_hadoop_conf_dir() if has_namenode else None kinit_path_local = get_kinit_path(default('/configurations/kerberos-env/executable_search_paths', None)) import functools #create partial functions with common arguments for every HdfsResource call #to create/delete hdfs directory/file/copyfromlocal we need to call params.HdfsResource in code HdfsResource = functools.partial( HdfsResource, user=hdfs_user, security_enabled = security_enabled, keytab = hdfs_user_keytab, kinit_path_local = kinit_path_local, hadoop_bin_dir = hadoop_bin_dir, hadoop_conf_dir = hadoop_conf_dir, principal_name = hdfs_principal_name, hdfs_site = hdfs_site, default_fs = default_fs
# server configurations config = Script.get_config() yarn_user = "******" hdfs_user = "******" smokeuser = "******" config_dir = os.environ["HADOOP_CONF_DIR"] hadoop_home = os.environ["HADOOP_HOME"] yarn_home = os.environ["HADOOP_YARN_HOME"] hadoop_ssl_enabled = default("/configurations/core-site/hadoop.ssl.enabled", False) _authentication = config['configurations']['core-site']['hadoop.security.authentication'] security_enabled = ( not is_empty(_authentication) and _authentication == 'kerberos') smoke_user_keytab = config['configurations']['hadoop-env']['smokeuser_keytab'] kinit_path_local = functions.get_kinit_path() rm_host = config['clusterHostInfo']['rm_host'][0] rm_port = config['configurations']['yarn-site']['yarn.resourcemanager.webapp.address'].split(':')[-1] rm_https_port = "8090" rm_webui_address = format("{rm_host}:{rm_port}") rm_webui_https_address = format("{rm_host}:{rm_https_port}") hs_host = config['clusterHostInfo']['hs_host'][0] hs_port = config['configurations']['mapred-site']['mapreduce.jobhistory.webapp.address'].split(':')[-1] hs_webui_address = format("{hs_host}:{hs_port}") hadoop_mapred2_jar_location = os.path.join(os.environ["HADOOP_COMMON_HOME"], "share", "hadoop", "mapreduce") hadoopMapredExamplesJarName = "hadoop-mapreduce-examples-2.*.jar" exclude_hosts = default("/clusterHostInfo/decom_nm_hosts", []) exclude_file_path = default("/configurations/yarn-site/yarn.resourcemanager.nodes.exclude-path","/etc/hadoop/conf/yarn.exclude")
def execute(configurations={}, parameters={}, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label

  Keyword arguments:
  configurations (dictionary): a mapping of configuration key to value
  parameters (dictionary): a mapping of script parameter key to value
  host_name (string): the name of this host where the alert is running
  """
  if configurations is None:
    return ('UNKNOWN', ['There were no configurations supplied to the script.'])

  transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT
  if HIVE_SERVER_TRANSPORT_MODE_KEY in configurations:
    transport_mode = configurations[HIVE_SERVER_TRANSPORT_MODE_KEY]

  # the thrift port to probe depends on the transport mode (binary vs http)
  port = THRIFT_PORT_DEFAULT
  if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in configurations:
    port = int(configurations[HIVE_SERVER_THRIFT_PORT_KEY])
  elif transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in configurations:
    port = int(configurations[HIVE_SERVER_THRIFT_HTTP_PORT_KEY])

  security_enabled = False
  if SECURITY_ENABLED_KEY in configurations:
    security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

  hive_server2_authentication = HIVE_SERVER2_AUTHENTICATION_DEFAULT
  if HIVE_SERVER2_AUTHENTICATION_KEY in configurations:
    hive_server2_authentication = configurations[HIVE_SERVER2_AUTHENTICATION_KEY]

  # defaults
  smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
  smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
  smokeuser = SMOKEUSER_DEFAULT

  # check script params
  if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters:
    smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY]

  if SMOKEUSER_SCRIPT_PARAM_KEY in parameters:
    smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY]

  if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
    smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY]

  # check configurations last as they should always take precedence
  if SMOKEUSER_PRINCIPAL_KEY in configurations:
    smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]

  if SMOKEUSER_KEY in configurations:
    smokeuser = configurations[SMOKEUSER_KEY]

  result_code = None

  if security_enabled:
    hive_server_principal = HIVE_SERVER_PRINCIPAL_DEFAULT
    if HIVE_SERVER_PRINCIPAL_KEY in configurations:
      hive_server_principal = configurations[HIVE_SERVER_PRINCIPAL_KEY]

    # the keytab is only consulted from configurations when secured
    if SMOKEUSER_KEYTAB_KEY in configurations:
      smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY]

    # Get the configured Kerberos executable search paths, if any
    if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
      kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
    else:
      kerberos_executable_search_paths = None

    kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
    kinitcmd=format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}; ")
  else:
    hive_server_principal = None
    kinitcmd=None

  try:
    if host_name is None:
      host_name = socket.getfqdn()

    start_time = time.time()

    try:
      # probe the thrift port, kinit-ing first if on a secured cluster
      hive_check.check_thrift_port_sasl(host_name, port, hive_server2_authentication,
        hive_server_principal, kinitcmd, smokeuser, transport_mode = transport_mode)
      result_code = 'OK'
      total_time = time.time() - start_time
      label = OK_MESSAGE.format(total_time, port)
    except Exception, exception:
      result_code = 'CRITICAL'
      label = CRITICAL_MESSAGE.format(host_name, port, str(exception))
  except Exception, e:
    label = str(e)
    result_code = 'UNKNOWN'
  # NOTE(review): the trailing "return ((result_code, [label]))" is not
  # visible in this fragment — confirm it exists after the except blocks.
def execute(configurations={}, parameters={}, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label

  Keyword arguments:
  configurations (dictionary): a mapping of configuration key to value
  parameters (dictionary): a mapping of script parameter key to value
  host_name (string): the name of this host where the alert is running
  """
  # location of the spark client whose beeline binary runs the check
  spark_home = os.path.join(stack_root, 'spark')

  if configurations is None:
    return ('UNKNOWN', ['There were no configurations supplied to the script.'])

  transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT
  if HIVE_SERVER_TRANSPORT_MODE_KEY in configurations:
    transport_mode = configurations[HIVE_SERVER_TRANSPORT_MODE_KEY]

  # the thrift port to probe depends on the transport mode (binary vs http)
  port = THRIFT_PORT_DEFAULT
  if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in configurations:
    port = int(configurations[HIVE_SERVER_THRIFT_PORT_KEY])
  elif transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in configurations:
    port = int(configurations[HIVE_SERVER_THRIFT_HTTP_PORT_KEY])

  security_enabled = False
  if SECURITY_ENABLED_KEY in configurations:
    security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

  hive_kerberos_keytab = None
  if HIVE_SERVER2_KERBEROS_KEYTAB in configurations:
    hive_kerberos_keytab = configurations[HIVE_SERVER2_KERBEROS_KEYTAB]

  if host_name is None:
    host_name = socket.getfqdn()

  hive_principal = None
  if HIVE_SERVER2_PRINCIPAL_KEY in configurations:
    hive_principal = configurations[HIVE_SERVER2_PRINCIPAL_KEY]
    # substitute _HOST in the principal with this host's lower-cased fqdn
    hive_principal = hive_principal.replace('_HOST', host_name.lower())

  # Get the configured Kerberos executable search paths, if any
  if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
    kerberos_executable_search_paths = configurations[
      KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
  else:
    kerberos_executable_search_paths = None

  kinit_path_local = get_kinit_path(kerberos_executable_search_paths)

  sparkuser = configurations[SPARK_USER_KEY]

  if security_enabled:
    kinitcmd = format("{kinit_path_local} -kt {hive_kerberos_keytab} {hive_principal}; ")
    # prevent concurrent kinit
    kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
    kinit_lock.acquire()
    try:
      Execute(kinitcmd, user=sparkuser)
    finally:
      kinit_lock.release()

  result_code = None

  try:
    if host_name is None:
      host_name = socket.getfqdn()

    if security_enabled:
      beeline_url = [
        "'jdbc:hive2://{host_name}:{port}/default;principal={hive_principal}'",
        "transportMode={transport_mode}"
      ]
    else:
      beeline_url = [
        "'jdbc:hive2://{host_name}:{port}/default'",
        "transportMode={transport_mode}"
      ]

    # append url according to used transport
    beeline_cmd = os.path.join(spark_home, "bin", "beeline")
    # the leading "!" inverts grep's exit status: the command succeeds only
    # when none of the connection-failure strings appear in beeline's output
    cmd = "! %s -u '%s' -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL' -e 'Error: Could not open'" % \
          (beeline_cmd, format(";".join(beeline_url)))

    start_time = time.time()

    try:
      Execute(
        cmd,
        user=sparkuser,
        path=[beeline_cmd],
        timeout=CHECK_COMMAND_TIMEOUT_DEFAULT)
      total_time = time.time() - start_time
      result_code = 'OK'
      label = OK_MESSAGE.format(total_time, port)
    except:
      # deliberately broad: any failure of the beeline probe is CRITICAL
      result_code = 'CRITICAL'
      label = CRITICAL_MESSAGE.format(host_name, port, traceback.format_exc())
  except:
    # unexpected failure building/running the check itself
    label = traceback.format_exc()
    result_code = 'UNKNOWN'

  return (result_code, [label])