def kinit(kinit_path_local, keytab_path, principal_name, execute_user=None):
    """
    Run kinit with a keytab while holding the global Kerberos lock.

    :param kinit_path_local: path of the kinit executable to invoke
    :param keytab_path: keytab handed to kinit through -kt
    :param principal_name: principal to obtain the ticket for
    :param execute_user: user passed to Execute; when None, Execute is called
      without a user argument
    """
    # Build and log the command BEFORE taking the lock: if formatting or
    # logging raises, the lock must not be left acquired.
    kinitcmd = "{0} -kt {1} {2}; ".format(kinit_path_local, keytab_path, principal_name)
    Logger.info("kinit command: " + kinitcmd + " as user: " + str(execute_user))

    # prevent concurrent kinit
    kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
    kinit_lock.acquire()
    try:
        if execute_user is None:
            Execute(kinitcmd)
        else:
            Execute(kinitcmd, user=execute_user)
    finally:
        kinit_lock.release()
def check_thrift_port_sasl(address, port, hive_auth="NOSASL", key=None, kinitcmd=None, smokeuser='******',
                           transport_mode="binary", http_endpoint="cliservice", ssl=False, ssl_keystore=None,
                           ssl_password=None, check_command_timeout=30):
    """
    Hive thrift SASL port check

    Builds a beeline JDBC URL from the arguments, optionally runs the supplied
    kinit command under the global Kerberos lock, and then executes a shell
    pipeline that inverts ("!") a grep for 'Connection refused' / 'Invalid URL'
    in the beeline output.

    :param address: host placed in the JDBC URL
    :param port: port placed in the JDBC URL; a str is cast to int
    :param hive_auth: when "NOSASL", 'auth=noSasl' is appended to the URL
    :param key: value used for 'principal=' when kinitcmd is given
    :param kinitcmd: kinit command to execute as smokeuser before the check
    :param smokeuser: user both the kinit and the beeline command run as
    :param transport_mode: value of 'transportMode='; "http" also adds 'httpPath='
    :param http_endpoint: value of 'httpPath=' for http transport
    :param ssl: enables the ssl URL options (together with keystore and password);
      may be a bool or its textual form
    :param ssl_keystore: value of 'sslTrustStore='
    :param ssl_password: value of 'trustStorePassword='
    :param check_command_timeout: timeout passed to Execute for the beeline command
    """
    # check params to be correctly passed, if not - try to cast them
    if isinstance(port, str):
        port = int(port)

    if isinstance(ssl, str):
        # bool("false") is True, which would wrongly switch SSL on; interpret
        # the text instead. Any other non-empty string stays truthy as before.
        ssl = ssl.strip().lower() not in ("", "false", "0", "no", "off")

    # to pass as beeline argument
    # NOTE(review): ssl_str and the other locals are presumably picked up by
    # name from this frame by format(); do not rename them - confirm.
    ssl_str = str(ssl).lower()
    beeline_url = ['jdbc:hive2://{address}:{port}/', "transportMode={transport_mode}"]

    # append url according to used transport
    if transport_mode == "http":
        beeline_url.append('httpPath={http_endpoint}')

    # append url according to used auth
    if hive_auth == "NOSASL":
        beeline_url.append('auth=noSasl')

    # append url according to ssl configuration
    if ssl and ssl_keystore is not None and ssl_password is not None:
        beeline_url.extend(['ssl={ssl_str}', 'sslTrustStore={ssl_keystore}',
                            'trustStorePassword={ssl_password!p}'])

    # append url according to principal and execute kinit
    if kinitcmd:
        beeline_url.append('principal={key}')

        # prevent concurrent kinit
        kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
        kinit_lock.acquire()
        try:
            Execute(kinitcmd, user=smokeuser)
        finally:
            kinit_lock.release()

    cmd = "! beeline -u '%s' -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'" % \
          format(";".join(beeline_url))

    Execute(cmd,
            user=smokeuser,
            path=["/bin/", "/usr/bin/", "/usr/lib/hive/bin/", "/usr/sbin/"],
            timeout=check_command_timeout)
def curl_krb_request(tmp_dir, keytab, principal, url, cache_file_prefix,
                     krb_exec_search_paths, return_only_http_code, caller_label, user,
                     connection_timeout=CONNECTION_TIMEOUT_DEFAULT,
                     kinit_timer_ms=DEFAULT_KERBEROS_KINIT_TIMER_MS,
                     method='', body='', header=''):
    """
    Makes a curl request using the kerberos credentials stored in a calculated cache file. The
    cache file is created by combining the supplied principal, keytab, user, and request name into
    a unique hash.

    This function will use the klist command to determine if the cache is expired and will perform
    a kinit if necessary. Additionally, it has an internal timer to force a kinit after a
    configurable amount of time. This is to prevent boundary issues where requests hit the edge
    of a ticket's lifetime.

    :param tmp_dir: the directory to use for storing the local kerberos cache for this request.
    :param keytab: the location of the keytab to use when performing a kinit
    :param principal: the principal to use when performing a kinit
    :param url: the URL to request
    :param cache_file_prefix: an identifier used to build the unique cache name for this request.
                              This ensures that multiple requests can use the same cache.
    :param krb_exec_search_paths: the search path to use for invoking kerberos binaries
    :param return_only_http_code: True to return only the HTTP code, False to return GET content
    :param caller_label: an identifier to give context into the caller of this module (used for logging)
    :param user: the user to invoke the curl command as
    :param connection_timeout: if specified, a connection timeout for curl (default 10 seconds)
    :param kinit_timer_ms: if specified, the time (in ms), before forcing a kinit even if the
                           klist cache is still valid.
    :param method: optional HTTP method passed to curl with -X; only used when
                   return_only_http_code is False
    :param body: optional request body passed with -d; only used together with method
    :param header: optional header passed with -H; only used together with method
    :return: a tuple (response, error_msg, elapsed). response is the HTTP code as an int when
             return_only_http_code is True, otherwise curl's standard output; it is "" when curl
             printed nothing. error_msg is curl's standard error or None. elapsed is the
             difference of two time.time() readings (i.e. seconds).
    """
    import uuid

    # start off false
    is_kinit_required = False

    # Create the kerberos credentials cache (ccache) file and set it in the environment to use
    # when executing curl. Use the md5 hash of the combination of the principal and keytab file
    # to generate a (relatively) unique cache filename so that we can use it as needed. Scope
    # this file by user in order to prevent sharing of cache files by multiple users.
    ccache_file_name = _md5("{0}|{1}".format(principal, keytab)).hexdigest()

    curl_krb_cache_path = os.path.join(tmp_dir, "curl_krb_cache")
    if not os.path.exists(curl_krb_cache_path):
        os.makedirs(curl_krb_cache_path)
    os.chmod(curl_krb_cache_path, 0o777)

    ccache_file_path = "{0}{1}{2}_{3}_cc_{4}".format(curl_krb_cache_path, os.sep, cache_file_prefix,
                                                     user, ccache_file_name)
    kerberos_env = {'KRB5CCNAME': ccache_file_path}

    # concurrent kinit's can cause the following error:
    # Internal credentials cache error while storing credentials while getting initial credentials
    kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
    kinit_lock.acquire()
    try:
        # If there are no tickets in the cache or they are expired, perform a kinit, else use what
        # is in the cache
        if krb_exec_search_paths:
            klist_path_local = get_klist_path(krb_exec_search_paths)
        else:
            klist_path_local = get_klist_path()

        # take a look at the last time kinit was run for the specified cache and force a new
        # kinit if it's time; this helps to avoid problems approaching ticket boundary when
        # executing a klist and then a curl
        last_kinit_time = _KINIT_CACHE_TIMES.get(ccache_file_name, 0)
        current_time = long(time.time())

        # time.time() (and therefore the recorded kinit times) is in seconds while the
        # timer is configured in milliseconds, so convert before comparing; mixing the
        # units made the timer practically never fire.
        if current_time - last_kinit_time > kinit_timer_ms / 1000.0:
            is_kinit_required = True

        # if the time has not expired, double-check that the cache still has a valid ticket
        if not is_kinit_required:
            klist_command = "{0} -s {1}".format(klist_path_local, ccache_file_path)
            is_kinit_required = (shell.call(klist_command, user=user)[0] != 0)

        # if kinit is required, the perform the kinit
        if is_kinit_required:
            if krb_exec_search_paths:
                kinit_path_local = get_kinit_path(krb_exec_search_paths)
            else:
                kinit_path_local = get_kinit_path()

            logger.debug("Enabling Kerberos authentication for %s via GSSAPI using ccache at %s",
                         caller_label, ccache_file_path)

            # kinit; there's no need to set a ticket timeout as this will use the default invalidation
            # configured in the krb5.conf - regenerating keytabs will not prevent an existing cache
            # from working correctly
            shell.checked_call("{0} -c {1} -kt {2} {3} > /dev/null".format(kinit_path_local,
                               ccache_file_path, keytab, principal), user=user)

            # record kinit time (seconds)
            _KINIT_CACHE_TIMES[ccache_file_name] = current_time
        else:
            # no kinit needed, use the cache
            logger.debug("Kerberos authentication for %s via GSSAPI already enabled using ccache at %s.",
                         caller_label, ccache_file_path)
    finally:
        kinit_lock.release()

    # check if cookies dir exists, if not then create it
    cookies_dir = os.path.join(tmp_dir, "cookies")
    if not os.path.exists(cookies_dir):
        os.makedirs(cookies_dir)

    cookie_file_name = str(uuid.uuid4())
    cookie_file = os.path.join(cookies_dir, cookie_file_name)

    start_time = time.time()
    error_msg = None

    # setup timeouts for the request; ensure we use integers since that is what curl needs
    connection_timeout = int(connection_timeout)
    maximum_timeout = connection_timeout + 2

    try:
        if return_only_http_code:
            _, curl_stdout, curl_stderr = get_user_call_output(
                ['curl', '-L', '-k', '--negotiate', '-u', ':', '-b', cookie_file, '-c', cookie_file,
                 '-w', '%{http_code}', url, '--connect-timeout', str(connection_timeout),
                 '--max-time', str(maximum_timeout), '-o', '/dev/null'],
                user=user, env=kerberos_env)
        else:
            # returns response body
            curl_command = ['curl', '-L', '-k', '--negotiate', '-u', ':', '-b', cookie_file,
                            '-c', cookie_file, url, '--connect-timeout', str(connection_timeout),
                            '--max-time', str(maximum_timeout)]

            # header and body are only honoured when a method is given; the argument
            # order (-H, -X, -d) matches every combination handled before
            if method:
                if header:
                    curl_command.extend(['-H', header])
                curl_command.extend(['-X', method])
                if body:
                    curl_command.extend(['-d', body])

            _, curl_stdout, curl_stderr = get_user_call_output(curl_command, user=user,
                                                               env=kerberos_env)
    except Fail:
        if logger.isEnabledFor(logging.DEBUG):
            logger.exception("Unable to make a curl request for {0}.".format(caller_label))
        raise
    finally:
        # the per-request cookie jar is always discarded
        if os.path.isfile(cookie_file):
            os.remove(cookie_file)

    # empty quotes evaluates to false
    if curl_stderr:
        error_msg = curl_stderr

    time_millis = time.time() - start_time

    # empty quotes evaluates to false
    if curl_stdout:
        if return_only_http_code:
            return (int(curl_stdout), error_msg, time_millis)
        else:
            return (curl_stdout, error_msg, time_millis)

    logger.debug("The curl response for %s is empty; standard error = %s",
                 caller_label, str(error_msg))

    return ("", error_msg, time_millis)
def get_check_command(oozie_url, host_name, configurations, parameters, only_kinit):
    """
    Build the Oozie status command and, on secured clusters, run kinit first.

    When is_security_enabled(configurations) is true, the principal and keytab are
    resolved (defaults, then script parameters, then configurations), a per-process
    credential cache path is derived and a kinit command is executed under the
    global Kerberos lock.

    :param oozie_url: URL passed to 'oozie admin -oozie'
    :param host_name: host name substituted for '_HOST' in the principal
    :param configurations: mapping of configuration key to value
    :param parameters: mapping of script parameter key to value
    :param only_kinit: when true, kinit is run unconditionally; otherwise it is
      only run if 'klist -s' on the cache fails
    :return: tuple (command, kerberos_env, user); kerberos_env is None when
      security is not enabled
    """
    user = configurations[USER_KEY] if USER_KEY in configurations else USER_DEFAULT
    kerberos_env = None

    if is_security_enabled(configurations):
        # defaults
        user_principal = USER_PRINCIPAL_DEFAULT
        user_keytab = USER_KEYTAB_DEFAULT

        # script params first, configurations last as they should always take precedence
        principal_sources = ((parameters, USER_PRINCIPAL_SCRIPT_PARAM_KEY),
                             (configurations, USER_PRINCIPAL_KEY))
        for lookup, lookup_key in principal_sources:
            if lookup_key in lookup:
                user_principal = lookup[lookup_key].replace('_HOST', host_name.lower())

        keytab_sources = ((parameters, USER_KEYTAB_SCRIPT_PARAM_KEY),
                          (configurations, USER_KEYTAB_KEY))
        for lookup, lookup_key in keytab_sources:
            if lookup_key in lookup:
                user_keytab = lookup[lookup_key]

        # Create the kerberos credentials cache (ccache) file and set it in the environment to use
        # when executing curl
        ccache_file = "{0}{1}oozie_alert_cc_{2}".format(
            Environment.get_instance().tmp_dir, os.sep, os.getpid())
        kerberos_env = {'KRB5CCNAME': ccache_file}

        # Get the configured Kerberos executable search paths, if any
        if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
            kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
        else:
            kerberos_executable_search_paths = None

        klist_path_local = get_klist_path(kerberos_executable_search_paths)
        kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
        kinit_part_command = format(
            "{kinit_path_local} -l 5m20s -c {ccache_file} -kt {user_keytab} {user_principal}; ")

        # Determine if we need to kinit by testing to see if the relevant cache exists and has
        # non-expired tickets. Tickets are marked to expire after 5 minutes to help reduce the
        # number of kinits we do but recover quickly when keytabs are regenerated
        kinit_command = kinit_part_command
        if not only_kinit:
            klist_guard = "{0} -s {1} || ".format(klist_path_local, ccache_file)
            kinit_command = klist_guard + kinit_part_command

        # prevent concurrent kinit
        kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
        kinit_lock.acquire()
        try:
            Execute(kinit_command, environment=kerberos_env, user=user)
        finally:
            kinit_lock.release()

    # oozie configuration directory using a symlink
    if os.path.exists(OOZIE_CONF_DIR):
        oozie_config_directory = OOZIE_CONF_DIR
    else:
        oozie_config_directory = OOZIE_CONF_DIR_LEGACY

    command = "source {0}/oozie-env.sh ; oozie admin -oozie {1} -status".format(
        oozie_config_directory, oozie_url)

    return (command, kerberos_env, user)
def execute(configurations={}, parameters={}, host_name=None):
    """
    Returns a tuple containing the result code and a pre-formatted result label

    Runs 'hive --service llapstatus' for the configured LLAP application (after a
    kinit under the global Kerberos lock when security is enabled) and maps the
    reported state to a result code: RUNNING_ALL is OK, RUNNING_PARTIAL is OK when
    at least 80% of the desired instances are live and CRITICAL otherwise, any
    other state is CRITICAL. Any exception yields UKNOWN_STATUS_CODE with the
    traceback as label.

    Keyword arguments:
    configurations (dictionary): a mapping of configuration key to value
    parameters (dictionary): a mapping of script parameter key to value
    host_name (string): the name of this host where the alert is running
    """
    # NOTE: the default dictionaries are shared between calls; they are only read here.

    # referenced by name from the llapstatus command template below
    LLAP_APP_STATUS_CMD_TIMEOUT = 0

    if configurations is None:
        return ('UNKNOWN', ['There were no configurations supplied to the script.'])

    result_code = None

    try:
        security_enabled = False
        if SECURITY_ENABLED_KEY in configurations:
            security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

        # The timeout is a script parameter: test the same mapping it is read from
        # (previously the key was tested in configurations but read from parameters).
        check_command_timeout = CHECK_COMMAND_TIMEOUT_DEFAULT
        if parameters is not None and CHECK_COMMAND_TIMEOUT_KEY in parameters:
            check_command_timeout = int(parameters[CHECK_COMMAND_TIMEOUT_KEY])

        hive_user = HIVE_USER_DEFAULT
        if HIVE_USER_KEY in configurations:
            hive_user = configurations[HIVE_USER_KEY]

        llap_app_name = LLAP_APP_NAME_DEFAULT
        if LLAP_APP_NAME_KEY in configurations:
            llap_app_name = configurations[LLAP_APP_NAME_KEY]

        if security_enabled:
            if HIVE_PRINCIPAL_KEY in configurations:
                llap_principal = configurations[HIVE_PRINCIPAL_KEY]
            else:
                llap_principal = HIVE_PRINCIPAL_DEFAULT
            llap_principal = llap_principal.replace('_HOST', host_name.lower())

            llap_keytab = HIVE_PRINCIPAL_KEYTAB_DEFAULT
            if HIVE_PRINCIPAL_KEYTAB_KEY in configurations:
                llap_keytab = configurations[HIVE_PRINCIPAL_KEYTAB_KEY]

            # Get the configured Kerberos executable search paths, if any
            if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
                kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
            else:
                kerberos_executable_search_paths = None

            kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
            kinitcmd = format("{kinit_path_local} -kt {llap_keytab} {llap_principal}; ")

            # prevent concurrent kinit
            kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
            kinit_lock.acquire()
            try:
                Execute(kinitcmd, user=hive_user,
                        path=["/bin/", "/usr/bin/", "/usr/lib/hive/bin/", "/usr/sbin/"],
                        timeout=10)
            finally:
                kinit_lock.release()

        start_time = time.time()

        # only the stack root differs between the two cases; the command is shared
        if STACK_NAME in configurations and STACK_ROOT in configurations:
            stack_root = stack_tools.get_stack_root(configurations[STACK_NAME],
                                                    configurations[STACK_ROOT])
        else:
            stack_root = STACK_ROOT_DEFAULT

        llap_status_cmd = stack_root + format(
            "/current/hive-server2-hive2/bin/hive --service llapstatus --name {llap_app_name} --findAppTimeout {LLAP_APP_STATUS_CMD_TIMEOUT}")

        code, output, error = shell.checked_call(llap_status_cmd, user=hive_user,
                                                 stderr=subprocess.PIPE,
                                                 timeout=check_command_timeout,
                                                 logoutput=False)

        # Call for getting JSON
        llap_app_info = make_valid_json(output)

        if llap_app_info is None or 'state' not in llap_app_info:
            # NOTE(review): no exception is being handled at this point, so the
            # label carries no real traceback; kept as-is.
            alert_label = traceback.format_exc()
            result_code = UKNOWN_STATUS_CODE
            return (result_code, [alert_label])

        retrieved_llap_app_state = llap_app_info['state'].upper()
        # display form of the state, falling back to the raw state
        state_label = llap_app_state_dict.get(retrieved_llap_app_state, retrieved_llap_app_state)

        if retrieved_llap_app_state in ['RUNNING_ALL']:
            result_code = OK_RESULT_CODE
            total_time = time.time() - start_time
            alert_label = OK_MESSAGE.format(state_label, total_time)
        elif retrieved_llap_app_state in ['RUNNING_PARTIAL']:
            percent_desired_instances_to_be_up = 80

            # Get 'live' and 'desired' instances
            if 'liveInstances' not in llap_app_info or 'desiredInstances' not in llap_app_info:
                result_code = CRITICAL_RESULT_CODE
                total_time = time.time() - start_time
                alert_label = CRITICAL_MESSAGE_WITH_STATE.format(state_label, total_time)
                return (result_code, [alert_label])

            live_instances = llap_app_info['liveInstances']
            desired_instances = llap_app_info['desiredInstances']

            # also guards the division below against a zero denominator
            if live_instances < 0 or desired_instances <= 0:
                result_code = CRITICAL_RESULT_CODE
                total_time = time.time() - start_time
                alert_label = CRITICAL_MESSAGE_WITH_STATE.format(state_label, total_time)
                return (result_code, [alert_label])

            percent_instances_up = float(live_instances) / desired_instances * 100
            if percent_instances_up >= percent_desired_instances_to_be_up:
                result_code = OK_RESULT_CODE
            else:
                result_code = CRITICAL_RESULT_CODE

            # both outcomes report the same message, only the code differs
            total_time = time.time() - start_time
            alert_label = MESSAGE_WITH_STATE_AND_INSTANCES.format(state_label, total_time,
                                                                  llap_app_info['liveInstances'],
                                                                  llap_app_info['desiredInstances'])
        else:
            result_code = CRITICAL_RESULT_CODE
            total_time = time.time() - start_time
            alert_label = CRITICAL_MESSAGE_WITH_STATE.format(state_label, total_time)
    except:
        # catch-all kept so that every failure is reported as a result tuple
        alert_label = traceback.format_exc()
        result_code = UKNOWN_STATUS_CODE

    return (result_code, [alert_label])
def execute(configurations={}, parameters={}, host_name=None):
    """
    Returns a tuple containing the result code and a pre-formatted result label

    Runs 'show databases;' through the hive CLI against a metastore URI (after a
    kinit under the global Kerberos lock when security is enabled). The result is
    'OK' when the command succeeds, 'CRITICAL' when it fails and 'UNKNOWN' when the
    check itself could not be set up.

    Keyword arguments:
    configurations (dictionary): a mapping of configuration key to value
    parameters (dictionary): a mapping of script parameter key to value
    host_name (string): the name of this host where the alert is running
    """
    # NOTE: the default dictionaries are shared between calls; they are only read here.

    if configurations is None:
        return (('UNKNOWN', ['There were no configurations supplied to the script.']))

    if not HIVE_METASTORE_URIS_KEY in configurations:
        return (('UNKNOWN', ['Hive metastore uris were not supplied to the script.']))

    metastore_uris = configurations[HIVE_METASTORE_URIS_KEY].split(',')

    security_enabled = False
    if SECURITY_ENABLED_KEY in configurations:
        security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

    check_command_timeout = CHECK_COMMAND_TIMEOUT_DEFAULT
    if CHECK_COMMAND_TIMEOUT_KEY in parameters:
        check_command_timeout = float(parameters[CHECK_COMMAND_TIMEOUT_KEY])

    # defaults
    smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
    smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
    smokeuser = SMOKEUSER_DEFAULT

    # check script params
    if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters:
        smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY]

    if SMOKEUSER_SCRIPT_PARAM_KEY in parameters:
        smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY]

    if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
        smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY]

    # check configurations last as they should always take precedence
    if SMOKEUSER_PRINCIPAL_KEY in configurations:
        smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]

    if SMOKEUSER_KEY in configurations:
        smokeuser = configurations[SMOKEUSER_KEY]

    result_code = None

    try:
        if security_enabled:
            if SMOKEUSER_KEYTAB_KEY in configurations:
                smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY]

            # Get the configured Kerberos executable search paths, if any
            if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
                kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
            else:
                kerberos_executable_search_paths = None

            kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
            kinitcmd = format("{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}; ")

            # prevent concurrent kinit
            kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
            kinit_lock.acquire()
            try:
                Execute(kinitcmd, user=smokeuser,
                        path=["/bin/", "/usr/bin/", "/usr/lib/hive/bin/", "/usr/sbin/"],
                        timeout=10)
            finally:
                kinit_lock.release()

        if host_name is None:
            host_name = socket.getfqdn()

        # Default to the first URI so metastore_uri is always bound; previously it was
        # undefined whenever no URI contained the host name, which turned the alert
        # into UNKNOWN. A URI that mentions this host still wins.
        metastore_uri = metastore_uris[0]
        for uri in metastore_uris:
            if host_name in uri:
                metastore_uri = uri

        conf_dir = HIVE_CONF_DIR_LEGACY
        bin_dir = HIVE_BIN_DIR_LEGACY

        if STACK_ROOT in configurations:
            hive_conf_dir = configurations[STACK_ROOT] + format("/current/hive-metastore/conf/conf.server")
            hive_bin_dir = configurations[STACK_ROOT] + format("/current/hive-metastore/bin")

            # only switch to the stack directories when the conf dir is really there
            if os.path.exists(hive_conf_dir):
                conf_dir = hive_conf_dir
                bin_dir = hive_bin_dir

        # The options are separated by plain spaces (adjacent literals) instead of
        # backslash continuations inside the string literal.
        cmd = format("export HIVE_CONF_DIR='{conf_dir}' ; "
                     "hive --hiveconf hive.metastore.uris={metastore_uri} "
                     "--hiveconf hive.metastore.client.connect.retry.delay=1 "
                     "--hiveconf hive.metastore.failure.retries=1 "
                     "--hiveconf hive.metastore.connect.retries=1 "
                     "--hiveconf hive.metastore.client.socket.timeout=14 "
                     "--hiveconf hive.execution.engine=mr -e 'show databases;'")

        start_time = time.time()

        try:
            Execute(
                cmd,
                user=smokeuser,
                path=["/bin/", "/usr/bin/", "/usr/sbin/", bin_dir],
                timeout=int(check_command_timeout),
                timeout_kill_strategy=TerminateStrategy.KILL_PROCESS_TREE,
            )

            total_time = time.time() - start_time

            result_code = 'OK'
            label = OK_MESSAGE.format(total_time)
        except:
            # any failure of the hive command itself means the metastore check failed
            result_code = 'CRITICAL'
            label = CRITICAL_MESSAGE.format(host_name, traceback.format_exc())

    except:
        # anything that went wrong while preparing the check
        label = traceback.format_exc()
        result_code = 'UNKNOWN'

    return ((result_code, [label]))
def curl_krb_request(tmp_dir, keytab, principal, url, cache_file_prefix,
                     krb_exec_search_paths, return_only_http_code, caller_label, user,
                     connection_timeout=CONNECTION_TIMEOUT_DEFAULT,
                     ca_certs=None,
                     kinit_timer_ms=DEFAULT_KERBEROS_KINIT_TIMER_MS,
                     method='', body='', header=''):
    """
    Makes a curl request using the kerberos credentials stored in a calculated cache file. The
    cache file is created by combining the supplied principal, keytab, user, and request name into
    a unique hash.

    This function will use the klist command to determine if the cache is expired and will perform
    a kinit if necessary. Additionally, it has an internal timer to force a kinit after a
    configurable amount of time. This is to prevent boundary issues where requests hit the edge
    of a ticket's lifetime.

    :param tmp_dir: the directory to use for storing the local kerberos cache for this request.
    :param keytab: the location of the keytab to use when performing a kinit
    :param principal: the principal to use when performing a kinit
    :param url: the URL to request
    :param cache_file_prefix: an identifier used to build the unique cache name for this request.
                              This ensures that multiple requests can use the same cache.
    :param krb_exec_search_paths: the search path to use for invoking kerberos binaries
    :param return_only_http_code: True to return only the HTTP code, False to return GET content
    :param caller_label: an identifier to give context into the caller of this module (used for logging)
    :param user: the user to invoke the curl command as
    :param connection_timeout: if specified, a connection timeout for curl (default 10 seconds)
    :param ca_certs: path to certificates; when empty, curl is run with -k instead of --cacert
    :param kinit_timer_ms: if specified, the time (in ms), before forcing a kinit even if the
                           klist cache is still valid.
    :param method: optional HTTP method passed to curl with -X; only used when
                   return_only_http_code is False
    :param body: optional request body passed with -d; only used together with method
    :param header: optional header passed with -H; only used together with method
    :return: a tuple (response, error_msg, elapsed). response is the HTTP code as an int when
             return_only_http_code is True, otherwise curl's standard output; it is "" when curl
             printed nothing. error_msg is curl's standard error or None. elapsed is the
             difference of two time.time() readings (i.e. seconds).
    """
    import uuid

    # backward compatibility with old code and management packs, etc. All new code need pass
    # ca_certs explicitly
    if ca_certs is None:
        try:
            from ambari_agent.AmbariConfig import AmbariConfig
            ca_certs = AmbariConfig.get_resolved_config().get_ca_cert_file_path()
        except:
            # best effort: without a resolvable certificate path the request falls
            # back to -k below; leave a trace instead of swallowing silently
            logger.debug("Unable to resolve the CA certificates path for %s from the agent "
                         "configuration", caller_label, exc_info=True)

    # start off false
    is_kinit_required = False

    # Create the kerberos credentials cache (ccache) file and set it in the environment to use
    # when executing curl. Use a hash of the combination of the principal and keytab file
    # to generate a (relatively) unique cache filename so that we can use it as needed. Scope
    # this file by user in order to prevent sharing of cache files by multiple users.
    ccache_file_name = HASH_ALGORITHM("{0}|{1}".format(principal, keytab)).hexdigest()

    curl_krb_cache_path = os.path.join(tmp_dir, "curl_krb_cache")
    if not os.path.exists(curl_krb_cache_path):
        os.makedirs(curl_krb_cache_path)
    os.chmod(curl_krb_cache_path, 0o1777)

    ccache_file_path = "{0}{1}{2}_{3}_cc_{4}".format(curl_krb_cache_path, os.sep, cache_file_prefix,
                                                     user, ccache_file_name)
    kerberos_env = {'KRB5CCNAME': ccache_file_path}

    # concurrent kinit's can cause the following error:
    # Internal credentials cache error while storing credentials while getting initial credentials
    kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
    kinit_lock.acquire()
    try:
        # If there are no tickets in the cache or they are expired, perform a kinit, else use what
        # is in the cache
        if krb_exec_search_paths:
            klist_path_local = get_klist_path(krb_exec_search_paths)
        else:
            klist_path_local = get_klist_path()

        # take a look at the last time kinit was run for the specified cache and force a new
        # kinit if it's time; this helps to avoid problems approaching ticket boundary when
        # executing a klist and then a curl
        last_kinit_time = _KINIT_CACHE_TIMES.get(ccache_file_name, 0)
        current_time = long(time.time())

        # time.time() (and therefore the recorded kinit times) is in seconds while the
        # timer is configured in milliseconds, so convert before comparing; mixing the
        # units made the timer practically never fire.
        if current_time - last_kinit_time > kinit_timer_ms / 1000.0:
            is_kinit_required = True

        # if the time has not expired, double-check that the cache still has a valid ticket
        if not is_kinit_required:
            klist_command = "{0} -s {1}".format(klist_path_local, ccache_file_path)
            is_kinit_required = (shell.call(klist_command, user=user)[0] != 0)

        # if kinit is required, the perform the kinit
        if is_kinit_required:
            if krb_exec_search_paths:
                kinit_path_local = get_kinit_path(krb_exec_search_paths)
            else:
                kinit_path_local = get_kinit_path()

            logger.debug(
                "Enabling Kerberos authentication for %s via GSSAPI using ccache at %s",
                caller_label, ccache_file_path)

            # kinit; there's no need to set a ticket timeout as this will use the default invalidation
            # configured in the krb5.conf - regenerating keytabs will not prevent an existing cache
            # from working correctly
            shell.checked_call("{0} -c {1} -kt {2} {3} > /dev/null".format(
                kinit_path_local, ccache_file_path, keytab, principal), user=user)

            # record kinit time (seconds)
            _KINIT_CACHE_TIMES[ccache_file_name] = current_time
        else:
            # no kinit needed, use the cache
            logger.debug(
                "Kerberos authentication for %s via GSSAPI already enabled using ccache at %s.",
                caller_label, ccache_file_path)
    finally:
        kinit_lock.release()

    # check if cookies dir exists, if not then create it
    cookies_dir = os.path.join(tmp_dir, "cookies")
    if not os.path.exists(cookies_dir):
        os.makedirs(cookies_dir)

    cookie_file_name = str(uuid.uuid4())
    cookie_file = os.path.join(cookies_dir, cookie_file_name)

    start_time = time.time()
    error_msg = None

    # setup timeouts for the request; ensure we use integers since that is what curl needs
    connection_timeout = int(connection_timeout)
    maximum_timeout = connection_timeout + 2

    # skip certificate verification unless a CA bundle is known
    ssl_options = ['-k']
    if ca_certs:
        ssl_options = ['--cacert', ca_certs]

    # NOTE(review): admin_username / admin_password are not defined in this function;
    # presumably module-level names - confirm they exist where this is deployed.
    try:
        if return_only_http_code:
            _, curl_stdout, curl_stderr = get_user_call_output(
                ['curl', '--location-trusted'] + ssl_options + [
                    '--negotiate', '-u', admin_username + ':' + admin_password,
                    '-b', cookie_file, '-c', cookie_file, '-w', '%{http_code}', url,
                    '--connect-timeout', str(connection_timeout),
                    '--max-time', str(maximum_timeout), '-o', '/dev/null'
                ],
                user=user, env=kerberos_env)
        else:
            # returns response body
            curl_command = ['curl', '--location-trusted'] + ssl_options + [
                '--negotiate', '-u', admin_username + ':' + admin_password,
                '-b', cookie_file, '-c', cookie_file, url,
                '--connect-timeout', str(connection_timeout),
                '--max-time', str(maximum_timeout)
            ]

            # header and body are only honoured when a method is given; the argument
            # order (-H, -X, -d) matches every combination handled before
            if method:
                if header:
                    curl_command.extend(['-H', header])
                curl_command.extend(['-X', method])
                if body:
                    curl_command.extend(['-d', body])

            _, curl_stdout, curl_stderr = get_user_call_output(
                curl_command, user=user, env=kerberos_env)
    except Fail:
        if logger.isEnabledFor(logging.DEBUG):
            logger.exception(
                "Unable to make a curl request for {0}.".format(caller_label))
        raise
    finally:
        # the per-request cookie jar is always discarded
        if os.path.isfile(cookie_file):
            os.remove(cookie_file)

    # empty quotes evaluates to false
    if curl_stderr:
        error_msg = curl_stderr

    time_millis = time.time() - start_time

    # empty quotes evaluates to false
    if curl_stdout:
        if return_only_http_code:
            return (int(curl_stdout), error_msg, time_millis)
        else:
            return (curl_stdout, error_msg, time_millis)

    logger.debug("The curl response for %s is empty; standard error = %s",
                 caller_label, str(error_msg))

    return ("", error_msg, time_millis)
def execute(configurations={}, parameters={}, host_name=None):
    """
    Returns a tuple containing the result code and a pre-formatted result label

    Checks the ATS HBase instance. With an external HBase the alert is always OK.
    When HBase is not launched as a system service, the master and regionserver pid
    files are probed; otherwise 'yarn app -status ats-hbase' is run (after a kinit
    under the global Kerberos lock when security is enabled) and the state 'STABLE'
    is reported as OK, anything else as CRITICAL.

    Keyword arguments:
    configurations (dictionary): a mapping of configuration key to value
    parameters (dictionary): a mapping of script parameter key to value
    host_name (string): the name of this host where the alert is running
    """
    # NOTE: the default dictionaries are shared between calls; they are only read here.

    if configurations is None:
        return (UKNOWN_STATUS_CODE, ['There were no configurations supplied to the script.'])

    result_code = None

    try:
        use_external_hbase = False
        if USE_EXTERNAL_HBASE_KEY in configurations:
            use_external_hbase = str(configurations[USE_EXTERNAL_HBASE_KEY]).upper() == 'TRUE'

        if use_external_hbase:
            return (OK_RESULT_CODE, ['use_external_hbase set to true.'])

        is_hbase_system_service_launch = False
        if ATS_HBASE_SYSTEM_SERVICE_LAUNCH_KEY in configurations:
            is_hbase_system_service_launch = str(
                configurations[ATS_HBASE_SYSTEM_SERVICE_LAUNCH_KEY]).upper() == 'TRUE'

        yarn_hbase_user = "******"
        if ATS_HBASE_USER_KEY in configurations:
            yarn_hbase_user = configurations[ATS_HBASE_USER_KEY]

        # Resolve the host once for both paths; the system-service path used to call
        # host_name.lower() on None when no host name was supplied.
        if host_name is None:
            host_name = socket.getfqdn()

        if not is_hbase_system_service_launch:
            yarn_hbase_pid_dir_prefix = ""
            if ATS_HBASE_PID_DIR_PREFIX in configurations:
                yarn_hbase_pid_dir_prefix = configurations[ATS_HBASE_PID_DIR_PREFIX]
            else:
                return (UKNOWN_STATUS_CODE,
                        ['The yarn_hbase_pid_dir_prefix is a required parameter.'])

            yarn_hbase_pid_dir = format("{yarn_hbase_pid_dir_prefix}/{yarn_hbase_user}")
            master_pid_file = format("{yarn_hbase_pid_dir}/hbase-{yarn_hbase_user}-master.pid")
            rs_pid_file = format("{yarn_hbase_pid_dir}/hbase-{yarn_hbase_user}-regionserver.pid")

            master_process_running = is_monitor_process_live(master_pid_file)
            rs_process_running = is_monitor_process_live(rs_pid_file)

            # both daemons have to be alive for the embedded HBase to count as running
            if master_process_running and rs_process_running:
                alert_state = OK_RESULT_CODE
                alert_label = 'ATS embedded HBase is running on {0}'
            else:
                alert_state = CRITICAL_RESULT_CODE
                alert_label = 'ATS embedded HBase is NOT running on {0}'
            alert_label = alert_label.format(host_name)

            return (alert_state, [alert_label])
        else:
            security_enabled = False
            if SECURITY_ENABLED_KEY in configurations:
                security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

            # The timeout is a script parameter: test the same mapping it is read from
            # (previously the key was tested in configurations but read from parameters).
            check_command_timeout = CHECK_COMMAND_TIMEOUT_DEFAULT
            if parameters is not None and CHECK_COMMAND_TIMEOUT_KEY in parameters:
                check_command_timeout = int(parameters[CHECK_COMMAND_TIMEOUT_KEY])

            if security_enabled:
                # NOTE(review): principal and keytab have no defaults; when a key is
                # missing the kinit template below cannot be resolved and the alert
                # ends up in the catch-all handler.
                if ATS_HBASE_PRINCIPAL_KEY in configurations:
                    ats_hbase_app_principal = configurations[ATS_HBASE_PRINCIPAL_KEY]
                    ats_hbase_app_principal = ats_hbase_app_principal.replace(
                        '_HOST', host_name.lower())

                if ATS_HBASE_PRINCIPAL_KEYTAB_KEY in configurations:
                    ats_hbase_app_keytab = configurations[ATS_HBASE_PRINCIPAL_KEYTAB_KEY]

                # Get the configured Kerberos executable search paths, if any
                if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
                    kerberos_executable_search_paths = configurations[
                        KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
                else:
                    kerberos_executable_search_paths = None

                kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
                kinitcmd = format(
                    "{kinit_path_local} -kt {ats_hbase_app_keytab} {ats_hbase_app_principal}; ")

                # prevent concurrent kinit
                kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
                kinit_lock.acquire()
                try:
                    Execute(kinitcmd, user=yarn_hbase_user,
                            path=["/bin/", "/usr/bin/", "/usr/sbin/"],
                            timeout=10)
                finally:
                    kinit_lock.release()

            start_time = time.time()
            ats_hbase_status_cmd = STACK_ROOT_DEFAULT + format(
                "/current/hadoop-yarn-client/bin/yarn app -status ats-hbase")

            code, output, error = shell.checked_call(ats_hbase_status_cmd,
                                                     user=yarn_hbase_user,
                                                     stderr=subprocess.PIPE,
                                                     timeout=check_command_timeout,
                                                     logoutput=False)
            if code != 0:
                # NOTE(review): no exception is being handled here, so the label
                # carries no real traceback; kept as-is.
                alert_label = traceback.format_exc()
                result_code = UKNOWN_STATUS_CODE
                return (result_code, [alert_label])

            # Call for getting JSON
            ats_hbase_app_info = make_valid_json(output)

            if ats_hbase_app_info is None:
                alert_label = CRITICAL_MESSAGE
                result_code = CRITICAL_RESULT_CODE
                return (result_code, [alert_label])

            if 'state' not in ats_hbase_app_info:
                alert_label = traceback.format_exc()
                result_code = UKNOWN_STATUS_CODE
                return (result_code, [alert_label])

            retrieved_ats_hbase_app_state = ats_hbase_app_info['state'].upper()

            if retrieved_ats_hbase_app_state in ['STABLE']:
                result_code = OK_RESULT_CODE
                total_time = time.time() - start_time
                alert_label = OK_MESSAGE.format(retrieved_ats_hbase_app_state, total_time)
            else:
                result_code = CRITICAL_RESULT_CODE
                total_time = time.time() - start_time
                alert_label = CRITICAL_MESSAGE_WITH_STATE.format(
                    retrieved_ats_hbase_app_state, total_time)
    except:
        # catch-all kept so that every failure is reported as a result tuple
        alert_label = traceback.format_exc()
        result_code = CRITICAL_RESULT_CODE

    return (result_code, [alert_label])
def check_thrift_port_sasl(address, port, hive_auth="NOSASL", key=None,
                           kinitcmd=None, smokeuser='******',
                           hive_user='******', transport_mode="binary",
                           http_endpoint="cliservice", ssl=False,
                           ssl_keystore=None, ssl_password=None,
                           check_command_timeout=30, ldap_username="",
                           ldap_password=""):
    """
    Hive thrift SASL port check.

    Builds a ``jdbc:hive2://`` URL from the arguments, optionally runs
    ``kinitcmd`` under the shared Kerberos lock, and then runs a beeline
    no-op statement through ``Execute``.

    Arguments:
    address, port -- HiveServer2 endpoint; a string port is cast to int
    hive_auth -- "NOSASL" appends ``auth=noSasl``; "LDAP" switches the
                 credentials to ldap_username/ldap_password and skips kinit
    key -- principal appended to the URL when kinitcmd is executed
    kinitcmd -- kinit command line, executed as smokeuser when given
    smokeuser -- OS user that runs kinit and beeline
    hive_user -- user passed to beeline with -n (non-LDAP case)
    transport_mode, http_endpoint -- "http" mode appends ``httpPath``
    ssl, ssl_keystore, ssl_password -- SSL parameters are appended only when
                 ssl is true and both keystore and password are provided
    check_command_timeout -- timeout handed to Execute for the beeline call

    Returns nothing; whatever Execute raises propagates to the caller.
    """
    # NOTE(review): format() is called with a template only, so it presumably
    # resolves {name} placeholders from this function's local variables.
    # Keep the local names below in sync with the templates.

    # check params to be correctly passed, if not - try to cast them
    if isinstance(port, str):
        port = int(port)

    if isinstance(ssl, str):
        # bool("false") is True because any non-empty string is truthy, so
        # interpret the text instead. Unrecognised non-empty values stay True,
        # as they were before.
        ssl = ssl.strip().lower() not in ("", "false", "0", "no", "off", "none")

    # to pass as beeline argument
    ssl_str = str(ssl).lower()
    beeline_url = [
        'jdbc:hive2://{address}:{port}/',
        "transportMode={transport_mode}",
    ]

    # append url according to used transport
    if transport_mode == "http":
        beeline_url.append('httpPath={http_endpoint}')

    # append url according to used auth
    if hive_auth == "NOSASL":
        beeline_url.append('auth=noSasl')

    credential_str = "-n {hive_user}"

    # append username and password for LDAP
    if hive_auth == "LDAP":
        # password might contain special characters that need to be escaped
        quoted_ldap_password = quote_bash_args(ldap_password)
        credential_str = "-n {ldap_username} -p {quoted_ldap_password!p}"

    # append url according to ssl configuration
    if ssl and ssl_keystore is not None and ssl_password is not None:
        beeline_url.extend([
            'ssl={ssl_str}',
            'sslTrustStore={ssl_keystore}',
            'trustStorePassword={ssl_password!p}',
        ])

    # append url according to principal and execute kinit
    if kinitcmd and hive_auth != "LDAP":
        beeline_url.append('principal={key}')

        # prevent concurrent kinit
        kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
        kinit_lock.acquire()
        try:
            Execute(kinitcmd, user=smokeuser)
        finally:
            kinit_lock.release()

    # -n the user to connect as (ignored when using the hive principal in the
    #    URL, can be different from the user running the beeline command)
    # -e ';' executes a SQL command of NOOP
    # The leading "!" inverts the exit status of the pipeline, so the check
    # fails when grep -v finds none of the listed marker strings filtered out.
    cmd = ("! (beeline -u '%s' %s -e ';' 2>&1 | awk '{print}' | grep -vz -i " +
           "-e 'Connected to:' -e 'Transaction isolation:' -e 'inactive HS2 instance; use service discovery')") % \
          (format(";".join(beeline_url)), format(credential_str))

    Execute(
        cmd,
        user=smokeuser,
        path=["/bin/", "/usr/bin/", "/usr/lib/hive/bin/", "/usr/sbin/"],
        timeout=check_command_timeout,
        timeout_kill_strategy=TerminateStrategy.KILL_PROCESS_TREE,
    )
def execute(configurations={}, parameters={}, host_name=None):
    """
    Returns a tuple containing the result code and a pre-formatted result label

    Probes the thrift port by running the spark-client beeline against
    ``jdbc:hive2://<host>:<port>/default`` and checking its output for
    'Connection refused' / 'Invalid URL'.

    Keyword arguments:
    configurations (dictionary): a mapping of configuration key to value
    parameters (dictionary): a mapping of script parameter key to value
    host_name (string): the name of this host where the alert is running

    Result codes: 'OK' when the probe command succeeds, 'CRITICAL' when it
    fails, 'UNKNOWN' when configurations is None or the probe could not be
    set up. The kinit step runs before the guarded section, so an error
    raised there propagates to the caller.
    """
    # The default dicts are never mutated here; they are kept so that a call
    # without arguments behaves exactly as before.
    spark_home = os.path.join(stack_root, "current", 'spark-client')

    if configurations is None:
        return ('UNKNOWN', ['There were no configurations supplied to the script.'])

    transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT
    if HIVE_SERVER_TRANSPORT_MODE_KEY in configurations:
        transport_mode = configurations[HIVE_SERVER_TRANSPORT_MODE_KEY]

    # the configured thrift port is only honoured for binary transport
    port = THRIFT_PORT_DEFAULT
    if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in configurations:
        port = int(configurations[HIVE_SERVER_THRIFT_PORT_KEY])

    security_enabled = False
    if SECURITY_ENABLED_KEY in configurations:
        security_enabled = str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

    hive_kerberos_keytab = None
    if HIVE_SERVER2_KERBEROS_KEYTAB in configurations:
        hive_kerberos_keytab = configurations[HIVE_SERVER2_KERBEROS_KEYTAB]

    if host_name is None:
        host_name = socket.getfqdn()

    hive_principal = None
    if HIVE_SERVER2_PRINCIPAL_KEY in configurations:
        hive_principal = configurations[HIVE_SERVER2_PRINCIPAL_KEY]
        hive_principal = hive_principal.replace('_HOST', host_name.lower())

    # Get the configured Kerberos executable search paths, if any
    if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
        kerberos_executable_search_paths = configurations[
            KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
    else:
        kerberos_executable_search_paths = None

    kinit_path_local = get_kinit_path(kerberos_executable_search_paths)

    hiveruser = HIVEUSER_DEFAULT

    if security_enabled:
        # NOTE(review): format() presumably fills the placeholders from the
        # local variables of this function; keep the names in sync.
        kinitcmd = format(
            "{kinit_path_local} -kt {hive_kerberos_keytab} {hive_principal}; ")

        # prevent concurrent kinit
        kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
        kinit_lock.acquire()
        try:
            Execute(kinitcmd, user=hiveruser)
        finally:
            kinit_lock.release()

    result_code = None
    try:
        if security_enabled:
            beeline_url = [
                "'jdbc:hive2://{host_name}:{port}/default;principal={hive_principal}'",
                "transportMode={transport_mode}"
            ]
        else:
            beeline_url = [
                "'jdbc:hive2://{host_name}:{port}/default'",
                "transportMode={transport_mode}"
            ]

        # Invoke the spark-client beeline by absolute path. Previously the
        # binary's own path was handed to Execute as a PATH entry while the
        # command ran a bare "beeline"; if no beeline was otherwise on PATH
        # the grep found nothing and the inverted pipeline reported success.
        beeline_cmd = os.path.join(spark_home, "bin", "beeline")
        cmd = "! %s -u %s -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'" % \
              (beeline_cmd, format(" ".join(beeline_url)))

        start_time = time.time()
        try:
            Execute(cmd,
                    user=hiveruser,
                    path=[os.path.dirname(beeline_cmd)],
                    timeout=CHECK_COMMAND_TIMEOUT_DEFAULT)
            total_time = time.time() - start_time
            result_code = 'OK'
            label = OK_MESSAGE.format(total_time, port)
        except Exception:
            result_code = 'CRITICAL'
            label = CRITICAL_MESSAGE.format(host_name, port,
                                            traceback.format_exc())
    except Exception:
        label = traceback.format_exc()
        result_code = 'UNKNOWN'

    return (result_code, [label])
def check_thrift_port_sasl(address, port, hive_auth="NOSASL", key=None,
                           kinitcmd=None, smokeuser='******',
                           transport_mode="binary",
                           http_endpoint="cliservice", ssl=False,
                           ssl_keystore=None, ssl_password=None,
                           check_command_timeout=30, ldap_username="",
                           ldap_password=""):
    """
    Hive thrift SASL port check.

    Builds a ``jdbc:hive2://`` URL from the arguments, optionally runs
    ``kinitcmd`` under the shared Kerberos lock, and then runs beeline with
    an empty statement, scanning its output for 'Connection refused' or
    'Invalid URL'.

    Arguments:
    address, port -- HiveServer2 endpoint; a string port is cast to int
    hive_auth -- "NOSASL" appends ``auth=noSasl``; "LDAP" passes
                 ldap_username/ldap_password to beeline and skips kinit
    key -- principal appended to the URL when kinitcmd is executed
    kinitcmd -- kinit command line, executed as smokeuser when given
    smokeuser -- OS user that runs kinit and beeline
    transport_mode, http_endpoint -- "http" mode appends ``httpPath``
    ssl, ssl_keystore, ssl_password -- SSL parameters are appended only when
                 ssl is true and both keystore and password are provided
    check_command_timeout -- timeout handed to Execute for the beeline call

    Returns nothing; whatever Execute raises propagates to the caller.
    """
    # NOTE(review): format() is called with a template only, so it presumably
    # resolves {name} placeholders from this function's local variables.
    # Keep the local names below in sync with the templates.

    # check params to be correctly passed, if not - try to cast them
    if isinstance(port, str):
        port = int(port)

    if isinstance(ssl, str):
        # bool("false") is True because any non-empty string is truthy, so
        # interpret the text instead. Unrecognised non-empty values stay True,
        # as they were before.
        ssl = ssl.strip().lower() not in ("", "false", "0", "no", "off", "none")

    # to pass as beeline argument
    ssl_str = str(ssl).lower()
    beeline_url = [
        'jdbc:hive2://{address}:{port}/',
        "transportMode={transport_mode}",
    ]

    # append url according to used transport
    if transport_mode == "http":
        beeline_url.append('httpPath={http_endpoint}')

    # append url according to used auth
    if hive_auth == "NOSASL":
        beeline_url.append('auth=noSasl')

    credential_str = ""

    # append username and password for LDAP
    if hive_auth == "LDAP":
        # NOTE(review): the values are only wrapped in single quotes; a
        # username or password containing a single quote would break the
        # shell command - confirm whether escaping is needed here.
        credential_str = "-n '{ldap_username}' -p '{ldap_password!p}'"

    # append url according to ssl configuration
    if ssl and ssl_keystore is not None and ssl_password is not None:
        beeline_url.extend([
            'ssl={ssl_str}',
            'sslTrustStore={ssl_keystore}',
            'trustStorePassword={ssl_password!p}',
        ])

    # append url according to principal and execute kinit
    if kinitcmd and hive_auth != "LDAP":
        beeline_url.append('principal={key}')

        # prevent concurrent kinit
        kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
        kinit_lock.acquire()
        try:
            Execute(kinitcmd, user=smokeuser)
        finally:
            kinit_lock.release()

    # The leading "!" inverts the pipeline's exit status: the command fails
    # when grep finds one of the error strings in the beeline output.
    cmd = "! beeline -u '%s' %s -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'" % \
          (format(";".join(beeline_url)), format(credential_str))

    Execute(
        cmd,
        user=smokeuser,
        path=["/bin/", "/usr/bin/", "/usr/lib/hive/bin/", "/usr/sbin/"],
        timeout=check_command_timeout,
        timeout_kill_strategy=TerminateStrategy.KILL_PROCESS_TREE,
    )
def execute(configurations={}, parameters={}, host_name=None):
    """
    Returns a tuple containing the result code and a pre-formatted result label

    Probes the Livy server by requesting ``/sessions`` with curl and
    expecting an HTTP 200 status code.

    Keyword arguments:
    configurations (dictionary): a mapping of configuration key to value
    parameters (dictionary): a mapping of script parameter key to value
    host_name (string): the name of this host where the alert is running

    Result codes: 'OK' when the curl probe succeeds, 'CRITICAL' when it
    fails, 'UNKNOWN' when configurations is None or the probe could not be
    set up.
    """
    if configurations is None:
        return ('UNKNOWN',
                ['There were no configurations supplied to the script.'])

    LIVY_PORT_DEFAULT = 8999

    port = (int(configurations[LIVY_SERVER_PORT_KEY])
            if LIVY_SERVER_PORT_KEY in configurations
            else LIVY_PORT_DEFAULT)

    # a configured Livy host wins over the host passed in by the caller
    if LIVY_SERVER_HOST_KEY in configurations:
        host_name = str(configurations[LIVY_SERVER_HOST_KEY])
    elif host_name is None:
        host_name = socket.getfqdn()

    livyuser = configurations[SMOKEUSER_KEY]

    security_enabled = (
        SECURITY_ENABLED_KEY in configurations
        and str(configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE')

    smokeuser_kerberos_keytab = (
        configurations[SMOKEUSER_KEYTAB_KEY]
        if SMOKEUSER_KEYTAB_KEY in configurations else None)

    smokeuser_principal = None
    if SMOKEUSER_PRINCIPAL_KEY in configurations:
        smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY].replace(
            '_HOST', host_name.lower())

    # Get the configured Kerberos executable search paths, if any
    kerberos_executable_search_paths = (
        configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
        if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations else None)

    kinit_path_local = get_kinit_path(kerberos_executable_search_paths)

    if security_enabled:
        # NOTE(review): format() presumably fills the placeholders from the
        # local variables of this function; keep the names in sync.
        kinitcmd = format(
            "{kinit_path_local} -kt {smokeuser_kerberos_keytab} {smokeuser_principal}; "
        )

        # only one kinit may run at a time
        kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
        kinit_lock.acquire()
        try:
            Execute(kinitcmd, user=livyuser)
        finally:
            kinit_lock.release()

    # the mere presence of the SSL key selects https
    http_scheme = 'http'
    if LIVY_SSL_ENABLED_KEY in configurations:
        http_scheme = 'https'

    result_code = None

    try:
        start_time = time.time()
        try:
            livy2_livyserver_host = str(host_name)

            livy_cmd = format(
                "curl -s -o /dev/null -w'%{{http_code}}' --negotiate -u: -k {http_scheme}://{livy2_livyserver_host}:{port}/sessions | grep 200 "
            )

            Execute(livy_cmd,
                    tries=3,
                    try_sleep=1,
                    logoutput=True,
                    user=livyuser)

            total_time = time.time() - start_time
            label = OK_MESSAGE.format(total_time, port)
            result_code = 'OK'
        except:
            label = CRITICAL_MESSAGE.format(host_name, port,
                                            traceback.format_exc())
            result_code = 'CRITICAL'
    except:
        result_code = 'UNKNOWN'
        label = traceback.format_exc()

    return (result_code, [label])
def execute(configurations={}, parameters={}, host_name=None):
    """
    Returns a tuple containing the result code and a pre-formatted result label

    Checks that the Hive metastore port listed for this host in the
    configured metastore URIs accepts a TCP connection.

    Keyword arguments:
    configurations (dictionary): a mapping of configuration key to value
    parameters (dictionary): a mapping of script parameter key to value
    host_name (string): the name of this host where the alert is running

    Result codes: 'OK' when the TCP connect succeeds, 'CRITICAL' when the
    port is not listening or the connect attempt raises, 'UNKNOWN' when
    required configuration is missing or the check could not be set up.
    """
    # The default dicts are never mutated here; they are kept so that a call
    # without arguments behaves exactly as before.
    if configurations is None:
        return ('UNKNOWN',
                ['There were no configurations supplied to the script.'])

    if HIVE_METASTORE_URIS_KEY not in configurations:
        return ('UNKNOWN',
                ['Hive metastore uris were not supplied to the script.'])

    metastore_uris = configurations[HIVE_METASTORE_URIS_KEY].split(',')

    security_enabled = False
    if SECURITY_ENABLED_KEY in configurations:
        security_enabled = str(
            configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

    # NOTE(review): parsed but not applied below - kinit uses a fixed
    # timeout and the socket connect has none. Confirm whether it should be.
    check_command_timeout = CHECK_COMMAND_TIMEOUT_DEFAULT
    if CHECK_COMMAND_TIMEOUT_KEY in parameters:
        check_command_timeout = float(parameters[CHECK_COMMAND_TIMEOUT_KEY])

    # defaults
    smokeuser_keytab = SMOKEUSER_KEYTAB_DEFAULT
    smokeuser_principal = SMOKEUSER_PRINCIPAL_DEFAULT
    smokeuser = SMOKEUSER_DEFAULT

    # check script params
    if SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY in parameters:
        smokeuser_principal = parameters[SMOKEUSER_PRINCIPAL_SCRIPT_PARAM_KEY]

    if SMOKEUSER_SCRIPT_PARAM_KEY in parameters:
        smokeuser = parameters[SMOKEUSER_SCRIPT_PARAM_KEY]

    if SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY in parameters:
        smokeuser_keytab = parameters[SMOKEUSER_KEYTAB_SCRIPT_PARAM_KEY]

    # check configurations last as they should always take precedence
    if SMOKEUSER_PRINCIPAL_KEY in configurations:
        smokeuser_principal = configurations[SMOKEUSER_PRINCIPAL_KEY]

    if SMOKEUSER_KEY in configurations:
        smokeuser = configurations[SMOKEUSER_KEY]

    result_code = None

    try:
        if security_enabled:
            if SMOKEUSER_KEYTAB_KEY in configurations:
                smokeuser_keytab = configurations[SMOKEUSER_KEYTAB_KEY]

            # Get the configured Kerberos executable search paths, if any
            if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
                kerberos_executable_search_paths = configurations[
                    KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
            else:
                kerberos_executable_search_paths = None

            kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
            # NOTE(review): format() presumably fills the placeholders from
            # the local variables of this function; keep the names in sync.
            kinitcmd = format(
                "{kinit_path_local} -kt {smokeuser_keytab} {smokeuser_principal}; "
            )

            # prevent concurrent kinit
            kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
            kinit_lock.acquire()
            try:
                Execute(kinitcmd,
                        user=smokeuser,
                        path=[
                            "/bin/", "/usr/bin/", "/usr/lib/hive/bin/",
                            "/usr/sbin/"
                        ],
                        timeout=10)
            finally:
                kinit_lock.release()

        if host_name is None:
            host_name = socket.getfqdn()

        # Pick the port from the URI that mentions this host; when several
        # URIs match, the last one wins. port stays None when none matches.
        port = None
        for uri in metastore_uris:
            if host_name in uri:
                parts = urlparse(uri)
                port = parts.port

        start_time = time.time()

        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                result = sock.connect_ex((host_name, port))
                total_time = time.time() - start_time
            finally:
                # always release the descriptor; the probe socket was
                # previously left open after every alert run
                sock.close()

            if result == 0:
                result_code = 'OK'
                label = OK_MESSAGE.format(total_time)
            else:
                result_code = 'CRITICAL'
                label = NOT_LISTENING_MESSAGE.format(host_name, port)
        except Exception:
            result_code = 'CRITICAL'
            label = CRITICAL_MESSAGE.format(host_name, traceback.format_exc())

    except Exception:
        label = traceback.format_exc()
        result_code = 'UNKNOWN'

    return (result_code, [label])