def status(self, env):
    """
    Probe the Metron REST endpoint and report whether it answers.

    ``status_params`` is installed on ``env`` and a ``curl`` with a three second limit is issued
    against ``{hostname}:{metron_rest_port}`` as the Metron user.

    :param env: The execution environment; receives ``status_params`` via ``set_params``.
    :raises ComponentIsNotRunning: When the curl call raises ``ExecutionFailed``.
    """
    from params import status_params
    env.set_params(status_params)

    rest_probe_cmd = format('curl --max-time 3 {hostname}:{metron_rest_port}')
    try:
        get_user_call_output(rest_probe_cmd, user=status_params.metron_user)
    except ExecutionFailed:
        # Translate the failed probe into the "not running" signal.
        raise ComponentIsNotRunning()
def service_check(cmd, user, label):
    """
    Run a service check command and interpret its exit status the LSB way.

    The meaning of the return codes is defined by the LSB, see
    http://refspecs.linuxbase.org/LSB_3.0.0/LSB-PDA/LSB-PDA/iniscrptact.html

    :param cmd: The service check command to execute.
    :param user: The user handed to ``get_user_call_output`` for running the command.
    :param label: The name of the service; only used in log and error messages.
    :raises ComponentIsNotRunning: When the command exits with 1, 2 or 3.
    :raises ExecutionFailed: When the command exits with any other non-zero code.
    """
    Logger.info("Performing service check; cmd={0}, user={1}, label={2}".format(cmd, user, label))
    exit_code, stdout, stderr = get_user_call_output(cmd, user, is_checked_call=False)

    if len(stderr) > 0:
        Logger.error(stderr)

    if exit_code == 0:
        # 0 means 'program is running or service is OK'
        Logger.info("{0} is running".format(label))
        return

    if exit_code in (1, 2, 3):
        # 1, 2 and 3 mean 'program is not running' or 'program is dead'
        Logger.info("{0} is not running".format(label))
        raise ComponentIsNotRunning()

    # anything else: the service state is unknown
    failure = "{0} service check failed; cmd '{1}' returned {2}".format(label, cmd, exit_code)
    Logger.error(failure)
    raise ExecutionFailed(failure, exit_code, stdout, stderr)
def get_running_topologies(params):
    """
    Ask the Storm REST server for its topology summary.

    The request is issued with ``curl`` as ``params.metron_user``; when security is enabled a
    ``kinit`` is performed for that user first and curl is told to negotiate.

    :param params: Parameter object providing ``security_enabled``, ``metron_user`` and, when
                   security is enabled, ``kinit_path_local``, ``metron_keytab_path`` and
                   ``metron_principal_name``.
    :return: A dict mapping each topology name to its status. An empty dict is returned when
             curl exits non-zero, when the response is not valid JSON, or when the response is
             not a JSON object.
    """
    Logger.info('Getting Running Storm Topologies from Storm REST Server')
    Logger.info('Security enabled? ' + str(params.security_enabled))

    # Want to sudo to the metron user and kinit as them so we aren't polluting root with Metron's
    # Kerberos tickets. This is because we need to run a command with a return as the metron user.
    negotiate = '--negotiate -u : ' if params.security_enabled else ''
    cmd = ambari_format('curl --max-time 3 ' + negotiate + '{storm_rest_addr}/api/v1/topology/summary')

    if params.security_enabled:
        kinit(params.kinit_path_local,
              params.metron_keytab_path,
              params.metron_principal_name,
              execute_user=params.metron_user)

    Logger.info('Running cmd: ' + cmd)
    return_code, stdout, stderr = get_user_call_output(cmd,
                                                       user=params.metron_user,
                                                       is_checked_call=False)
    if return_code != 0:
        return {}

    try:
        stormjson = json.loads(stdout)
    except ValueError as e:
        Logger.info('Stdout: ' + str(stdout))
        Logger.info('Stderr: ' + str(stderr))
        Logger.exception(str(e))
        return {}

    if not isinstance(stormjson, dict):
        return {}

    # Previously the parsed response was dropped and None was returned on success, which is
    # inconsistent with the empty dict returned on every failure path.
    topologies = {}
    for topology in stormjson.get('topologies', []):
        topologies[topology['name']] = topology['status']
    return topologies
def start_rest_application(self):
    """
    Launch the REST application through ``metron-rest.sh``.

    The launch is skipped (``not_if``) when the PID file exists and ``ps`` finds the PID it
    contains. Settings are handed to the script through exported environment variables.
    """
    Logger.info('Starting REST application')

    if self.__params.security_enabled:
        kinit(self.__params.kinit_path_local,
              self.__params.metron_keytab_path,
              self.__params.metron_principal_name,
              execute_user=self.__params.metron_user)

    # Get the PID associated with the service
    pid_file = format("{metron_rest_pid_dir}/{metron_rest_pid}")
    pid_lookup = get_user_call_output.get_user_call_output(format("cat {pid_file}"),
                                                           user=self.__params.metron_user,
                                                           is_checked_call=False)
    pid = pid_lookup[1]
    rest_already_running = format("ls {pid_file} >/dev/null 2>&1 && ps -p {pid} >/dev/null 2>&1")

    # Set the password with env variable instead of param to avoid it showing in ps
    launch_steps = [
        "export METRON_JDBC_PASSWORD={metron_jdbc_password!p};",
        "export JAVA_HOME={java_home};",
        "export METRON_REST_CLASSPATH={metron_rest_classpath};",
        "export METRON_INDEX_CP={metron_indexing_classpath};",
        "export METRON_LOG_DIR={metron_log_dir};",
        "export METRON_PID_FILE={pid_file};",
        "{metron_home}/bin/metron-rest.sh;",
        "unset METRON_JDBC_PASSWORD;",
    ]
    launch_cmd = format("".join(launch_steps))

    Execute(launch_cmd,
            user=self.__params.metron_user,
            logoutput=True,
            not_if=rest_already_running,
            timeout=60)
    Logger.info('Done starting REST application')
def is_systemd_running():
    """
    Check for a running Systemd by calling ``pidof systemd`` as root.

    :return True when ``pidof systemd`` exits with 0, False otherwise.
    """
    Logger.info("Is the platform running Systemd?")
    pidof_result = get_user_call_output("pidof systemd", "root", is_checked_call=False)

    systemd_found = pidof_result[0] == 0
    if systemd_found:
        Logger.info("Systemd was found")
    else:
        Logger.info("Systemd was NOT found")
    return systemd_found
def stop_rest_application(self):
    """
    Stop the REST application.

    A plain ``kill`` is sent to the PID read from the PID file, then ``kill -9`` if the process
    is still there after ``wait_time`` seconds. The stop is then verified (20 tries, 3 seconds
    apart) and the PID file is deleted. If verification fails the logs are shown and the
    error is re-raised.
    """
    Logger.info('Stopping REST application')

    # Get the pid associated with the service
    pid_file = format("{metron_rest_pid_dir}/{metron_rest_pid}")
    pid = get_user_call_output.get_user_call_output(
        format("cat {pid_file}"),
        user=self.__params.metron_user,
        is_checked_call=False)[1]
    process_id_exists_command = format("ls {pid_file} >/dev/null 2>&1 && ps -p {pid} >/dev/null 2>&1")

    if self.__params.security_enabled:
        kinit(self.__params.kinit_path_local,
              self.__params.metron_keytab_path,
              self.__params.metron_principal_name,
              execute_user=self.__params.metron_user)

    wait_time = 5
    rest_is_gone = format("! ({process_id_exists_command})")
    rest_is_gone_after_wait = format(
        "! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )")

    # Politely kill
    Execute(('kill', format("{pid}")),
            sudo=True,
            not_if=rest_is_gone)

    # Violently kill
    Execute(('kill', '-9', format("{pid}")),
            not_if=rest_is_gone_after_wait,
            sudo=True,
            ignore_failures=True)

    try:
        # check if stopped the process, else fail the task
        Execute(rest_is_gone, tries=20, try_sleep=3)
    except:
        # Deliberately broad: the error is always re-raised after the logs are shown.
        show_logs(self.__params.metron_log_dir, self.__params.metron_user)
        raise

    File(pid_file, action="delete")
    Logger.info('Done stopping REST application')
def start_rest_application(self):
    """
    Launch the REST application through ``metron-rest.sh``.

    The launch is skipped (``not_if``) when the PID file exists and ``ps`` finds the PID it
    contains. Settings, including the JDBC and LDAP passwords, are handed to the script
    through exported environment variables; the password variables are unset afterwards.
    """
    Logger.info('Starting REST application')

    if self.__params.security_enabled:
        kinit(self.__params.kinit_path_local,
              self.__params.metron_keytab_path,
              self.__params.metron_principal_name,
              execute_user=self.__params.metron_user)

    # Get the PID associated with the service
    pid_file = format("{metron_rest_pid_dir}/{metron_rest_pid}")
    pid_lookup = get_user_call_output.get_user_call_output(format("cat {pid_file}"),
                                                           user=self.__params.metron_user,
                                                           is_checked_call=False)
    pid = pid_lookup[1]
    rest_already_running = format("ls {pid_file} >/dev/null 2>&1 && ps -p {pid} >/dev/null 2>&1")

    # Set the password with env variable instead of param to avoid it showing in ps
    exported_settings = [
        "export METRON_JDBC_PASSWORD={metron_jdbc_password!p};",
        "export JAVA_HOME={java_home};",
        "export METRON_REST_CLASSPATH={metron_rest_classpath};",
        "export METRON_INDEX_CP={metron_indexing_classpath};",
        "export METRON_LOG_DIR={metron_log_dir};",
        "export METRON_PID_FILE={pid_file};",
        "export HDP_VERSION={hdp_version};",
        "export METRON_RA_INDEXING_WRITER={ra_indexing_writer};",
        "export METRON_LDAP_PASSWORD={metron_ldap_password!p};",
        "export METRON_LDAP_SSL_TRUSTSTORE_PASSWORD={metron_ldap_ssl_truststore_password!p};",
    ]
    run_script = ["{metron_home}/bin/metron-rest.sh;"]
    clear_secrets = [
        "unset METRON_JDBC_PASSWORD;",
        "unset METRON_LDAP_PASSWORD;",
        "unset METRON_LDAP_SSL_TRUSTSTORE_PASSWORD;",
    ]
    launch_cmd = format("".join(exported_settings + run_script + clear_secrets))

    Execute(launch_cmd,
            user=self.__params.metron_user,
            logoutput=True,
            not_if=rest_already_running,
            timeout=60)
    Logger.info('Done starting REST application')
def get_value_from_jmx(qry, property, security_enabled, run_user, is_https_enabled):
    """
    Read one property of the first bean returned by a JMX query URL.

    The URL is fetched with ``curl -s`` as ``run_user``.

    :param qry: The JMX query URL handed to curl.
    :param property: Key to read from the first entry of the response's ``beans`` list.
    :param security_enabled: When true, curl is run with ``--negotiate -u :``.
    :param run_user: The user the curl command is executed as.
    :param is_https_enabled: When true, ``-k`` is added to the curl command.
    :return: The property value, or None when the response is empty or anything goes wrong
             (the failure is logged with its traceback).
    """
    cmd = ['curl', '--negotiate', '-u', ':', '-s'] if security_enabled else ['curl', '-s']
    if is_https_enabled:
        cmd.append("-k")
    cmd.append(qry)

    try:
        _, data, _ = get_user_call_output(cmd, user=run_user, quiet=False)
        if data:
            data_dict = json.loads(data)
            return data_dict["beans"][0][property]
    except Exception:
        # Best effort: log and fall through to None. Catching Exception rather than using a
        # bare except lets SystemExit and KeyboardInterrupt propagate.
        Logger.logger.exception("Getting jmx metrics from NN failed. URL: " + str(qry))
    return None
def zeppelin_notebook_import(self, env):
    """
    Import every ``.json`` file under ``params.metron_config_zeppelin_path`` into Zeppelin.

    Each file is posted with ``curl`` to ``/api/notebook/import`` on
    ``params.zeppelin_server_url`` as ``params.metron_user``. A failed import is logged as an
    error and the remaining notebooks are still processed.

    :param env: The execution environment; receives ``params`` via ``set_params`` and is passed
                on to ``get_zeppelin_auth_details``.
    """
    from params import params
    env.set_params(params)
    metron_service.check_indexer_parameters()

    commands = IndexingCommands(params)
    Logger.info(ambari_format('Searching for Zeppelin Notebooks in {metron_config_zeppelin_path}'))

    # Check if authentication is configured on Zeppelin server, and fetch details if enabled.
    session_id = commands.get_zeppelin_auth_details(params.zeppelin_server_url, env)

    import_cmd_template = 'curl -i -b "{0}" http://{1}/api/notebook/import -d @\'{2}\''
    for dirName, _subdirs, files in os.walk(params.metron_config_zeppelin_path):
        for fileName in files:
            if not fileName.endswith(".json"):
                continue

            Logger.info("Importing notebook: " + fileName)
            zeppelin_notebook = os.path.join(dirName, fileName)
            import_cmd = import_cmd_template.format(session_id,
                                                    params.zeppelin_server_url,
                                                    zeppelin_notebook)

            # Unchecked call: in checked mode a non-zero exit raises before the return code
            # can be inspected, which made the error branch below unreachable.
            return_code, import_result, stderr = get_user_call_output(import_cmd,
                                                                      user=params.metron_user,
                                                                      is_checked_call=False)
            Logger.info("Status of importing notebook: " + import_result)
            if return_code != 0:
                Logger.error("Error importing notebook: " + fileName + " Error Message: " + stderr)
def run_command(self, target, operation, method='POST', assertable_result=True, file_to_put=None, ignore_status_codes=None, **kwargs):
    """
    Execute one WebHDFS operation through ``curl`` and return the parsed response.

    :param target: Path the operation applies to; normalised with
                   ``HdfsResourceProvider.parse_path``.
    :param operation: Value of the ``op`` query parameter.
    :param method: HTTP method passed to ``curl -X``.
    :param assertable_result: Some POST requests return '{"boolean":false}' or
                              '{"boolean":true}' depending on if query was successful or not,
                              we can assert this for them. When true, a non-empty response
                              must be a JSON object with a true ``boolean`` entry.
    :param file_to_put: Optional local file uploaded with ``curl -T``.
    :param ignore_status_codes: Extra HTTP status codes accepted in addition to
                                ``WebHDFSUtil.valid_status_codes``.
    :param kwargs: Additional query parameters appended to the URL as ``&key=value``.
    :return: The decoded JSON response, or the raw response text when it is not valid JSON.
    :raises Fail: When ``file_to_put`` does not exist, the status code is not accepted, or the
                  asserted result is not a successful ``boolean`` response.
    """
    if ignore_status_codes is None:
        ignore_status_codes = []

    target = HdfsResourceProvider.parse_path(target)

    url = format("{address}/webhdfs/v1{target}?op={operation}&user.name={run_user}", address=self.address, run_user=self.run_user)
    for k, v in kwargs.items():
        url = format("{url}&{k}={v}")

    if file_to_put and not os.path.exists(file_to_put):
        raise Fail(format("File {file_to_put} is not found."))

    cmd = ["curl", "-sS", "-L", "-w", "%{http_code}", "-X", method]
    if file_to_put:
        cmd += ["-T", file_to_put]
    if self.security_enabled:
        cmd += ["--negotiate", "-u", ":"]
    if self.is_https_enabled:
        cmd += ["-k"]
    cmd.append(url)

    _, out, err = get_user_call_output(cmd, user=self.run_user, logoutput=self.logoutput, quiet=False)

    # "-w %{http_code}" makes curl append the three digit status code to the body.
    status_code = out[-3:]
    out = out[:-3]

    try:
        result_dict = json.loads(out)
    except ValueError:
        result_dict = out

    bad_status = status_code not in WebHDFSUtil.valid_status_codes + list(ignore_status_codes)

    # Only a JSON object can carry the 'boolean' flag. Non-JSON text, or an object without
    # the key, previously crashed with TypeError/KeyError; it is now reported as a Fail.
    failed_assertion = False
    if assertable_result and result_dict:
        failed_assertion = not (isinstance(result_dict, dict) and result_dict.get('boolean'))

    if bad_status or failed_assertion:
        formatted_output = json.dumps(result_dict, indent=2) if isinstance(result_dict, dict) else result_dict
        formatted_output = err + "\n" + formatted_output
        err_msg = "Execution of '%s' returned status_code=%s. %s" % (shell.string_cmd_from_args_list(cmd), status_code, formatted_output)
        raise Fail(err_msg)

    return result_dict
def get_value_from_jmx(qry, property, security_enabled, run_user, is_https_enabled):
    """
    Read one property of the first bean returned by a JMX query URL.

    The URL is fetched with ``curl -s`` as ``run_user``.

    :param qry: The JMX query URL handed to curl.
    :param property: Key to read from the first entry of the response's ``beans`` list.
    :param security_enabled: When true, curl is run with ``--negotiate -u :``.
    :param run_user: The user the curl command is executed as.
    :param is_https_enabled: When true, ``-k`` is added to the curl command.
    :return: The property value, or None when the response is empty or anything goes wrong
             (the failure is logged with its traceback).
    """
    cmd = ['curl', '--negotiate', '-u', ':', '-s'] if security_enabled else ['curl', '-s']
    if is_https_enabled:
        cmd.append("-k")
    cmd.append(qry)

    try:
        _, data, _ = get_user_call_output(cmd, user=run_user, quiet=False)
        if data:
            data_dict = json.loads(data)
            return data_dict["beans"][0][property]
    except Exception:
        # Best effort: log and fall through to None. Catching Exception rather than using a
        # bare except lets SystemExit and KeyboardInterrupt propagate.
        Logger.logger.exception("Getting jmx metrics from NN failed. URL: " + str(qry))
    return None
def curl_krb_request(tmp_dir, keytab, principal, url, cache_file_prefix,
                     krb_exec_search_paths, return_only_http_code, caller_label, user,
                     connection_timeout=CONNECTION_TIMEOUT_DEFAULT,
                     kinit_timer_ms=DEFAULT_KERBEROS_KINIT_TIMER_MS, method='', body='', header=''):
    """
    Makes a curl request using the kerberos credentials stored in a calculated cache file. The
    cache file name is built from the cache file prefix, the user and an md5 hash of the
    principal and keytab. This function will use the klist command to determine if the cache is
    expired and will perform a kinit if necessary. Additionally, it has an internal timer to
    force a kinit after a configurable amount of time. This is to prevent boundary issues where
    requests hit the edge of a ticket's lifetime.

    :param tmp_dir: the directory to use for storing the local kerberos cache for this request.
    :param keytab: the location of the keytab to use when performing a kinit
    :param principal: the principal to use when performing a kinit
    :param url: the URL to request
    :param cache_file_prefix: an identifier used to build the unique cache name for this request.
                              This ensures that multiple requests can use the same cache.
    :param krb_exec_search_paths: the search path to use for invoking kerberos binaries
    :param return_only_http_code: True to return only the HTTP code, False to return GET content
    :param caller_label: an identifier to give context into the caller of this module (used for logging)
    :param user: the user to invoke the curl command as
    :param connection_timeout: if specified, a connection timeout for curl, in seconds
    :param kinit_timer_ms: if specified, the time (in ms), before forcing a kinit even if the
                           klist cache is still valid.
    :param method: optional HTTP method passed to ``curl -X``; ``body`` and ``header`` are only
                   used when a method is given and the response body is requested
    :param body: optional request body passed to ``curl -d``
    :param header: optional request header passed to ``curl -H``
    :return: a tuple ``(response, error_msg, elapsed)`` where ``response`` is the HTTP code as an
             int or the response body (``""`` when curl printed nothing), ``error_msg`` is curl's
             stderr or None, and ``elapsed`` is the request duration in seconds
    """
    import uuid

    # start off false
    is_kinit_required = False

    # Create the kerberos credentials cache (ccache) file and set it in the environment to use
    # when executing curl. Use the md5 hash of the combination of the principal and keytab file
    # to generate a (relatively) unique cache filename so that we can use it as needed. Scope
    # this file by user in order to prevent sharing of cache files by multiple users.
    ccache_file_name = _md5("{0}|{1}".format(principal, keytab)).hexdigest()

    curl_krb_cache_path = os.path.join(tmp_dir, "curl_krb_cache")
    if not os.path.exists(curl_krb_cache_path):
        os.makedirs(curl_krb_cache_path)
    os.chmod(curl_krb_cache_path, 0o777)

    ccache_file_path = "{0}{1}{2}_{3}_cc_{4}".format(curl_krb_cache_path, os.sep, cache_file_prefix, user, ccache_file_name)
    kerberos_env = {'KRB5CCNAME': ccache_file_path}

    # concurrent kinit's can cause the following error:
    # Internal credentials cache error while storing credentials while getting initial credentials
    kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
    kinit_lock.acquire()
    try:
        # If there are no tickets in the cache or they are expired, perform a kinit, else use what
        # is in the cache
        if krb_exec_search_paths:
            klist_path_local = get_klist_path(krb_exec_search_paths)
        else:
            klist_path_local = get_klist_path()

        # take a look at the last time kinit was run for the specified cache and force a new
        # kinit if it's time; this helps to avoid problems approaching ticket boundary when
        # executing a klist and then a curl
        last_kinit_time = _KINIT_CACHE_TIMES.get(ccache_file_name, 0)
        current_time = int(time.time())

        # time.time() and the recorded kinit times are in seconds while the timer is given in
        # milliseconds; convert before comparing, otherwise the timer practically never fires.
        kinit_timer_seconds = kinit_timer_ms / 1000.0
        if current_time - kinit_timer_seconds > last_kinit_time:
            is_kinit_required = True

        # if the time has not expired, double-check that the cache still has a valid ticket
        if not is_kinit_required:
            klist_command = "{0} -s {1}".format(klist_path_local, ccache_file_path)
            is_kinit_required = (shell.call(klist_command, user=user)[0] != 0)

        # if kinit is required, then perform the kinit
        if is_kinit_required:
            if krb_exec_search_paths:
                kinit_path_local = get_kinit_path(krb_exec_search_paths)
            else:
                kinit_path_local = get_kinit_path()

            logger.debug("Enabling Kerberos authentication for %s via GSSAPI using ccache at %s",
                         caller_label, ccache_file_path)

            # kinit; there's no need to set a ticket timeout as this will use the default invalidation
            # configured in the krb5.conf - regenerating keytabs will not prevent an existing cache
            # from working correctly
            shell.checked_call("{0} -c {1} -kt {2} {3} > /dev/null".format(kinit_path_local, ccache_file_path, keytab, principal), user=user)

            # record kinit time
            _KINIT_CACHE_TIMES[ccache_file_name] = current_time
        else:
            # no kinit needed, use the cache
            logger.debug("Kerberos authentication for %s via GSSAPI already enabled using ccache at %s.",
                         caller_label, ccache_file_path)
    finally:
        kinit_lock.release()

    # check if cookies dir exists, if not then create it
    cookies_dir = os.path.join(tmp_dir, "cookies")
    if not os.path.exists(cookies_dir):
        os.makedirs(cookies_dir)

    cookie_file_name = str(uuid.uuid4())
    cookie_file = os.path.join(cookies_dir, cookie_file_name)

    start_time = time.time()
    error_msg = None

    # setup timeouts for the request; ensure we use integers since that is what curl needs
    connection_timeout = int(connection_timeout)
    maximum_timeout = connection_timeout + 2

    curl_command = ['curl', '-L', '-k', '--negotiate', '-u', ':', '-b', cookie_file, '-c', cookie_file]
    if return_only_http_code:
        curl_command.extend(['-w', '%{http_code}'])
    curl_command.extend([url, '--connect-timeout', str(connection_timeout), '--max-time', str(maximum_timeout)])

    if return_only_http_code:
        # discard the body, only the status code written by -w is of interest
        curl_command.extend(['-o', '/dev/null'])
    elif method:
        # returns response body; header, method and body keep this relative order
        if header:
            curl_command.extend(['-H', header])
        curl_command.extend(['-X', method])
        if body:
            curl_command.extend(['-d', body])

    try:
        _, curl_stdout, curl_stderr = get_user_call_output(curl_command, user=user, env=kerberos_env)
    except Fail:
        if logger.isEnabledFor(logging.DEBUG):
            logger.exception("Unable to make a curl request for {0}.".format(caller_label))
        raise
    finally:
        if os.path.isfile(cookie_file):
            os.remove(cookie_file)

    # empty quotes evaluates to false
    if curl_stderr:
        error_msg = curl_stderr

    # NOTE: despite its name this is the elapsed time in seconds (a time.time() difference)
    time_millis = time.time() - start_time

    # empty quotes evaluates to false
    if curl_stdout:
        if return_only_http_code:
            return (int(curl_stdout), error_msg, time_millis)
        return (curl_stdout, error_msg, time_millis)

    logger.debug("The curl response for %s is empty; standard error = %s",
                 caller_label, str(error_msg))
    return ("", error_msg, time_millis)
def service_check(self, env):
    """
    Smoke test YARN by running the distributed shell application and checking its outcome.

    The distributed shell client is run as the smoke user (after a kinit when security is
    enabled). The application name is taken from the ``appTrackingUrl`` in the client output
    and the application is looked up on every host in ``params.rm_hosts`` through the
    ``/ws/v1/cluster/apps/`` REST endpoint.

    :param env: The execution environment; receives ``params`` via ``set_params``.
    :raises Exception: When the client output has no usable ``appTrackingUrl``, when no
                       ResourceManager host returns JSON, or when a host reports the
                       application as anything other than FINISHED/SUCCEEDED.
    """
    import params
    env.set_params(params)

    if params.hdp_stack_version_major != "" and compare_versions(params.hdp_stack_version_major, '2.2') >= 0:
        path_to_distributed_shell_jar = "/usr/hdp/current/hadoop-yarn-client/hadoop-yarn-applications-distributedshell.jar"
    else:
        path_to_distributed_shell_jar = "/usr/lib/hadoop-yarn/hadoop-yarn-applications-distributedshell*.jar"

    yarn_distrubuted_shell_check_cmd = format(
        "yarn org.apache.hadoop.yarn.applications.distributedshell.Client "
        "-shell_command ls -num_containers {number_of_nm} -jar {path_to_distributed_shell_jar}")

    if params.security_enabled:
        kinit_cmd = format("{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};")
        smoke_cmd = format("{kinit_cmd} {yarn_distrubuted_shell_check_cmd}")
    else:
        smoke_cmd = yarn_distrubuted_shell_check_cmd

    return_code, out = shell.checked_call(smoke_cmd,
                                          path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
                                          user=params.smokeuser,
                                          )

    m = re.search(r"appTrackingUrl=(.*),\s", out)
    if m is None:
        raise Exception("Could not find appTrackingUrl in the output of the distributed shell application")
    app_url = m.group(1)

    # The last path segment containing "application" is used as the application name.
    application_name = None
    for item in str(app_url).split('/'):
        if "application" in item:
            application_name = item
    if application_name is None:
        raise Exception("Could not determine the application name from " + str(app_url))

    json_response_received = False
    for rm_host in params.rm_hosts:
        info_app_url = params.scheme + "://" + rm_host + ":" + params.rm_active_port + "/ws/v1/cluster/apps/" + application_name

        get_app_info_cmd = "curl --negotiate -u : -ksL --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + info_app_url

        return_code, stdout, _ = get_user_call_output(get_app_info_cmd,
                                                      user=params.smokeuser,
                                                      path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
                                                      )

        try:
            json_response = json.loads(stdout)
        except (ValueError, TypeError):
            # This host did not answer with JSON; try the next one.
            continue
        json_response_received = True

        app_info = json_response.get('app') if isinstance(json_response, dict) else None
        if not isinstance(app_info, dict):
            # JSON without application details cannot be validated; try the next host.
            continue

        # This check used to sit inside a try block whose "except Exception: pass" swallowed
        # the failure, so a failed application never failed the service check.
        if app_info.get('state') != "FINISHED" or app_info.get('finalStatus') != "SUCCEEDED":
            raise Exception("Application " + app_url + " state/status is not valid. Should be FINISHED/SUCCEEDED.")

    if not json_response_received:
        raise Exception("Could not get json response from YARN API")
def prepare_war():
    """
    Call the prepare-war command unless both marker files show it is unnecessary.

    The command is run when either marker file is missing or its content differs from what is
    expected. After a successful run both marker files are rewritten.
    The marker file for the command is stored in <stack-root>/current/oozie-server/.prepare_war_cmd
    The marker file for the content of the libext folder is stored in
    <stack-root>/current/oozie-server/.war_libext_content

    :raises Fail: When prepare-war exits non-zero or its output lacks the expected success text.
    """
    import params

    prepare_war_cmd_file = format("{oozie_home}/.prepare_war_cmd")
    libext_content_file = format("{oozie_home}/.war_libext_content")
    list_libext_command = format("ls -l {oozie_libext_dir}") + " | awk '{print $9, $5}' | awk 'NF > 0'"

    # DON'T CHANGE THE VALUE SINCE IT'S USED TO DETERMINE WHETHER TO RUN THE COMMAND OR NOT BY READING THE MARKER FILE.
    # Oozie tmp dir should be /var/tmp/oozie and is already created by a function above.
    command = format("cd {oozie_tmp_dir} && {oozie_setup_sh} prepare-war {oozie_secure}").strip()

    # First marker: the exact command line of the previous run.
    rebuild_needed = False
    if not os.path.exists(prepare_war_cmd_file):
        rebuild_needed = True
        Logger.info(format("Will run prepare war cmd since marker file {prepare_war_cmd_file} is missing."))
    else:
        with open(prepare_war_cmd_file, "r") as marker:
            cmd = marker.readline().strip()
        if cmd != command:
            rebuild_needed = True
            Logger.info(format("Will run prepare war cmd since marker file {prepare_war_cmd_file} has contents which differ.\n"
                               "Expected: {command}.\nActual: {cmd}."))

    # The libext listing is always taken; it is also what gets written to the second marker.
    libext_listing = get_user_call_output(list_libext_command, user=params.oozie_user)
    libext_content = libext_listing[1].strip()

    # Second marker: only consulted when the first one did not already force a run.
    if not rebuild_needed:
        if not os.path.exists(libext_content_file):
            rebuild_needed = True
            Logger.info(format("Will run prepare war cmd since marker file {libext_content_file} is missing."))
        else:
            with open(libext_content_file, "r") as marker:
                recorded_content = marker.read().strip()
            if recorded_content != libext_content:
                rebuild_needed = True
                Logger.info(format("Will run prepare war cmd since marker file {libext_content_file} has contents which differ.\n"
                                   "Content of the folder {oozie_libext_dir} changed."))

    if not rebuild_needed:
        Logger.info(format("No need to run prepare-war since marker file {prepare_war_cmd_file} already exists."))
        return

    # Time-consuming to run
    return_code, output = shell.call(command, user=params.oozie_user)
    output = "" if output is None else output

    succeeded = return_code == 0 and "New Oozie WAR file with added".lower() in output.lower()
    if not succeeded:
        message = "Unexpected Oozie WAR preparation output {0}".format(output)
        Logger.error(message)
        raise Fail(message)

    # Generate marker files
    File(prepare_war_cmd_file,
         content=command,
         mode=0o644,
         )
    File(libext_content_file,
         content=libext_content,
         mode=0o644,
         )
def hive_service(name, action='start', upgrade_type=None):
    """
    Start or stop a Hive daemon identified by ``name``.

    :param name: ``'metastore'`` or ``'hiveserver2'``; selects the PID file and start command.
    :param action: ``'start'`` launches the daemon and, for the MySQL, PostgreSQL and Oracle
                   JDBC drivers, verifies the database connection. ``'stop'`` kills the process
                   from the PID file (``kill``, then ``kill -9``), verifies it is gone and
                   deletes the PID file.
    :param upgrade_type: When equal to ``UPGRADE_TYPE_ROLLING`` the "already running" guard on
                         start is dropped.
    """
    import params

    if name == 'metastore':
        pid_file = format("{hive_pid_dir}/{hive_metastore_pid}")
        cmd = format("{start_metastore_path} {hive_log_dir}/hive.out {hive_log_dir}/hive.log {pid_file} {hive_server_conf_dir} {hive_log_dir}")
    elif name == 'hiveserver2':
        pid_file = format("{hive_pid_dir}/{hive_pid}")
        cmd = format("{start_hiveserver2_path} {hive_log_dir}/hive-server2.out {hive_log_dir}/hive-server2.log {pid_file} {hive_server_conf_dir} {hive_log_dir}")

        if (params.security_enabled
                and params.current_version is not None
                and params.current_version.startswith(("2.2.4", "2.2.3"))):
            hive_kinit_cmd = format("{kinit_path_local} -kt {hive_server2_keytab} {hive_principal}; ")
            Execute(hive_kinit_cmd, user=params.hive_user)

    pid = get_user_call_output.get_user_call_output(
        format("cat {pid_file}"),
        user=params.hive_user,
        is_checked_call=False)[1]
    process_id_exists_command = format("ls {pid_file} >/dev/null 2>&1 && ps -p {pid} >/dev/null 2>&1")

    if action == 'start':
        if name == 'hiveserver2':
            check_fs_root()

        daemon_cmd = cmd
        hadoop_home = params.hadoop_home
        hive_bin = "hive"

        # upgrading hiveserver2 (rolling_restart) means that there is an existing,
        # de-registering hiveserver2; the pid will still exist, but the new
        # hiveserver is spinning up on a new port, so the pid will be re-written
        if upgrade_type == UPGRADE_TYPE_ROLLING:
            process_id_exists_command = None

            if params.version:
                import os
                hadoop_home = format("/usr/hdp/{version}/hadoop")
                hive_bin = os.path.join(params.hive_bin, hive_bin)

        daemon_environment = {
            'HADOOP_HOME': hadoop_home,
            'JAVA_HOME': params.java64_home,
            'HIVE_BIN': hive_bin,
        }
        Execute(daemon_cmd,
                user=params.hive_user,
                environment=daemon_environment,
                path=params.execute_path,
                not_if=process_id_exists_command)

        checked_jdbc_drivers = (
            "com.mysql.jdbc.Driver",
            "org.postgresql.Driver",
            "oracle.jdbc.driver.OracleDriver",
        )
        if params.hive_jdbc_driver in checked_jdbc_drivers:
            db_connection_check_command = format(
                "{java64_home}/bin/java -cp {check_db_connection_jar}:{target} org.apache.ambari.server.DBConnectionVerification '{hive_jdbc_connection_url}' {hive_metastore_user_name} {hive_metastore_user_passwd!p} {hive_jdbc_driver}")

            Execute(db_connection_check_command,
                    path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
                    tries=5,
                    try_sleep=10)

    elif action == 'stop':
        wait_time = 5
        daemon_kill_cmd = format("{sudo} kill {pid}")
        daemon_hard_kill_cmd = format("{sudo} kill -9 {pid}")
        daemon_is_gone = format("! ({process_id_exists_command})")

        # polite kill first
        Execute(daemon_kill_cmd,
                not_if=daemon_is_gone)

        # hard kill only if the process is still around after waiting
        Execute(daemon_hard_kill_cmd,
                not_if=format("! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )"))

        # check if stopped the process, else fail the task
        Execute(daemon_is_gone,
                tries=20,
                try_sleep=3,
                )

        File(pid_file,
             action="delete")
def curl_krb_request(tmp_dir, keytab, principal, url, cache_file_prefix,
                     krb_exec_search_paths, return_only_http_code, alert_name, user,
                     connection_timeout=CONNECTION_TIMEOUT_DEFAULT):
    """
    Make a curl request using kerberos credentials kept in a per-user credentials cache file.

    ``klist -s`` decides whether the cache still holds a valid ticket; if not, a ``kinit`` with
    a five minute ticket lifetime is performed first. The kinit section is serialised through
    the global kerberos lock.

    :param tmp_dir: directory holding the credentials cache file and the ``cookies`` directory
    :param keytab: the location of the keytab to use when performing a kinit
    :param principal: the principal to use when performing a kinit
    :param url: the URL to request
    :param cache_file_prefix: prefix used when building the credentials cache file name
    :param krb_exec_search_paths: search path used to locate klist/kinit; the default lookup is
                                  used when empty
    :param return_only_http_code: True to return only the HTTP code, False to return the body
    :param alert_name: name of the calling alert, used in log messages
    :param user: the user to invoke the commands as
    :param connection_timeout: connection timeout for curl, in seconds
    :return: a tuple ``(response, error_msg, elapsed)`` where ``response`` is the HTTP code as an
             int or the response body (``""`` when curl printed nothing), ``error_msg`` is curl's
             stderr or None, and ``elapsed`` is the request duration in seconds
    """
    import uuid

    # Create the kerberos credentials cache (ccache) file and set it in the environment to use
    # when executing curl. Use the md5 hash of the combination of the principal and keytab file
    # to generate a (relatively) unique cache filename so that we can use it as needed. Scope
    # this file by user in order to prevent sharing of cache files by multiple users.
    ccache_file_name = _md5("{0}|{1}".format(principal, keytab)).hexdigest()
    ccache_file_path = "{0}{1}{2}_{3}_cc_{4}".format(tmp_dir, os.sep, cache_file_prefix, user, ccache_file_name)
    kerberos_env = {'KRB5CCNAME': ccache_file_path}

    # concurrent kinit's can cause the following error:
    # Internal credentials cache error while storing credentials while getting initial credentials
    kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
    kinit_lock.acquire()
    try:
        # If there are no tickets in the cache or they are expired, perform a kinit, else use what
        # is in the cache
        if krb_exec_search_paths:
            klist_path_local = get_klist_path(krb_exec_search_paths)
        else:
            klist_path_local = get_klist_path()

        if shell.call("{0} -s {1}".format(klist_path_local, ccache_file_path), user=user)[0] != 0:
            if krb_exec_search_paths:
                kinit_path_local = get_kinit_path(krb_exec_search_paths)
            else:
                kinit_path_local = get_kinit_path()

            logger.debug("[Alert][{0}] Enabling Kerberos authentication via GSSAPI using ccache at {1}.".format(
                alert_name, ccache_file_path))

            shell.checked_call("{0} -l 5m -c {1} -kt {2} {3} > /dev/null".format(
                kinit_path_local, ccache_file_path, keytab, principal), user=user)
        else:
            logger.debug("[Alert][{0}] Kerberos authentication via GSSAPI already enabled using ccache at {1}.".format(
                alert_name, ccache_file_path))
    finally:
        kinit_lock.release()

    # check if cookies dir exists, if not then create it
    cookies_dir = os.path.join(tmp_dir, "cookies")
    if not os.path.exists(cookies_dir):
        os.makedirs(cookies_dir)

    cookie_file_name = str(uuid.uuid4())
    cookie_file = os.path.join(cookies_dir, cookie_file_name)

    start_time = time.time()
    error_msg = None

    # setup timeouts for the request; ensure we use integers since that is what curl needs
    connection_timeout = int(connection_timeout)
    maximum_timeout = connection_timeout + 2

    curl_command = ['curl', '-L', '-k', '--negotiate', '-u', ':', '-b', cookie_file, '-c', cookie_file]
    if return_only_http_code:
        curl_command.extend(['-w', '%{http_code}'])
    curl_command.extend([url, '--connect-timeout', str(connection_timeout), '--max-time', str(maximum_timeout)])
    if return_only_http_code:
        # discard the body, only the status code written by -w is of interest
        curl_command.extend(['-o', '/dev/null'])

    try:
        _, curl_stdout, curl_stderr = get_user_call_output(curl_command, user=user, env=kerberos_env)
    except Fail:
        if logger.isEnabledFor(logging.DEBUG):
            logger.exception("[Alert][{0}] Unable to make a web request.".format(alert_name))
        raise
    finally:
        if os.path.isfile(cookie_file):
            os.remove(cookie_file)

    # empty quotes evaluates to false
    if curl_stderr:
        error_msg = curl_stderr

    # NOTE: despite its name this is the elapsed time in seconds (a time.time() difference)
    time_millis = time.time() - start_time

    # empty quotes evaluates to false
    if curl_stdout:
        if return_only_http_code:
            return (int(curl_stdout), error_msg, time_millis)
        return (curl_stdout, error_msg, time_millis)

    # The {1} placeholder was missing, so the error message passed to format() was never logged.
    logger.debug("[Alert][{0}] Curl response is empty! Please take a look at error message: {1}".format(
        alert_name, str(error_msg)))
    return ("", error_msg, time_millis)
def hive_service_interactive(name, action='start', upgrade_type=None):
    """
    Start or stop the interactive HiveServer2 daemon.

    :param name: Not used by this function.
    :param action: ``'start'`` launches the daemon unless the PID from the PID file is alive
                   and, for the MySQL, PostgreSQL and Oracle JDBC drivers, verifies the database
                   connection. ``'stop'`` kills the process (``kill``, then ``kill -9``),
                   verifies it is gone and deletes the PID file.
    :param upgrade_type: Not used by this function.
    """
    import params

    pid_file = format("{hive_pid_dir}/{hive_interactive_pid}")
    cmd = format("{start_hiveserver2_interactive_path} {hive_pid_dir}/hive-server2-interactive.out {hive_log_dir}/hive-server2-interactive.err {pid_file} {hive_server_interactive_conf_dir} {hive_log_dir}")

    # TODO : Kerberos work for Hive2
    pid = get_user_call_output.get_user_call_output(
        format("cat {pid_file}"),
        user=params.hive_user,
        is_checked_call=False)[1]
    process_id_exists_command = format("ls {pid_file} >/dev/null 2>&1 && ps -p {pid} >/dev/null 2>&1")

    if action == 'start':
        check_fs_root(params.hive_server_interactive_conf_dir, params.execute_path_hive_interactive)

        # TODO : Upgrade checks required here.
        daemon_environment = {
            'HADOOP_HOME': params.hadoop_home,
            'JAVA_HOME': params.java64_home,
            'HIVE_BIN': "hive2",
        }
        Execute(cmd,
                user=params.hive_user,
                environment=daemon_environment,
                path=params.execute_path,
                not_if=process_id_exists_command)

        checked_jdbc_drivers = (
            "com.mysql.jdbc.Driver",
            "org.postgresql.Driver",
            "oracle.jdbc.driver.OracleDriver",
        )
        if params.hive_jdbc_driver in checked_jdbc_drivers:
            db_connection_check_command = format(
                "{java64_home}/bin/java -cp {check_db_connection_jar}:{target_hive_interactive} org.apache.ambari.server.DBConnectionVerification '{hive_jdbc_connection_url}' {hive_metastore_user_name} {hive_metastore_user_passwd!p} {hive_jdbc_driver}")

            Execute(db_connection_check_command,
                    path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
                    tries=5,
                    try_sleep=10)

    elif action == 'stop':
        wait_time = 5
        daemon_kill_cmd = format("{sudo} kill {pid}")
        daemon_hard_kill_cmd = format("{sudo} kill -9 {pid}")
        daemon_is_gone = format("! ({process_id_exists_command})")

        # polite kill first
        Execute(daemon_kill_cmd,
                not_if=daemon_is_gone)

        # hard kill only if the process is still around after waiting
        Execute(daemon_hard_kill_cmd,
                not_if=format("! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )"))

        # check if stopped the process, else fail the task
        Execute(daemon_is_gone,
                tries=20,
                try_sleep=3,
                )

        File(pid_file,
             action="delete")
def beacon(type, action=None, upgrade_type=None):
    """Configure, start or stop the Beacon component.

    :param type: component kind; start/stop handling only runs for 'server'.
    :param action: 'config' creates the directories Beacon needs; 'start'
        writes config files, runs ``beacon setup`` / ``beacon start`` and then
        registers users and policy items in Ranger when Ranger admin is
        present; 'stop' runs ``beacon stop`` and deletes the pid file.
    :param upgrade_type: accepted but not read by this function.

    NOTE(review): exceptions raised while starting or stopping are caught and
    only followed by show_logs(); nothing is re-raised here, so a failed
    start/stop is not propagated from this function -- confirm this is intended.
    """
    import params

    if action == 'config':
        create_directory(params.beacon_home_dir)
        create_directory(params.beacon_plugin_staging_dir)

        # Split "<scheme>://<first>/<rest>" and hand "/<rest>" plus "<first>" to
        # create_directory (meaning of the second argument is defined by that helper).
        cloud_cred_provider = params.beacon_cloud_cred_provider_dir.split('://')[1]
        cloud_cred_parts = cloud_cred_provider.split('/', 1)
        create_directory("/" + cloud_cred_parts[1], cloud_cred_parts[0])

        if params.is_hive_installed:
            # Only create the Hive replication directories that are actually configured.
            if not isinstance(params.hive_repl_cmrootdir, UnknownConfiguration):
                beacon_utils.create_hdfs_directory(params.hive_repl_cmrootdir,
                                                   params.hive_user,
                                                   01777)
            if not isinstance(params.hive_repl_rootdir, UnknownConfiguration):
                beacon_utils.create_hdfs_directory(params.hive_repl_rootdir,
                                                   params.hive_user,
                                                   0700)

        Directory(params.beacon_pid_dir,
                  owner=params.beacon_user,
                  create_parents=True,
                  mode=0755,
                  cd_access="a",
                  )

        Directory(params.beacon_data_dir,
                  owner=params.beacon_user,
                  create_parents=True,
                  mode=0755,
                  cd_access="a",
                  )

        Directory(params.beacon_log_dir,
                  owner=params.beacon_user,
                  create_parents=True,
                  mode=0755,
                  cd_access="a",
                  )

        Directory(params.beacon_webapp_dir,
                  owner=params.beacon_user,
                  create_parents=True)

        Directory(params.beacon_home,
                  owner=params.beacon_user,
                  create_parents=True)

        Directory(params.etc_prefix_dir,
                  mode=0755,
                  create_parents=True)

        Directory(params.beacon_conf_dir,
                  owner=params.beacon_user,
                  create_parents=True)

    # Environment handed to every `beacon` command executed below.
    environment_dictionary = {
        "HADOOP_HOME": params.hadoop_home_dir,
        "JAVA_HOME": params.java_home,
        "BEACON_LOG_DIR": params.beacon_log_dir,
        "BEACON_PID_DIR": params.beacon_pid_dir,
        "BEACON_DATA_DIR": params.beacon_data_dir,
        "BEACON_CLUSTER": params.beacon_cluster_name,
        "HADOOP_CONF": params.hadoop_conf_dir
    }

    # Read the recorded pid (unchecked call: a missing pid file is tolerated);
    # `pid` is consumed by the {pid} placeholder in the template below.
    pid = get_user_call_output.get_user_call_output(format("cat {server_pid_file}"), user=params.beacon_user,
                                                    is_checked_call=False)[1]
    # Shell test used as not_if guard so `beacon start` is skipped when already running.
    process_exists = format("ls {server_pid_file} && ps -p {pid}")

    if type == 'server':
        if action == 'start':
            try:
                if params.credential_store_enabled:
                    if 'hadoop.security.credential.provider.path' in params.beacon_env:
                        # Strip the 'jceks://file' prefix to obtain plain file paths.
                        credential_provider_path = params.beacon_env['hadoop.security.credential.provider.path']
                        credential_provider_src_path = credential_provider_path[len('jceks://file'):]
                        File(params.beacon_credential_provider_path[len('jceks://file'):],
                             owner=params.beacon_user,
                             group=params.user_group,
                             mode=0640,
                             content=StaticFile(credential_provider_src_path)
                             )
                    else:
                        Logger.error(
                            "hadoop.security.credential.provider.path property not found in beacon-env config-type")

                File(os.path.join(params.beacon_conf_dir, 'beacon.yml'),
                     owner='root',
                     group='root',
                     mode=0644,
                     content=Template("beacon.yml.j2")
                     )

                params.beacon_security_site = update_credential_provider_path(
                    params.beacon_security_site,
                    'beacon-security-site',
                    os.path.join(params.beacon_conf_dir, 'beacon-security-site.jceks'),
                    params.beacon_user,
                    params.user_group
                )

                XmlConfig("beacon-security-site.xml",
                          conf_dir=params.beacon_conf_dir,
                          configurations=params.beacon_security_site,
                          configuration_attributes=params.config['configuration_attributes']['beacon-security-site'],
                          owner=params.beacon_user,
                          group=params.user_group,
                          mode=0644
                          )

                Execute(format('{beacon_home}/bin/beacon setup'),
                        user=params.beacon_user,
                        path=params.hadoop_bin_dir,
                        environment=environment_dictionary
                        )

                if params.download_mysql_driver:
                    download_mysql_driver()

                Execute(format('{beacon_home}/bin/beacon start'),
                        user=params.beacon_user,
                        path=params.hadoop_bin_dir,
                        environment=environment_dictionary,
                        not_if=process_exists,
                        )

                if params.has_ranger_admin:
                    # ranger_admin_user / ranger_admin_passwd are consumed by the
                    # "{ranger_admin_user}:{ranger_admin_passwd}" templates below.
                    ranger_admin_url = params.config['configurations']['admin-properties']['policymgr_external_url']
                    ranger_admin_user = params.config['configurations']['ranger-env']['admin_username']
                    ranger_admin_passwd = params.config['configurations']['ranger-env']['admin_password']

                    if not params.security_enabled:
                        # Creating/Updating beacon.ranger.user with role "ROLE_SYS_ADMIN"
                        response_user = ranger_api_functions.get_user(ranger_admin_url, params.beacon_ranger_user,
                                                                      format(
                                                                          "{ranger_admin_user}:{ranger_admin_passwd}"))
                        if response_user is not None and response_user['name'] == params.beacon_ranger_user:
                            response_user_role = response_user['userRoleList'][0]
                            Logger.info(format(
                                "Beacon Ranger User with username {beacon_ranger_user} exists with role {response_user_role}"))
                            if response_user_role != "ROLE_SYS_ADMIN":
                                response_user_role = ranger_api_functions.update_user_role(ranger_admin_url,
                                                                                           params.beacon_ranger_user,
                                                                                           "ROLE_SYS_ADMIN", format(
                                        "{ranger_admin_user}:{ranger_admin_passwd}"))
                        else:
                            response_code = ranger_api_functions.create_user(ranger_admin_url,
                                                                             params.beacon_ranger_user,
                                                                             params.beacon_ranger_password,
                                                                             "ROLE_SYS_ADMIN", format(
                                    "{ranger_admin_user}:{ranger_admin_passwd}"))

                    # Updating beacon_user role depending upon cluster environment
                    # Poll Ranger up to 10 times, 10 seconds apart, until the user is returned.
                    count = 0
                    while count < 10:
                        beacon_user_get = ranger_api_functions.get_user(ranger_admin_url, params.beacon_user, format(
                            "{ranger_admin_user}:{ranger_admin_passwd}"))
                        if beacon_user_get is not None:
                            break
                        else:
                            time.sleep(10)  # delay for 10 seconds
                            count = count + 1
                            Logger.error(
                                format('Retrying to fetch {beacon_user} user from Ranger Admin for {count} time(s)'))

                    if beacon_user_get is not None and beacon_user_get['name'] == params.beacon_user:
                        beacon_user_get_role = beacon_user_get['userRoleList'][0]
                        # Secured clusters want ROLE_SYS_ADMIN, unsecured ones ROLE_USER.
                        if params.security_enabled and beacon_user_get_role != "ROLE_SYS_ADMIN":
                            beacon_service_user = ranger_api_functions.update_user_role(ranger_admin_url,
                                                                                        params.beacon_user,
                                                                                        "ROLE_SYS_ADMIN", format(
                                    "{ranger_admin_user}:{ranger_admin_passwd}"))
                        elif not params.security_enabled and beacon_user_get_role != "ROLE_USER":
                            beacon_service_user = ranger_api_functions.update_user_role(ranger_admin_url,
                                                                                        params.beacon_user,
                                                                                        "ROLE_USER", format(
                                    "{ranger_admin_user}:{ranger_admin_passwd}"))

                    if params.ranger_hive_plugin_enabled:
                        # Get Ranger Hive default policy for resource database, table, column
                        response_policy = ranger_api_functions.get_ranger_service_default_policy(ranger_admin_url,
                                                                                                 params.service_name,
                                                                                                 format(
                                                                                                     "{ranger_admin_user}:{ranger_admin_passwd}"),
                                                                                                 ['database', 'table',
                                                                                                  'column'])
                        if response_policy:
                            user_present = ranger_api_functions.check_user_policy(response_policy, params.beacon_user)
                            if not user_present and beacon_user_get is not None and beacon_user_get[
                                'name'] == params.beacon_user:
                                # Add the beacon user to the default database/table/column policy.
                                policy_id = response_policy['id']
                                beacon_user_policy_item = {'groups': [], 'conditions': [],
                                                           'users': [params.beacon_user],
                                                           'accesses': [{'isAllowed': True, 'type': 'all'},
                                                                        {'isAllowed': True, 'type': 'repladmin'}],
                                                           'delegateAdmin': False}
                                policy_data = ranger_api_functions.update_policy_item(response_policy,
                                                                                      beacon_user_policy_item)
                                update_policy_response = ranger_api_functions.update_policy(ranger_admin_url,
                                                                                            policy_id, policy_data,
                                                                                            format(
                                                                                                "{ranger_admin_user}:{ranger_admin_passwd}"))

                        # Get Ranger Hive default policy for resource hiveservice
                        response_policy = ranger_api_functions.get_ranger_service_default_policy(ranger_admin_url,
                                                                                                 params.service_name,
                                                                                                 format(
                                                                                                     "{ranger_admin_user}:{ranger_admin_passwd}"),
                                                                                                 ['hiveservice'])
                        if response_policy:
                            user_present = ranger_api_functions.check_user_policy(response_policy, params.beacon_user)
                            if not user_present and beacon_user_get is not None and beacon_user_get[
                                'name'] == params.beacon_user:
                                # Updating beacon_user in Ranger Hive default policy for resource hiveservice
                                policy_id = response_policy['id']
                                beacon_user_policy_item = {'groups': [], 'conditions': [],
                                                           'users': [params.beacon_user],
                                                           'accesses': [{'isAllowed': True, 'type': 'serviceadmin'}],
                                                           'delegateAdmin': False}
                                policy_data = ranger_api_functions.update_policy_item(response_policy,
                                                                                      beacon_user_policy_item)
                                update_policy_response = ranger_api_functions.update_policy(ranger_admin_url,
                                                                                            policy_id, policy_data,
                                                                                            format(
                                                                                                "{ranger_admin_user}:{ranger_admin_passwd}"))

                    if params.ranger_atlas_plugin_enabled:
                        # Creating beacon.atlas.user with role "ROLE_USER"
                        beacon_atlas_user_response = ranger_api_functions.get_user(ranger_admin_url,
                                                                                   params.beacon_atlas_user, format(
                                "{ranger_admin_user}:{ranger_admin_passwd}"))
                        if beacon_atlas_user_response is not None and beacon_atlas_user_response[
                            'name'] == params.beacon_atlas_user:
                            beacon_atlas_user_role = beacon_atlas_user_response['userRoleList'][0]
                            Logger.info(format(
                                "Beacon Atlas User with username {beacon_atlas_user} exists with role {beacon_atlas_user_role}"))
                        else:
                            beacon_atlas_user_create_response_code = ranger_api_functions.create_user(
                                ranger_admin_url, params.beacon_atlas_user, params.beacon_atlas_password,
                                "ROLE_USER", format(
                                    "{ranger_admin_user}:{ranger_admin_passwd}"))

                        # The user added to the Atlas policies is the beacon service user on
                        # secured clusters and the dedicated atlas user otherwise.
                        if params.security_enabled:
                            get_beacon_atlas_user = params.beacon_user
                        else:
                            get_beacon_atlas_user = params.beacon_atlas_user

                        if params.is_stack_3_0_or_further:
                            # Get Ranger Atlas default policy for ENTITY TYPE, ENTITY CLASSIFICATION and ENTITY ID resource
                            atlas_entity_policy_response = ranger_api_functions.get_ranger_service_default_policy(
                                ranger_admin_url, params.ranger_atlas_service_name,
                                format("{ranger_admin_user}:{ranger_admin_passwd}"),
                                ['entity', 'entity-classification', 'entity-type'])

                            if atlas_entity_policy_response:
                                beacon_atlas_user_present = ranger_api_functions.check_user_policy(
                                    atlas_entity_policy_response, get_beacon_atlas_user)
                                if not beacon_atlas_user_present:
                                    # Updating beacon atlas user in Ranger Atlas default policy for entity resource
                                    atlas_entity_policy_id = atlas_entity_policy_response['id']
                                    beacon_atlas_user_policy_item = {'groups': [], 'conditions': [],
                                                                     'users': [get_beacon_atlas_user],
                                                                     'accesses': [
                                                                         {'type': 'entity-read', 'isAllowed': True},
                                                                         {'type': 'entity-create', 'isAllowed': True},
                                                                         {'type': 'entity-update', 'isAllowed': True}]}
                                    atlas_entity_policy_data = ranger_api_functions.update_policy_item(
                                        atlas_entity_policy_response, beacon_atlas_user_policy_item)
                                    atlas_update_entity_policy_response = ranger_api_functions.update_policy(
                                        ranger_admin_url, atlas_entity_policy_id, atlas_entity_policy_data,
                                        format("{ranger_admin_user}:{ranger_admin_passwd}"))

                            # Get Ranger Atlas default policy for ATLAS SERVICE resource
                            atlas_service_policy_response = ranger_api_functions.get_ranger_service_default_policy(
                                ranger_admin_url, params.ranger_atlas_service_name,
                                format("{ranger_admin_user}:{ranger_admin_passwd}"), ['atlas-service'])
                            if atlas_service_policy_response:
                                beacon_atlas_user_present = ranger_api_functions.check_user_policy(
                                    atlas_service_policy_response, get_beacon_atlas_user)
                                if not beacon_atlas_user_present:
                                    # Updating beacon atlas user in Ranger Atlas default policy for service resource
                                    atlas_service_policy_id = atlas_service_policy_response['id']
                                    beacon_atlas_user_policy_item = {'groups': [], 'conditions': [],
                                                                     'users': [get_beacon_atlas_user],
                                                                     'accesses': [
                                                                         {'type': 'admin-export', 'isAllowed': True},
                                                                         {'type': 'admin-import', 'isAllowed': True}]}
                                    atlas_service_policy_data = ranger_api_functions.update_policy_item(
                                        atlas_service_policy_response, beacon_atlas_user_policy_item)
                                    atlas_service_policy_update_response = ranger_api_functions.update_policy(
                                        ranger_admin_url, atlas_service_policy_id, atlas_service_policy_data,
                                        format("{ranger_admin_user}:{ranger_admin_passwd}"))

                            # Get Ranger Atlas default policy for TYPE CATEGORY and TYPE resource
                            atlas_type_category_policy_response = ranger_api_functions.get_ranger_service_default_policy(
                                ranger_admin_url, params.ranger_atlas_service_name,
                                format("{ranger_admin_user}:{ranger_admin_passwd}"), ['type', 'type-category'])

                            if atlas_type_category_policy_response:
                                beacon_atlas_user_present = ranger_api_functions.check_user_policy(
                                    atlas_type_category_policy_response, get_beacon_atlas_user)
                                if not beacon_atlas_user_present:
                                    # Updating beacon atlas user in Ranger Atlas default policy for type category and type resource
                                    atlas_type_category_policy_id = atlas_type_category_policy_response['id']
                                    beacon_atlas_user_policy_item = {'groups': [], 'conditions': [],
                                                                     'users': [get_beacon_atlas_user],
                                                                     'accesses': [
                                                                         {'type': 'type-create', 'isAllowed': True},
                                                                         {'type': 'type-update', 'isAllowed': True},
                                                                         {'type': 'type-delete', 'isAllowed': True}]}
                                    atlas_type_category_policy_data = ranger_api_functions.update_policy_item(
                                        atlas_type_category_policy_response, beacon_atlas_user_policy_item)
                                    atlas_update_type_category_policy_response = ranger_api_functions.update_policy(
                                        ranger_admin_url, atlas_type_category_policy_id,
                                        atlas_type_category_policy_data,
                                        format("{ranger_admin_user}:{ranger_admin_passwd}"))
                        else:
                            # Get Ranger Atlas default policy for ENTITY resource
                            atlas_policy_response = ranger_api_functions.get_ranger_service_default_policy(
                                ranger_admin_url, params.ranger_atlas_service_name,
                                format("{ranger_admin_user}:{ranger_admin_passwd}"), ['entity'])

                            if atlas_policy_response:
                                beacon_atlas_user_present = ranger_api_functions.check_user_policy(
                                    atlas_policy_response, get_beacon_atlas_user)
                                if not beacon_atlas_user_present:
                                    # Updating beacon atlas user in Ranger Atlas default policy for entity resource
                                    atlas_policy_id = atlas_policy_response['id']
                                    beacon_atlas_user_policy_item = {'groups': [], 'conditions': [],
                                                                     'users': [get_beacon_atlas_user],
                                                                     'accesses': [{'type': 'read', 'isAllowed': True},
                                                                                  {'type': 'create', 'isAllowed': True},
                                                                                  {'type': 'update', 'isAllowed': True},
                                                                                  {'type': 'delete', 'isAllowed': True},
                                                                                  {'type': 'all', 'isAllowed': True}]}
                                    atlas_policy_data = ranger_api_functions.update_policy_item(atlas_policy_response,
                                                                                                beacon_atlas_user_policy_item)
                                    atlas_update_policy_response = ranger_api_functions.update_policy(ranger_admin_url,
                                                                                                      atlas_policy_id,
                                                                                                      atlas_policy_data,
                                                                                                      format(
                                                                                                          "{ranger_admin_user}:{ranger_admin_passwd}"))

                            # Get Ranger Atlas default policy for OPERATION resource
                            atlas_operation_policy_response = ranger_api_functions.get_ranger_service_default_policy(
                                ranger_admin_url, params.ranger_atlas_service_name,
                                format("{ranger_admin_user}:{ranger_admin_passwd}"), ['operation'])
                            if atlas_operation_policy_response:
                                beacon_atlas_user_present = ranger_api_functions.check_user_policy(
                                    atlas_operation_policy_response, get_beacon_atlas_user)
                                if not beacon_atlas_user_present:
                                    # Updating beacon atlas user in Ranger Atlas default policy for operation resource
                                    atlas_operation_policy_id = atlas_operation_policy_response['id']
                                    beacon_atlas_user_policy_item = {'groups': [], 'conditions': [],
                                                                     'users': [get_beacon_atlas_user],
                                                                     'accesses': [{'type': 'read', 'isAllowed': True},
                                                                                  {'type': 'create', 'isAllowed': True},
                                                                                  {'type': 'update', 'isAllowed': True},
                                                                                  {'type': 'delete', 'isAllowed': True},
                                                                                  {'type': 'all', 'isAllowed': True}]}
                                    atlas_operation_policy_data = ranger_api_functions.update_policy_item(
                                        atlas_operation_policy_response, beacon_atlas_user_policy_item)
                                    atlas_operation_policy_update_response = ranger_api_functions.update_policy(
                                        ranger_admin_url, atlas_operation_policy_id, atlas_operation_policy_data,
                                        format("{ranger_admin_user}:{ranger_admin_passwd}"))
            # NOTE(review): any failure above (including `beacon start` itself) ends
            # here without a re-raise -- confirm the start should not fail loudly.
            except Exception as e:
                show_logs(params.beacon_log_dir, params.beacon_user)

        if action == 'stop':
            try:
                Execute(format('{beacon_home}/bin/beacon stop'),
                        user=params.beacon_user,
                        path=params.hadoop_bin_dir,
                        environment=environment_dictionary)
            # NOTE(review): bare except with no re-raise; a failed stop still falls
            # through to deleting the pid file below.
            except:
                show_logs(params.beacon_log_dir, params.beacon_user)

            File(params.server_pid_file, action='delete')
def hive_service(name, action='start', upgrade_type=None):
    """Start or stop the Hive metastore or HiveServer2 daemon.

    :param name: 'metastore' or 'hiveserver2'; selects pid file and start script.
    :param action: 'start' launches the daemon and, for the three known JDBC
        drivers, runs the DB connection check; 'stop' kills the daemon, waits
        for it to disappear and deletes the pid file.
    :param upgrade_type: when equal to UPGRADE_TYPE_ROLLING the "already
        running" guard on start is dropped.
    """
    import params

    # format() receives only a template, so placeholders are evidently resolved
    # from the surrounding scope. pid_file, pid, process_id_exists_command,
    # wait_time and path_to_jdbc are named in templates and keep their names.
    if name == 'metastore':
        pid_file = format("{hive_pid_dir}/{hive_metastore_pid}")
        launch_command = format("{start_metastore_path} {hive_log_dir}/hive.out {hive_log_dir}/hive.err {pid_file} {hive_server_conf_dir} {hive_log_dir}")
    elif name == 'hiveserver2':
        pid_file = format("{hive_pid_dir}/{hive_pid}")
        launch_command = format("{start_hiveserver2_path} {hive_log_dir}/hive-server2.out {hive_log_dir}/hive-server2.err {pid_file} {hive_server_conf_dir} {hive_log_dir}")

        needs_kinit = (params.security_enabled
                       and params.current_version
                       and check_stack_feature(StackFeature.HIVE_SERVER2_KERBERIZED_ENV, params.current_version))
        if needs_kinit:
            Execute(format("{kinit_path_local} -kt {hive_server2_keytab} {hive_principal}; "),
                    user=params.hive_user)

    pid_lookup = get_user_call_output.get_user_call_output(
        format("cat {pid_file}"), user=params.hive_user, is_checked_call=False)
    pid = pid_lookup[1]
    process_id_exists_command = format("ls {pid_file} >/dev/null 2>&1 && ps -p {pid} >/dev/null 2>&1")

    if action == 'stop':
        soft_kill = format("{sudo} kill {pid}")
        hard_kill = format("{sudo} kill -9 {pid}")
        process_is_gone = format("! ({process_id_exists_command})")

        Execute(soft_kill, not_if=process_is_gone)

        # Escalate to kill -9 only if the process survives the grace period;
        # a failing hard kill is tolerated because the check below decides.
        wait_time = 5
        Execute(hard_kill,
                not_if=format("! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )"),
                ignore_failures=True)

        try:
            # check if stopped the process, else fail the task
            Execute(process_is_gone, tries=20, try_sleep=3)
        except:
            show_logs(params.hive_log_dir, params.hive_user)
            raise

        File(pid_file, action="delete")
        return

    if action != 'start':
        return

    if name == 'hiveserver2':
        check_fs_root(params.hive_server_conf_dir, params.execute_path)

    hadoop_home_dir = params.hadoop_home
    hive_binary = "hive"

    # upgrading hiveserver2 (rolling_restart) means that there is an existing,
    # de-registering hiveserver2; the pid will still exist, but the new
    # hiveserver is spinning up on a new port, so the pid will be re-written
    if upgrade_type == UPGRADE_TYPE_ROLLING:
        process_id_exists_command = None

        if params.version and params.stack_root:
            hadoop_home_dir = format("{stack_root}/{version}/hadoop")
            hive_binary = os.path.join(params.hive_bin, hive_binary)

    launch_environment = {
        'HADOOP_HOME': hadoop_home_dir,
        'JAVA_HOME': params.java64_home,
        'HIVE_BIN': hive_binary,
    }
    Execute(launch_command,
            user=params.hive_user,
            environment=launch_environment,
            path=params.execute_path,
            not_if=process_id_exists_command)

    drivers_with_db_check = (
        "com.mysql.jdbc.Driver",
        "org.postgresql.Driver",
        "oracle.jdbc.driver.OracleDriver",
    )
    if params.hive_jdbc_driver not in drivers_with_db_check:
        return

    path_to_jdbc = params.target_hive
    if not params.jdbc_jar_name:
        # No custom jar configured: fall back to the default connector name,
        # and to a wildcard over the lib dir when that file is absent.
        hive_lib_prefix = format("{hive_lib}/")
        default_jar_name = params.default_connectors_map[params.hive_jdbc_driver]
        path_to_jdbc = hive_lib_prefix + default_jar_name
        if not os.path.isfile(path_to_jdbc):
            path_to_jdbc = hive_lib_prefix + "*"
            # Single parenthesised expression: prints the same text as the
            # statement form, and stays valid if print is a function.
            print("Sorry, but we can't find jdbc driver with default name " + default_jar_name +
                  " in hive lib dir. So, db connection check can fail. Please run 'ambari-server setup --jdbc-db={db_name} --jdbc-driver={path_to_jdbc} on server host.'")

    verify_db_command = format(
        "{java64_home}/bin/java -cp {check_db_connection_jar}:{path_to_jdbc} org.apache.ambari.server.DBConnectionVerification '{hive_jdbc_connection_url}' {hive_metastore_user_name} {hive_metastore_user_passwd!p} {hive_jdbc_driver}")

    try:
        Execute(verify_db_command,
                path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
                tries=5,
                try_sleep=10)
    except:
        show_logs(params.hive_log_dir, params.hive_user)
        raise
def hive_service(name, action='start', upgrade_type=None):
    """Start or stop the Hive metastore or HiveServer2 daemon.

    :param name: 'metastore' or 'hiveserver2'; selects pid file and start script.
    :param action: 'start' launches the daemon and validates the JDBC
        connection for the three known drivers; 'stop' kills the daemon, waits
        for it to disappear and deletes the pid file.
    :param upgrade_type: accepted but not read by this function.
    """
    import params
    import status_params

    # format() receives only a template, so placeholders are evidently resolved
    # from the surrounding scope. pid_file, pid, process_id_exists_command and
    # wait_time are named in templates and keep their names.
    if name == 'metastore':
        pid_file = status_params.hive_metastore_pid
        launch_command = format(
            "{start_metastore_path} {hive_log_dir}/hive.out {hive_log_dir}/hive.err {pid_file} {hive_server_conf_dir} {hive_log_dir}"
        )
    elif name == 'hiveserver2':
        pid_file = status_params.hive_pid
        launch_command = format(
            "{start_hiveserver2_path} {hive_log_dir}/hive-server2.out {hive_log_dir}/hive-server2.err {pid_file} {hive_server_conf_dir} {hive_log_dir}"
        )
        if params.security_enabled:
            Execute(
                format("{kinit_path_local} -kt {hive_server2_keytab} {hive_principal}; "),
                user=params.hive_user)

    pid_lookup = get_user_call_output.get_user_call_output(
        format("cat {pid_file}"),
        user=params.hive_user,
        is_checked_call=False)
    pid = pid_lookup[1]
    process_id_exists_command = format(
        "ls {pid_file} >/dev/null 2>&1 && ps -p {pid} >/dev/null 2>&1")

    if action == 'stop':
        soft_kill = format("{sudo} kill {pid}")
        hard_kill = format("{sudo} kill -9 {pid}")
        process_is_gone = format("! ({process_id_exists_command})")

        Execute(soft_kill, not_if=process_is_gone)

        # Escalate to kill -9 only if the process survives the grace period.
        wait_time = 5
        Execute(
            hard_kill,
            not_if=format(
                "! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )"
            ),
            ignore_failures=True)

        try:
            # check if stopped the process, else fail the task
            Execute(process_is_gone, tries=20, try_sleep=3)
        except:
            show_logs(params.hive_log_dir, params.hive_user)
            raise

        File(pid_file, action="delete")
        return

    if action != 'start':
        return

    if name == 'hiveserver2':
        check_fs_root(params.hive_server_conf_dir, params.execute_path)

    launch_environment = {
        'HADOOP_HOME': params.hadoop_home,
        'JAVA_HOME': params.java64_home,
        'HIVE_BIN': "hive",
    }
    Execute(launch_command,
            user=params.hive_user,
            environment=launch_environment,
            path=params.execute_path,
            not_if=process_id_exists_command)

    drivers_with_db_check = (
        "com.mysql.jdbc.Driver",
        "org.postgresql.Driver",
        "oracle.jdbc.driver.OracleDriver",
    )
    if params.hive_jdbc_driver not in drivers_with_db_check:
        return

    # Validate against every configured JDBC target; complain if none is set.
    validations_run = 0
    if params.hive_jdbc_target is not None:
        validations_run += 1
        validate_connection(params.hive_jdbc_target, params.hive_lib)
    if params.hive2_jdbc_target is not None:
        validations_run += 1
        validate_connection(params.hive2_jdbc_target,
                            params.hive_server2_hive2_lib)

    if validations_run == 0:
        Logger.error(
            "ERROR! DB connection check should be executed at least one time!")
def service(name, action='start'):
    """Start or stop a Storm daemon identified by ``name``.

    :param name: key into status_params.pid_files; 'rest_api' is launched via
        ``java -jar``, every other name via ``storm <name>``.
    :param action: 'start' launches the process in the background and records
        its pid; 'stop' kills the recorded pid(s) and deletes the pid file.
    """
    import params
    import status_params

    # format() receives only a template, so placeholders are evidently resolved
    # from the surrounding scope; the locals below that appear in templates
    # (pid_file, process_grep, find_proc, write_pid, storm_env, process_cmd,
    # cmd, no_op_test, process_dont_exist, pid) must keep their names.
    pid_file = status_params.pid_files[name]
    no_op_test = as_user(
        format("ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"),
        user=params.storm_user)

    if name == "rest_api":
        process_grep = format("{rest_lib_dir}/storm-rest-.*\.jar$")
    elif name == 'ui':
        process_grep = "storm.ui.core$"
    else:
        process_grep = format("storm.daemon.{name}$")

    find_proc = format("{jps_binary} -l | grep {process_grep}")
    write_pid = format("{find_proc} | awk {{'print $1'}} > {pid_file}")
    # Built but not used further in this function.
    crt_pid_cmd = format("{find_proc} && {write_pid}")

    storm_env = format(
        "source {conf_dir}/storm-env.sh ; export PATH=$JAVA_HOME/bin:$PATH")

    if action == "stop":
        process_dont_exist = format("! ({no_op_test})")
        if not os.path.exists(pid_file):
            return

        pid_output = get_user_call_output.get_user_call_output(
            format("! test -f {pid_file} || cat {pid_file}"),
            user=params.storm_user)
        # if multiple processes are running (for example user can start logviewer from console)
        # there can be more than one id
        pid = pid_output[1].replace("\n", " ")

        Execute(format("{sudo} kill {pid}"), not_if=process_dont_exist)
        Execute(
            format("{sudo} kill -9 {pid}"),
            not_if=format(
                "sleep 2; {process_dont_exist} || sleep 20; {process_dont_exist}"
            ),
            ignore_failures=True)

        File(pid_file, action="delete")
        return

    if action != "start":
        return

    if name == "rest_api":
        process_cmd = format(
            "{storm_env} ; java -jar {rest_lib_dir}/`ls {rest_lib_dir} | grep -wE storm-rest-[0-9.-]+\.jar` server"
        )
        cmd = format(
            "{process_cmd} {rest_api_conf_file} > {log_dir}/restapi.log 2>&1")
    else:
        # Storm start script gets forked into actual storm java process.
        # Which means we can use the pid of start script as a pid of start component
        cmd = format("{storm_env} ; storm {name} > {log_dir}/{name}.out 2>&1")

    # Background the command and capture the shell's $! as the pid.
    cmd = format("{cmd} &\n echo $! > {pid_file}")

    Execute(cmd,
            not_if=no_op_test,
            user=params.storm_user,
            path=params.storm_bin_dir)

    File(pid_file, owner=params.storm_user, group=params.user_group)
def curl_krb_request(tmp_dir, keytab, principal, url, cache_file_prefix,
                     krb_exec_search_paths, return_only_http_code, alert_name, user,
                     connection_timeout = CONNECTION_TIMEOUT_DEFAULT):
    """Run a Kerberos (``--negotiate``) authenticated curl request as ``user``.

    A credentials cache file under ``tmp_dir`` is reused when ``klist -s``
    accepts it; otherwise ``kinit`` is run against ``keytab``/``principal``.

    :param tmp_dir: directory holding the ccache file and the ``cookies`` subdir.
    :param keytab: keytab path passed to kinit.
    :param principal: Kerberos principal passed to kinit.
    :param url: URL handed to curl.
    :param cache_file_prefix: prefix used in the ccache file name.
    :param krb_exec_search_paths: optional search paths for klist/kinit.
    :param return_only_http_code: when truthy, curl discards the body and only
        the HTTP status code is returned (as int).
    :param alert_name: label used in log messages.
    :param user: OS user the commands are executed as.
    :param connection_timeout: curl connect timeout; coerced to int. The
        overall ``--max-time`` is this value plus 2.
    :return: tuple ``(result, error_msg, elapsed)``. ``result`` is the int
        status code or the response body, or ``""`` when curl printed nothing;
        ``error_msg`` is curl's stderr or None; ``elapsed`` is a
        ``time.time()`` difference, i.e. seconds despite the local's name.
    :raises Fail: re-raised when the curl invocation raises it.
    """
    import uuid

    # Create the kerberos credentials cache (ccache) file and set it in the environment to use
    # when executing curl. Use the md5 hash of the combination of the principal and keytab file
    # to generate a (relatively) unique cache filename so that we can use it as needed. Scope
    # this file by user in order to prevent sharing of cache files by multiple users.
    ccache_file_name = _md5("{0}|{1}".format(principal, keytab)).hexdigest()
    ccache_file_path = "{0}{1}{2}_{3}_cc_{4}".format(tmp_dir, os.sep, cache_file_prefix, user, ccache_file_name)
    kerberos_env = {'KRB5CCNAME': ccache_file_path}

    # If there are no tickets in the cache or they are expired, perform a kinit, else use what
    # is in the cache
    if krb_exec_search_paths:
        klist_path_local = get_klist_path(krb_exec_search_paths)
    else:
        klist_path_local = get_klist_path()

    if shell.call("{0} -s {1}".format(klist_path_local, ccache_file_path), user=user)[0] != 0:
        if krb_exec_search_paths:
            kinit_path_local = get_kinit_path(krb_exec_search_paths)
        else:
            kinit_path_local = get_kinit_path()

        logger.debug("[Alert][{0}] Enabling Kerberos authentication via GSSAPI using ccache at {1}.".format(
            alert_name, ccache_file_path))

        shell.checked_call("{0} -l 5m -c {1} -kt {2} {3} > /dev/null".format(kinit_path_local, ccache_file_path, keytab, principal), user=user)
    else:
        logger.debug("[Alert][{0}] Kerberos authentication via GSSAPI already enabled using ccache at {1}.".format(
            alert_name, ccache_file_path))

    # check if cookies dir exists, if not then create it
    cookies_dir = os.path.join(tmp_dir, "cookies")
    if not os.path.exists(cookies_dir):
        os.makedirs(cookies_dir)

    # One throw-away cookie jar per request; removed in the finally block below.
    cookie_file_name = str(uuid.uuid4())
    cookie_file = os.path.join(cookies_dir, cookie_file_name)

    start_time = time.time()
    error_msg = None

    # setup timeouts for the request; ensure we use integers since that is what curl needs
    connection_timeout = int(connection_timeout)
    maximum_timeout = connection_timeout + 2

    # Assemble the curl command once; only the status-code variant adds the
    # -w/-o options. Argument order matches what each variant used before.
    curl_command = ['curl', '-k', '--negotiate', '-u', ':', '-b', cookie_file, '-c', cookie_file]
    if return_only_http_code:
        curl_command += ['-w', '%{http_code}']
    curl_command += [url, '--connect-timeout', str(connection_timeout),
                     '--max-time', str(maximum_timeout)]
    if return_only_http_code:
        curl_command += ['-o', '/dev/null']

    try:
        _, curl_stdout, curl_stderr = get_user_call_output(curl_command, user=user, env=kerberos_env)
    except Fail:
        if logger.isEnabledFor(logging.DEBUG):
            logger.exception("[Alert][{0}] Unable to make a web request.".format(alert_name))
        raise
    finally:
        if os.path.isfile(cookie_file):
            os.remove(cookie_file)

    # empty quotes evaluates to false
    if curl_stderr:
        error_msg = curl_stderr

    time_millis = time.time() - start_time

    # empty quotes evaluates to false
    if curl_stdout:
        if return_only_http_code:
            return (int(curl_stdout), error_msg, time_millis)
        return (curl_stdout, error_msg, time_millis)

    # Fixed: the message previously had no {1} placeholder, so error_msg was
    # passed to format() but never appeared in the log line.
    logger.debug("[Alert][{0}] Curl response is empty! Please take a look at error message: {1}".format(
        alert_name, str(error_msg)))
    return ("", error_msg, time_millis)
def service_check(self, env):
    """Smoke-test the Streamline (SAM) REST API.

    Requests the ``componentbundles`` catalog endpoint up to three times. On a
    security-enabled cluster the request goes through ``curl --negotiate`` as
    the smoke user and succeeds once the output parses as JSON; otherwise
    ``urllib2`` is used and an HTTP 200 counts as success.

    :param env: execution environment; receives the params module.
    :raises Fail: when a non-200 status is returned, or when no attempt
        succeeded after all retries.
    """
    import params
    env.set_params(params)

    # format() receives only a template, so placeholders are evidently resolved
    # from the surrounding scope; keep the names of locals used in templates.
    if params.streamline_ssl_enabled:
        streamline_api = format(
            "https://{params.hostname}:{params.streamline_ssl_port}/api/v1/catalog/streams/componentbundles"
        )
    else:
        streamline_api = format(
            "http://{params.hostname}:{params.streamline_port}/api/v1/catalog/streams/componentbundles"
        )
    Logger.info(streamline_api)

    max_retries = 3
    success = False

    if (params.security_enabled) and (not params.streamline_sso_enabled):
        kinit_cmd = format(
            "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};"
        )
        shell.checked_call(
            kinit_cmd,
            path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
            user=params.smokeuser,
        )

    for num in range(0, max_retries):
        try:
            Logger.info(format("Making http requests to {streamline_api}"))

            if params.security_enabled:
                get_app_info_cmd = "curl --negotiate -u : -ks --location-trusted --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + streamline_api
                _, stdout, _ = get_user_call_output(
                    get_app_info_cmd,
                    user=params.smokeuser,
                    path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
                )
                try:
                    json.loads(stdout)
                    success = True
                    Logger.info(
                        format("Successfully made a API request to SAM. {stdout}"))
                    break
                except Exception:
                    # Not JSON: log it and move straight on to the next attempt.
                    Logger.error(
                        format("Response from SAM API was not a valid JSON. Response: {stdout}"))
            else:
                response = urllib2.urlopen(streamline_api)
                api_response = response.read()
                response_code = response.getcode()
                # Fixed: log the status code, not the response object.
                Logger.info(format("SAM response http status {response_code}"))
                if response_code != 200:
                    failure = format("Failed to fetch response for {streamline_api}")
                    Logger.error(failure)
                    show_logs(params.streamline_log_dir, params.streamline_user)
                    # Fixed: this was a bare `raise` with no exception being
                    # handled. An explicit Fail is not one of the types caught
                    # below, so it aborts the check immediately.
                    raise Fail(failure)
                else:
                    success = True
                    Logger.info(
                        format("Successfully made a API request to SAM. {api_response}"))
                    break
        except (urllib2.URLError, ExecutionFailed):
            Logger.error(
                format("Failed to make API request to SAM server at {streamline_api},retrying.. {num} out of {max_retries}"))
            # Linear back-off: 0s, 10s, 20s for attempts 0, 1, 2.
            time.sleep(num * 10)
            continue

    if not success:
        failure = format(
            "Failed to make API request to SAM server at {streamline_api} after {max_retries}")
        Logger.error(failure)
        # Fixed: was a bare `raise`, which depends on whichever exception (if
        # any) happened to be handled last; raise an explicit error instead.
        raise Fail(failure)
def hive_service_interactive(name, action='start'):
    """Start or stop the interactive HiveServer2 daemon.

    :param name: component name; not read by this function.
    :param action: 'start' launches the daemon and, for the three known JDBC
        drivers, runs the metastore DB connection check; 'stop' sends a kill,
        escalates to ``kill -9`` if the process is still present after the
        first wait, verifies it is gone and deletes the pid file.
    """
    import params
    import status_params

    # format() receives only a template, so placeholders are evidently resolved
    # from the surrounding scope. pid_file, pid, process_id_exists_command and
    # path_to_jdbc are named in templates and must keep these names.
    pid_file = status_params.hive_interactive_pid
    cmd = format(
        "{start_hiveserver2_interactive_path} {hive_pid_dir}/hive-server2-interactive.out {hive_log_dir}/hive-server2-interactive.err {pid_file} {hive_server_interactive_conf_dir} {tez_interactive_conf_dir}"
    )

    # Unchecked call: a missing pid file just yields an empty pid.
    pid = get_user_call_output.get_user_call_output(format("cat {pid_file}"),
                                                    user=params.hive_user,
                                                    is_checked_call=False)[1]
    process_id_exists_command = format(
        "ls {pid_file} >/dev/null 2>&1 && ps -p {pid} >/dev/null 2>&1")

    if action == 'start':
        check_fs_root(params.hive_server_interactive_conf_dir,
                      params.execute_path_hive_interactive)

        Execute(cmd,
                user=params.hive_user,
                environment={
                    'HADOOP_HOME': params.hadoop_home,
                    'JAVA_HOME': params.java64_home,
                    'HIVE_BIN': "hive2"
                },
                path=params.execute_path,
                not_if=process_id_exists_command)

        if params.hive_jdbc_driver in ("com.mysql.jdbc.Driver",
                                       "org.postgresql.Driver",
                                       "oracle.jdbc.driver.OracleDriver"):
            path_to_jdbc = params.target_hive_interactive
            if not params.jdbc_jar_name:
                # Default connector jar name for this driver, if one is mapped.
                default_jar = None
                if params.hive_jdbc_driver in params.default_connectors_map:
                    default_jar = params.default_connectors_map[params.hive_jdbc_driver]

                path_to_jdbc = None
                if default_jar is not None:
                    path_to_jdbc = format("{hive_interactive_lib}/") + default_jar

                # Fixed: path_to_jdbc may be None when the driver has no default
                # connector; previously that None went into os.path.isfile() and
                # the message below indexed the map unguarded.
                if path_to_jdbc is None or not os.path.isfile(path_to_jdbc):
                    path_to_jdbc = format("{hive_interactive_lib}/") + "*"
                    error_message = "Error! Sorry, but we can't find jdbc driver with default name " + str(default_jar) + \
                                    " in hive lib dir. So, db connection check can fail. Please run 'ambari-server setup --jdbc-db={db_name} --jdbc-driver={path_to_jdbc} on server host.'"
                    Logger.error(error_message)

            db_connection_check_command = format(
                "{java64_home}/bin/java -cp {check_db_connection_jar}:{path_to_jdbc} org.apache.ambari.server.DBConnectionVerification '{hive_jdbc_connection_url}' {hive_metastore_user_name} {hive_metastore_user_passwd!p} {hive_jdbc_driver}"
            )
            Execute(db_connection_check_command,
                    path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
                    tries=5,
                    try_sleep=10)

    elif action == 'stop':
        daemon_kill_cmd = format("{sudo} kill {pid}")
        daemon_hard_kill_cmd = format("{sudo} kill -9 {pid}")

        Execute(daemon_kill_cmd,
                not_if=format("! ({process_id_exists_command})"))

        # check if stopped the process, otherwise send hard kill command.
        try:
            Execute(
                format("! ({process_id_exists_command})"),
                tries=10,
                try_sleep=3,
            )
        # Narrowed from a bare except so interpreter-exit signals
        # (KeyboardInterrupt/SystemExit) are no longer treated as
        # "process still running".
        except Exception:
            Execute(daemon_hard_kill_cmd,
                    not_if=format("! ({process_id_exists_command}) "))

        # check if stopped the process, else fail the task
        Execute(
            format("! ({process_id_exists_command})"),
            tries=20,
            try_sleep=3,
        )

        File(pid_file, action="delete")
cmd = "" with open(prepare_war_cmd_file, "r") as f: cmd = f.readline().strip() if command_to_file != cmd: run_prepare_war = True Logger.info(format("Will run prepare war cmd since marker file {prepare_war_cmd_file} has contents which differ.\n" \ "Expected: {command_to_file}.\nActual: {cmd}.")) else: run_prepare_war = True Logger.info( format( "Will run prepare war cmd since marker file {prepare_war_cmd_file} is missing." )) return_code, libext_content, error_output = get_user_call_output( list_libext_command, user=params.oozie_user) libext_content = libext_content.strip() if run_prepare_war == False: if os.path.exists(libext_content_file): old_content = "" with open(libext_content_file, "r") as f: old_content = f.read().strip() if libext_content != old_content: run_prepare_war = True Logger.info(format("Will run prepare war cmd since marker file {libext_content_file} has contents which differ.\n" \ "Content of the folder {oozie_libext_dir} changed.")) else: run_prepare_war = True Logger.info(
def falcon(type, action=None, upgrade_type=None):
    """
    Configure, start or stop a Falcon component.

    With action == 'config' the local pid/log/webapp/home/conf directories,
    falcon-env.sh and the client/runtime/startup properties files are
    declared, plus the optional graph directories and the Atlas hook.

    With type == 'server' the function additionally:
      * on 'config': declares the store, apps, data-mirroring and extensions
        directories and then executes the queued HdfsResource changes;
      * on 'start': runs falcon-config.sh and falcon-start (both skipped when
        the process_exists shell test succeeds);
      * on 'stop': runs falcon-stop and deletes the server pid file.

    :param type: component kind; the server-only steps run when it is 'server'.
    :param action: 'config', 'start' or 'stop'.
    :param upgrade_type: when not None, params.version (instead of
      params.stack_version_formatted) decides whether Falcon extensions are
      supported.
    :raises: whatever the failing Execute raised on start/stop, after the
      Falcon logs have been shown via show_logs.
    """
    # NOTE(review): format() presumably resolves {placeholders} from this
    # function's local variables and from params - confirm before renaming
    # locals such as pid or exc_msg.
    import params

    if action == 'config':
        Directory(
            params.falcon_pid_dir,
            owner=params.falcon_user,
            create_parents=True,
            mode=0755,
            cd_access="a",
        )

        Directory(
            params.falcon_log_dir,
            owner=params.falcon_user,
            create_parents=True,
            mode=0755,
            cd_access="a",
        )

        Directory(params.falcon_webapp_dir,
                  owner=params.falcon_user,
                  create_parents=True)

        Directory(params.falcon_home,
                  owner=params.falcon_user,
                  create_parents=True)

        Directory(params.etc_prefix_dir,
                  mode=0755,
                  create_parents=True)

        Directory(params.falcon_conf_dir,
                  owner=params.falcon_user,
                  create_parents=True)

        File(
            params.falcon_conf_dir + '/falcon-env.sh',
            content=InlineTemplate(params.falcon_env_sh_template),
            owner=params.falcon_user,
            group=params.user_group,
        )

        PropertiesFile(params.falcon_conf_dir + '/client.properties',
                       properties=params.falcon_client_properties,
                       mode=0644,
                       owner=params.falcon_user)

        PropertiesFile(params.falcon_conf_dir + '/runtime.properties',
                       properties=params.falcon_runtime_properties,
                       mode=0644,
                       owner=params.falcon_user)

        PropertiesFile(params.falcon_conf_dir + '/startup.properties',
                       properties=params.falcon_startup_properties,
                       mode=0644,
                       owner=params.falcon_user)

        # The graph directories are optional; they are only declared when set.
        if params.falcon_graph_storage_directory:
            Directory(params.falcon_graph_storage_directory,
                      owner=params.falcon_user,
                      group=params.user_group,
                      mode=0775,
                      create_parents=True,
                      cd_access="a")

        if params.falcon_graph_serialize_path:
            Directory(params.falcon_graph_serialize_path,
                      owner=params.falcon_user,
                      group=params.user_group,
                      mode=0775,
                      create_parents=True,
                      cd_access="a")

        # Generate atlas-application.properties.xml file
        if params.falcon_atlas_support:
            # If Atlas is added later than Falcon, this package will be absent.
            install_atlas_hook_packages(
                params.atlas_plugin_package,
                params.atlas_ubuntu_plugin_package,
                params.host_sys_prepped,
                params.agent_stack_retry_on_unavailability,
                params.agent_stack_retry_count)

            atlas_hook_filepath = os.path.join(params.falcon_conf_dir,
                                               params.atlas_hook_filename)
            setup_atlas_hook(SERVICE.FALCON,
                             params.falcon_atlas_application_properties,
                             atlas_hook_filepath, params.falcon_user,
                             params.user_group)

            # Falcon 0.10 uses FALCON_EXTRA_CLASS_PATH.
            # Setup symlinks for older versions.
            if params.current_version_formatted and check_stack_feature(
                    StackFeature.FALCON_ATLAS_SUPPORT_2_3,
                    params.current_version_formatted):
                setup_atlas_jar_symlinks("falcon", params.falcon_webinf_lib)

    if type == 'server':
        if action == 'config':
            # The store lives either in HDFS or on the local file system,
            # decided by the first four characters of the URI.
            if params.store_uri[0:4] == "hdfs":
                params.HdfsResource(params.store_uri,
                                    type="directory",
                                    action="create_on_execute",
                                    owner=params.falcon_user,
                                    mode=0755)
            elif params.store_uri[0:4] == "file":
                # [7:] drops the leading 7 characters of the URI
                # (presumably "file://" - TODO confirm).
                Directory(params.store_uri[7:],
                          owner=params.falcon_user,
                          create_parents=True)

            # TODO change to proper mode
            params.HdfsResource(params.falcon_apps_dir,
                                type="directory",
                                action="create_on_execute",
                                owner=params.falcon_user,
                                mode=0777)

            # In HDP 2.4 and earlier, the data-mirroring directory was copied to HDFS.
            if params.supports_data_mirroring:
                params.HdfsResource(params.dfs_data_mirroring_dir,
                                    type="directory",
                                    action="create_on_execute",
                                    owner=params.falcon_user,
                                    group=params.proxyuser_group,
                                    recursive_chown=True,
                                    recursive_chmod=True,
                                    mode=0770,
                                    source=params.local_data_mirroring_dir)

            # Falcon Extensions were supported in HDP 2.5 and higher.
            effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(
                params.version)
            supports_falcon_extensions = effective_version and check_stack_feature(
                StackFeature.FALCON_EXTENSIONS, effective_version)

            if supports_falcon_extensions:
                params.HdfsResource(params.falcon_extensions_dest_dir,
                                    type="directory",
                                    action="create_on_execute",
                                    owner=params.falcon_user,
                                    group=params.proxyuser_group,
                                    recursive_chown=True,
                                    recursive_chmod=True,
                                    mode=0755,
                                    source=params.falcon_extensions_source_dir)
                # Create the extensions HiveDR store
                params.HdfsResource(os.path.join(
                    params.falcon_extensions_dest_dir, "mirroring"),
                                    type="directory",
                                    action="create_on_execute",
                                    owner=params.falcon_user,
                                    group=params.proxyuser_group,
                                    mode=0770)

            # At least one HDFS Dir should be created, so execute the change now.
            params.HdfsResource(None, action="execute")

            Directory(params.falcon_local_dir,
                      owner=params.falcon_user,
                      create_parents=True,
                      cd_access="a")

            if params.falcon_embeddedmq_enabled == True:
                # The parent of the embedded MQ data directory is declared
                # first, then the data directory itself.
                Directory(os.path.abspath(
                    os.path.join(params.falcon_embeddedmq_data, "..")),
                          owner=params.falcon_user,
                          create_parents=True)

                Directory(params.falcon_embeddedmq_data,
                          owner=params.falcon_user,
                          create_parents=True)

        # although Falcon's falcon-config.sh will use 'which hadoop' to figure
        # this out, in an upgraded cluster, it's possible that 'which hadoop'
        # still points to older binaries; it's safer to just pass in the
        # hadoop home directory to use
        environment_dictionary = {"HADOOP_HOME": params.hadoop_home_dir}

        # Element [1] of the call result is used as the pid text; with
        # is_checked_call=False a failing `cat` does not abort here.
        pid = get_user_call_output.get_user_call_output(
            format("cat {server_pid_file}"),
            user=params.falcon_user,
            is_checked_call=False)[1]
        # Shell test passed as not_if to the start commands below.
        process_exists = format("ls {server_pid_file} && ps -p {pid}")

        if action == 'start':
            try:
                Execute(
                    format('{falcon_home}/bin/falcon-config.sh server falcon'),
                    user=params.falcon_user,
                    path=params.hadoop_bin_dir,
                    environment=environment_dictionary,
                    not_if=process_exists,
                )
            except:
                show_logs(params.falcon_log_dir, params.falcon_user)
                raise

            if not os.path.exists(params.target_jar_file):
                # Best effort: a failed download is only logged here; the
                # isfile() check below logs the remediation hint.
                try:
                    File(params.target_jar_file,
                         content=DownloadSource(params.bdb_resource_name),
                         mode=0755)
                except:
                    exc_msg = traceback.format_exc()
                    exception_message = format(
                        "Caught Exception while downloading {bdb_resource_name}:\n{exc_msg}"
                    )
                    Logger.error(exception_message)

                if not os.path.isfile(params.target_jar_file):
                    error_message = """ If you are using bdb as the Falcon graph db store, please run ambari-server setup --jdbc-db=bdb --jdbc-driver=<path to je5.0.73.jar> on the ambari server host. Otherwise falcon startup will fail. Otherwise please configure Falcon to use HBase as the backend as described in the Falcon documentation. """
                    Logger.error(error_message)

            try:
                Execute(
                    format(
                        '{falcon_home}/bin/falcon-start -port {falcon_port}'),
                    user=params.falcon_user,
                    path=params.hadoop_bin_dir,
                    environment=environment_dictionary,
                    not_if=process_exists,
                )
            except:
                show_logs(params.falcon_log_dir, params.falcon_user)
                raise

        if action == 'stop':
            try:
                Execute(format('{falcon_home}/bin/falcon-stop'),
                        user=params.falcon_user,
                        path=params.hadoop_bin_dir,
                        environment=environment_dictionary)
            except:
                show_logs(params.falcon_log_dir, params.falcon_user)
                raise

            File(params.server_pid_file, action='delete')
def _run_command(self, target, operation, method='POST', assertable_result=True,
                 file_to_put=None, ignore_status_codes=[], **kwargs):
    """
    Issue a single WebHDFS REST call through curl and return the parsed reply.

    :param target: HDFS path; normalised with HdfsResourceProvider.parse_path.
    :param operation: value of the WebHDFS ``op`` query argument.
    :param method: HTTP method handed to ``curl -X``.
    :param assertable_result: some POST requests answer '{"boolean":false}' or
      '{"boolean":true}' depending on whether the query succeeded; when True
      a false 'boolean' is treated as a failure.
    :param file_to_put: local file uploaded as the request body, or, for the
      "OPEN" operation, the local file curl writes the download to.
    :param ignore_status_codes: extra HTTP status codes accepted besides
      WebHDFSUtil.valid_status_codes.
    :param kwargs: additional query arguments appended to the URL.
    :return: the decoded JSON reply, or the raw body when it is not JSON.
    :raises Fail: when the target is empty or file_to_put does not exist.
    :raises WebHDFSCallException: on a rejected status code or a false
      'boolean' result.
    """
    target = HdfsResourceProvider.parse_path(target)
    if not target:
        raise Fail("Target cannot be empty")

    # Without security the acting user travels as a query argument.
    if not self.security_enabled:
        kwargs['user.name'] = self.run_user

    # NOTE: url, k and v are referenced by name inside the format strings.
    url = format("{address}/webhdfs/v1{target}?op={operation}", address=self.address)
    for k, v in kwargs.iteritems():
        url = format("{url}&{k}={v}")

    curl_cmd = ["curl", "-sS", "-L", "-w", "%{http_code}", "-X", method]

    if operation == "OPEN":
        # For "OPEN" the target is the DFS file to download and file_to_put
        # is the local destination (see _download_file).
        curl_cmd.extend(["-o", file_to_put])
    elif file_to_put:
        if not os.path.exists(file_to_put):
            raise Fail(format("File {file_to_put} is not found."))
        curl_cmd.extend([
            "--data-binary", "@" + file_to_put,
            "-H", "Content-Type: application/octet-stream",
        ])

    if self.security_enabled:
        curl_cmd.extend(["--negotiate", "-u", ":"])
    if self.is_https_enabled:
        curl_cmd.append("-k")
    curl_cmd.append(url)

    _, raw_output, err = get_user_call_output(curl_cmd,
                                              user=self.run_user,
                                              logoutput=self.logoutput,
                                              quiet=False)

    # curl's "-w %{http_code}" appends the status code to the body: the last
    # three characters are the code, everything before them is the reply.
    status_code = raw_output[-3:]
    body = raw_output[:-3]

    try:
        result_dict = json.loads(body)
    except ValueError:
        result_dict = body

    status_accepted = status_code in WebHDFSUtil.valid_status_codes + ignore_status_codes
    if status_accepted and not (assertable_result and result_dict
                                and not result_dict['boolean']):
        return result_dict

    if isinstance(result_dict, dict):
        formatted_output = json.dumps(result_dict, indent=2)
    else:
        formatted_output = result_dict
    formatted_output = err + "\n" + formatted_output
    err_msg = "Execution of '%s' returned status_code=%s. %s" % (
        shell.string_cmd_from_args_list(curl_cmd), status_code, formatted_output)
    raise WebHDFSCallException(err_msg, result_dict)
def service_check(self, env):
    """
    Smoke-test YARN by running the distributed shell client and verifying the
    resulting application through every ResourceManager's REST API.

    The distributed shell job (``ls`` in ``number_of_nm`` containers) is run
    as the smoke user; its output is searched for ``appTrackingUrl`` to find
    the application id, and each address in params.rm_webapp_addresses_list
    is then asked for that application's state.

    :param env: execution environment; receives the params module.
    :raises Exception: when the tracking URL or application id cannot be
      found in the job output, when a ResourceManager reply is not JSON or
      lacks app/state/finalStatus, or when the application is not
      FINISHED/SUCCEEDED.
    """
    import params
    env.set_params(params)

    if params.stack_version_formatted_major and check_stack_feature(
            StackFeature.ROLLING_UPGRADE,
            params.stack_version_formatted_major):
        path_to_distributed_shell_jar = format(
            "{stack_root}/current/hadoop-yarn-client/hadoop-yarn-applications-distributedshell.jar"
        )
    else:
        path_to_distributed_shell_jar = "/usr/lib/hadoop-yarn/hadoop-yarn-applications-distributedshell*.jar"

    yarn_distrubuted_shell_check_params = [
        "yarn org.apache.hadoop.yarn.applications.distributedshell.Client",
        "-shell_command", "ls", "-num_containers", "{number_of_nm}", "-jar",
        "{path_to_distributed_shell_jar}", "-timeout", "300000"
    ]
    yarn_distrubuted_shell_check_cmd = format(
        " ".join(yarn_distrubuted_shell_check_params))

    if params.security_enabled:
        kinit_cmd = format(
            "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};"
        )
        smoke_cmd = format("{kinit_cmd} {yarn_distrubuted_shell_check_cmd}")
    else:
        smoke_cmd = yarn_distrubuted_shell_check_cmd

    return_code, out = shell.checked_call(
        smoke_cmd,
        path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
        user=params.smokeuser,
    )

    # re.search returns None when the job output carries no tracking URL;
    # fail with a clear message instead of an AttributeError.
    m = re.search(r"appTrackingUrl=(.*),\s", out)
    if m is None:
        raise Exception(
            "Could not find appTrackingUrl in the output of the YARN distributed shell job")
    app_url = m.group(1)

    # The application id is the last URL segment containing "application".
    application_name = None
    for item in str(app_url).split('/'):
        if "application" in item:
            application_name = item
    if application_name is None:
        raise Exception("Could not determine the application id from " + app_url)

    for rm_webapp_address in params.rm_webapp_addresses_list:
        info_app_url = params.scheme + "://" + rm_webapp_address + "/ws/v1/cluster/apps/" + application_name

        get_app_info_cmd = "curl --negotiate -u : -ksL --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + info_app_url

        return_code, stdout, _ = get_user_call_output(
            get_app_info_cmd,
            user=params.smokeuser,
            path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
        )

        try:
            json_response = json.loads(stdout)
        except Exception:
            raise Exception("Could not get json response from YARN API")

        if json_response is None or 'app' not in json_response or \
                'state' not in json_response['app'] or 'finalStatus' not in json_response['app']:
            raise Exception("Application " + app_url + " returns invalid data.")

        if json_response['app']['state'] != "FINISHED" or json_response['app']['finalStatus'] != "SUCCEEDED":
            raise Exception(
                "Application " + app_url +
                " state/status is not valid. Should be FINISHED/SUCCEEDED.")
def start(self, env, upgrade_type=None): import params self.configure(env) Logger.info("Configured Dirs") pid = \ get_user_call_output.get_user_call_output(format("cat {dpprofiler_pid_file}"), user=params.dpprofiler_user, is_checked_call=False)[1] process_exists = format("ls {dpprofiler_pid_file} && ps -p {pid}") if params.credential_store_enabled: if 'hadoop.security.credential.provider.path' in params.dpprofiler_config: credential_provider_path = params.dpprofiler_config['hadoop.security.credential.provider.path'] credential_provider_src_path = credential_provider_path[len('jceks://file'):] credential_provider_dest_path = params.dpprofiler_credential_provider_path[len('jceks://file'):] File(credential_provider_dest_path, owner=params.dpprofiler_user, group=params.dpprofiler_group, mode=0600, content=StaticFile(credential_provider_src_path) ) Execute(format( "hadoop credential create {atlas_password_alias} -provider {dpprofiler_credential_provider_path} -value {atlas_password}")) File(params.dpprofiler_credential_provider_tmp_path, owner=params.dpprofiler_user, group=params.dpprofiler_group, mode=0644, content=StaticFile(credential_provider_dest_path) ) credential_provider_hdfs_src_path = params.dpprofiler_credential_provider_hdfs_path[ len('jceks://hdfs'):] params.HdfsResource(credential_provider_hdfs_src_path, action="create_on_execute", type="file", source=params.dpprofiler_credential_provider_tmp_path, owner=params.dpprofiler_user, mode=0600, recursive_chown=True, recursive_chmod=True, dfs_type=params.default_fs ) File(params.dpprofiler_credential_provider_tmp_path, action="delete") if os.path.exists(params.dpprofiler_credential_provider_crc_path): File(params.dpprofiler_credential_provider_crc_path, action="delete") else: Logger.error( "hadoop.security.credential.provider.path property not found in dpprofiler-env config-type") Logger.info("Starting profiler agent") environment_dictionary = {} environment_dictionary["DPPROFILER_CRYPTO_SECRET"] = 
params.dpprofiler_crypto_secret kerberos_props = '' if params.dpprofiler_secured == "true": kerberos_props = format( '-Djava.security.krb5.conf=/etc/krb5.conf -Djavax.security.auth.useSubjectCredsOnly=false -Djava.security.auth.login.config={dpprofiler_conf_dir}/krb5JAASLogin.conf') Execute(format('rm -f {params.dpprofiler_pid_file}'), not_if=process_exists ) if params.patch_mysql_driver: self.patch_mysql_driver() self.append_to_classpath(params.dpprofiler_hadoop_conf_dir) Execute(format( 'nohup {dpprofiler_home}/bin/profiler-agent -Dhttp.port={dpprofiler_http_port} {kerberos_props} > {dpprofiler_log_dir}/profiler_agent.out 2>&1 &'), user=params.dpprofiler_user, not_if=process_exists, environment=environment_dictionary ) try: # check if pid file created, else fail the task Execute(format("(ls {dpprofiler_pid_file})"), tries=20, try_sleep=3, ) except: show_logs(params.dpprofiler_log_dir, params.dpprofiler_user) raise newpid = \ get_user_call_output.get_user_call_output(format("cat {dpprofiler_pid_file}"), user=params.dpprofiler_user, is_checked_call=False)[1] Logger.info(format("Process pid is: {newpid}"))
def service_check(self, env):
    """
    Smoke-test YARN by running the distributed shell client and verifying the
    resulting application through the active ResourceManager's REST API.

    The smoke user's HDFS home directory is declared first. The distributed
    shell job (``ls`` in ``number_of_nm`` containers, submitted to
    ``service_check_queue_name``) is then run as the smoke user; its output
    is searched for ``appTrackingUrl`` to find the application id, which is
    looked up on the address returned by self.get_active_rm_webapp_address().

    :param env: execution environment; receives the params module.
    :raises Fail: when the tracking URL or application id cannot be found in
      the job output, when the REST reply is not JSON or lacks
      app/state/finalStatus, or when the application is not
      FINISHED/SUCCEEDED.
    """
    import params
    env.set_params(params)

    params.HdfsResource(
        format("/user/{smokeuser}"),
        type="directory",
        action="create_on_execute",
        owner=params.smokeuser,
        mode=params.smoke_hdfs_user_mode,
    )

    path_to_distributed_shell_jar = params.install_dir + "/share/hadoop/yarn/hadoop-yarn-applications-distributedshell*.jar"

    yarn_distrubuted_shell_check_params = [
        "yarn org.apache.hadoop.yarn.applications.distributedshell.Client",
        "-shell_command", "ls", "-num_containers", "{number_of_nm}", "-jar",
        "{path_to_distributed_shell_jar}", "-timeout", "300000", "--queue",
        "{service_check_queue_name}"
    ]
    yarn_distrubuted_shell_check_cmd = format(
        " ".join(yarn_distrubuted_shell_check_params))

    if params.security_enabled:
        kinit_cmd = format(
            "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};"
        )
        smoke_cmd = format("{kinit_cmd} {yarn_distrubuted_shell_check_cmd}")
    else:
        smoke_cmd = yarn_distrubuted_shell_check_cmd

    return_code, out = shell.checked_call(
        smoke_cmd,
        path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
        user=params.smokeuser,
    )

    # re.search returns None when the job output carries no tracking URL;
    # fail with a clear message instead of an AttributeError.
    m = re.search(r"appTrackingUrl=(.*),\s", out)
    if m is None:
        raise Fail(
            "Could not find appTrackingUrl in the output of the YARN distributed shell job")
    app_url = m.group(1)

    # The application id is the last URL segment containing "application".
    application_name = None
    for item in str(app_url).split('/'):
        if "application" in item:
            application_name = item
    if application_name is None:
        raise Fail("Could not determine the application id from " + app_url)

    # Find out the active RM from RM list
    # Raise an exception if the active rm cannot be determined
    active_rm_webapp_address = self.get_active_rm_webapp_address()
    Logger.info("Active Resource Manager web app address is : " +
                active_rm_webapp_address)

    # Verify job state from active resource manager via rest api
    info_app_url = params.scheme + "://" + active_rm_webapp_address + "/ws/v1/cluster/apps/" + application_name
    get_app_info_cmd = "curl --negotiate -u : -ks --location-trusted --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + info_app_url

    return_code, stdout, _ = get_user_call_output(
        get_app_info_cmd,
        user=params.smokeuser,
        path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
    )

    try:
        json_response = json.loads(stdout)
    except Exception:
        raise Fail(
            format(
                "Response from YARN API was not a valid JSON. Response: {stdout}"
            ))

    if json_response is None or 'app' not in json_response or \
            'state' not in json_response['app'] or 'finalStatus' not in json_response['app']:
        raise Fail("Application " + app_url + " returns invalid data.")

    if json_response['app']['state'] != "FINISHED" or json_response['app']['finalStatus'] != "SUCCEEDED":
        raise Fail(
            "Application " + app_url +
            " state/status is not valid. Should be FINISHED/SUCCEEDED.")
def service(name, action='start'):
    """
    Start or stop the Flink YARN session identified by ``name``.

    On "start" a detached yarn-session is launched and the YARN application
    id matching ``flink_appname`` is written to the pid file; both steps are
    skipped when the pid-file test command succeeds. On "stop" the
    application id(s) read from the pid file are killed through YARN and the
    pid file is deleted.

    :param name: key into status_params.pid_files.
    :param action: "start" or "stop"; any other value does nothing.
    """
    # NOTE: pid_file, hadoop_classpath, no_op_test and pid are referenced by
    # name inside the format strings below - do not rename them.
    import params
    import status_params

    pid_file = status_params.pid_files[name]
    no_op_test = as_user(
        format("ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"),
        user=params.flink_user)
    flink_env = format("export PATH=$JAVA_HOME/bin:$PATH")

    if action == "start":
        # Ask `hadoop classpath` for the Hadoop classpath (approach taken
        # from https://github.com/abajwa-hw/ambari-flink-service/).
        classpath_proc = subprocess.Popen(["hadoop", "classpath"],
                                          stdout=subprocess.PIPE,
                                          stderr=subprocess.PIPE)
        classpath_stdout = classpath_proc.communicate()[0]
        hadoop_classpath = classpath_stdout.strip()

        session_cmd = format(
            "export HADOOP_CONF_DIR={hadoop_conf_dir}; export HADOOP_CLASSPATH={hadoop_classpath}; /opt/flink/bin/yarn-session.sh -n {flink_numcontainers} -s {flink_numberoftaskslots} -jm {flink_jobmanager_memory} -tm {flink_container_memory} -qu {flink_queue} -nm {flink_appname} -d"
        )
        Execute(session_cmd,
                not_if=no_op_test,
                user=params.flink_user,
                path=params.flink_bin_dir)

        for banner_line in ('********************************',
                            '* Flink Started *********************',
                            '********************************'):
            Logger.info(banner_line)

        # Write the YARN application id into the pid file.
        # TODO: make the keytab a parameter
        # TODO: does this work when two applications share flink_appname?
        save_pid_cmd = format(
            "kinit -kt {flink_kerberos_keytab} {flink_kerberos_principal}; yarn app -list | grep {flink_appname} | cut -f1 > {pid_file}"
        )
        Execute(save_pid_cmd, not_if=no_op_test, user=params.flink_user)

        for banner_line in ('********************************',
                            '* Pid saved *********************',
                            '********************************'):
            Logger.info(banner_line)

        File(pid_file, owner=params.flink_user, group=params.user_group)
        return

    if action != "stop":
        return

    process_dont_exist = format("! ({no_op_test})")
    if os.path.exists(pid_file):
        # The pid file may hold several ids, one per line; join them with
        # spaces so they can be passed to a single kill command.
        pid = get_user_call_output.get_user_call_output(
            format("! test -f {pid_file} || cat {pid_file}"),
            user=params.flink_user)[1].replace("\n", " ")

        # TODO: make the keytab a parameter
        kill_cmd = format(
            "kinit -kt {flink_kerberos_keytab} {flink_kerberos_principal}; yarn application -kill {pid}"
        )
        Execute(kill_cmd, not_if=False, user=params.flink_user)
        File(pid_file, action="delete")
def service_check(self, env):
    """
    Smoke-test YARN by running the distributed shell client and verifying the
    resulting application through the ResourceManagers' REST API.

    The smoke user's HDFS home directory is declared first. The distributed
    shell job (``ls`` in ``number_of_nm`` containers, submitted to
    ``service_check_queue_name``) is then run as the smoke user; its output
    is searched for ``appTrackingUrl`` to find the application id, and each
    address in params.rm_webapp_addresses_list is asked for that
    application's state. A ResourceManager that answers with the standby
    notice is skipped.

    :param env: execution environment; receives the params module.
    :raises Fail: when the tracking URL or application id cannot be found in
      the job output, when a REST reply is not JSON or lacks
      app/state/finalStatus, or when the application is not
      FINISHED/SUCCEEDED.
    """
    import params
    env.set_params(params)

    params.HdfsResource(
        format("/user/{smokeuser}"),
        type="directory",
        action="create_on_execute",
        owner=params.smokeuser,
        mode=params.smoke_hdfs_user_mode,
    )

    if params.stack_version_formatted_major and check_stack_feature(
            StackFeature.ROLLING_UPGRADE,
            params.stack_version_formatted_major):
        path_to_distributed_shell_jar = format(
            "{stack_root}/current/hadoop-yarn-client/hadoop-yarn-applications-distributedshell.jar"
        )
    else:
        path_to_distributed_shell_jar = "/usr/lib/hadoop-yarn/hadoop-yarn-applications-distributedshell*.jar"

    yarn_distrubuted_shell_check_params = [
        "yarn org.apache.hadoop.yarn.applications.distributedshell.Client",
        "-shell_command", "ls", "-num_containers", "{number_of_nm}", "-jar",
        "{path_to_distributed_shell_jar}", "-timeout", "300000", "--queue",
        "{service_check_queue_name}"
    ]
    yarn_distrubuted_shell_check_cmd = format(
        " ".join(yarn_distrubuted_shell_check_params))

    if params.security_enabled:
        kinit_cmd = format(
            "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};"
        )
        smoke_cmd = format("{kinit_cmd} {yarn_distrubuted_shell_check_cmd}")
    else:
        smoke_cmd = yarn_distrubuted_shell_check_cmd

    return_code, out = shell.checked_call(
        smoke_cmd,
        path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
        user=params.smokeuser,
    )

    # re.search returns None when the job output carries no tracking URL;
    # fail with a clear message instead of an AttributeError.
    m = re.search(r"appTrackingUrl=(.*),\s", out)
    if m is None:
        raise Fail(
            "Could not find appTrackingUrl in the output of the YARN distributed shell job")
    app_url = m.group(1)

    # The application id is the last URL segment containing "application".
    application_name = None
    for item in str(app_url).split('/'):
        if "application" in item:
            application_name = item
    if application_name is None:
        raise Fail("Could not determine the application id from " + app_url)

    for rm_webapp_address in params.rm_webapp_addresses_list:
        info_app_url = params.scheme + "://" + rm_webapp_address + "/ws/v1/cluster/apps/" + application_name

        get_app_info_cmd = "curl --negotiate -u : -ksL --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + info_app_url

        return_code, stdout, _ = get_user_call_output(
            get_app_info_cmd,
            user=params.smokeuser,
            path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
        )

        # Handle HDP<2.2.8.1 where RM doesn't do automatic redirection from standby to active
        if stdout.startswith(
                "This is standby RM. Redirecting to the current active RM:"):
            Logger.info(
                format(
                    "Skipped checking of {rm_webapp_address} since returned '{stdout}'"
                ))
            continue

        try:
            json_response = json.loads(stdout)
        except Exception:
            raise Fail(
                format(
                    "Response from YARN API was not a valid JSON. Response: {stdout}"
                ))

        if json_response is None or 'app' not in json_response or \
                'state' not in json_response['app'] or 'finalStatus' not in json_response['app']:
            raise Fail("Application " + app_url + " returns invalid data.")

        if json_response['app']['state'] != "FINISHED" or json_response['app']['finalStatus'] != "SUCCEEDED":
            raise Fail(
                "Application " + app_url +
                " state/status is not valid. Should be FINISHED/SUCCEEDED.")
def hive_service(name, action='start', upgrade_type=None):
    """
    Start or stop a Hive daemon.

    :param name: 'metastore' or 'hiveserver2'; selects the pid file and the
      start script.
    :param action: 'start' launches the daemon (unless the pid-based
      existence test succeeds) and then validates the JDBC connection for
      the known drivers; 'stop' sends kill, then kill -9 if the process is
      still there after a short wait, verifies it is gone and deletes the
      pid file.
    :param upgrade_type: when equal to UPGRADE_TYPE_ROLLING the existence
      test is dropped on start and, if params.version and params.stack_root
      are set, the versioned Hadoop home and Hive binary are used.
    :raises: on stop, whatever the final existence check raised, after the
      Hive logs have been shown.
    """
    # NOTE: pid_file, pid, process_id_exists_command and wait_time are
    # referenced by name inside the format strings below - do not rename.
    import params
    import status_params

    if name == 'metastore':
        pid_file = status_params.hive_metastore_pid
        launch_cmd = format(
            "{start_metastore_path} {hive_log_dir}/hive.out {hive_log_dir}/hive.err {pid_file} {hive_server_conf_dir} {hive_log_dir}"
        )
    elif name == 'hiveserver2':
        pid_file = status_params.hive_pid
        launch_cmd = format(
            "{start_hiveserver2_path} {hive_log_dir}/hive-server2.out {hive_log_dir}/hive-server2.err {pid_file} {hive_server_conf_dir} {hive_log_dir}"
        )

        kerberized = (params.security_enabled and params.current_version
                      and check_stack_feature(
                          StackFeature.HIVE_SERVER2_KERBERIZED_ENV,
                          params.current_version))
        if kerberized:
            Execute(format(
                "{kinit_path_local} -kt {hive_server2_keytab} {hive_principal}; "
            ), user=params.hive_user)

    pid = get_user_call_output.get_user_call_output(
        format("cat {pid_file}"),
        user=params.hive_user,
        is_checked_call=False)[1]
    process_id_exists_command = format(
        "ls {pid_file} >/dev/null 2>&1 && ps -p {pid} >/dev/null 2>&1")

    if action == 'start':
        if name == 'hiveserver2':
            check_fs_root(params.hive_server_conf_dir, params.execute_path)

        effective_hadoop_home = params.hadoop_home
        hive_binary = "hive"

        # upgrading hiveserver2 (rolling_restart) means that there is an existing,
        # de-registering hiveserver2; the pid will still exist, but the new
        # hiveserver is spinning up on a new port, so the pid will be re-written
        if upgrade_type == UPGRADE_TYPE_ROLLING:
            process_id_exists_command = None

            if params.version and params.stack_root:
                effective_hadoop_home = format("{stack_root}/{version}/hadoop")
                hive_binary = os.path.join(params.hive_bin, hive_binary)

        daemon_environment = {
            'HADOOP_HOME': effective_hadoop_home,
            'JAVA_HOME': params.java64_home,
            'HIVE_BIN': hive_binary
        }
        Execute(launch_cmd,
                user=params.hive_user,
                environment=daemon_environment,
                path=params.execute_path,
                not_if=process_id_exists_command)

        checked_drivers = ("com.mysql.jdbc.Driver",
                           "org.postgresql.Driver",
                           "oracle.jdbc.driver.OracleDriver")
        if params.hive_jdbc_driver in checked_drivers:
            connection_validated = False

            if params.hive_jdbc_target is not None:
                connection_validated = True
                validate_connection(params.hive_jdbc_target, params.hive_lib)

            if params.hive2_jdbc_target is not None:
                connection_validated = True
                validate_connection(params.hive2_jdbc_target,
                                    params.hive_server2_hive2_lib)

            if not connection_validated:
                Logger.error(
                    "ERROR! DB connection check should be executed at least one time!")

    elif action == 'stop':
        soft_kill_cmd = format("{sudo} kill {pid}")
        hard_kill_cmd = format("{sudo} kill -9 {pid}")

        Execute(soft_kill_cmd,
                not_if=format("! ({process_id_exists_command})"))

        # Escalate to kill -9 only if the process survives the wait.
        wait_time = 5
        survived_test = format(
            "! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )"
        )
        Execute(hard_kill_cmd, not_if=survived_test, ignore_failures=True)

        try:
            # check if stopped the process, else fail the task
            Execute(
                format("! ({process_id_exists_command})"),
                tries=20,
                try_sleep=3,
            )
        except:
            show_logs(params.hive_log_dir, params.hive_user)
            raise

        File(pid_file, action="delete")
def wait_until_server_starts():
    """
    Poll the Streamline (SAM) config REST endpoint until it answers.

    Up to ``max_retries`` attempts are made. With security enabled the
    request goes through curl (SPNEGO) as params.streamline_user and any
    JSON reply counts as success; otherwise urllib2 is used and an HTTP 200
    counts as success. After a URLError or ExecutionFailed the function
    sleeps ``num * 5`` seconds before the next attempt.

    :raises Fail: when the endpoint answers with a non-200 status, or when no
      attempt succeeded.
    """
    # Imported locally so the block does not depend on the file-level imports.
    from resource_management.core.exceptions import Fail
    import params

    # NOTE: streamline_api, stdout, api_response, response_code, num and
    # max_retries are referenced by name inside the format strings below.
    streamline_api = format(
        "http://{params.hostname}:{params.streamline_port}/api/v1/config/streamline"
    )
    Logger.info(streamline_api)
    max_retries = 6
    success = False
    curl_connection_timeout = '5'

    for num in range(0, max_retries):
        try:
            Logger.info(format("Making http requests to {streamline_api}"))

            if params.security_enabled:
                get_app_info_cmd = "curl --negotiate -u : -ks --location-trusted --connect-timeout " + curl_connection_timeout + " " + streamline_api
                _, stdout, _ = get_user_call_output(
                    get_app_info_cmd,
                    user=params.streamline_user,
                    path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
                )
                try:
                    json.loads(stdout)
                except Exception:
                    # Not JSON yet: fall through to the next attempt.
                    Logger.error(
                        format(
                            "Response from SAM API was not a valid JSON. Response: {stdout}"
                        ))
                else:
                    success = True
                    Logger.info(
                        format(
                            "Successfully made a API request to SAM. {stdout}"))
                    break
            else:
                response = urllib2.urlopen(streamline_api)
                api_response = response.read()
                response_code = response.getcode()
                # Log the numeric status, not the response object.
                Logger.info(format("SAM response http status {response_code}"))
                if response_code != 200:
                    failure_message = format(
                        "Failed to fetch response for {streamline_api}")
                    Logger.error(failure_message)
                    show_logs(params.streamline_log_dir,
                              params.streamline_user)
                    # A bare `raise` has no active exception here; raise an
                    # explicit failure instead. Fail is not caught by the
                    # retry handler below, so this aborts the wait.
                    raise Fail(failure_message)

                success = True
                Logger.info(
                    format(
                        "Successfully made a API request to SAM. {api_response}"
                    ))
                break
        except (urllib2.URLError, ExecutionFailed):
            Logger.error(
                format(
                    "Failed to make API request to SAM server at {streamline_api},retrying.. {num} out of {max_retries}"
                ))
            time.sleep(num * 5)  # linear back-off: 0s, 5s, 10s, ...
            continue

    if not success:
        failure_message = format(
            "Failed to make API request to SAM server at {streamline_api} after {max_retries}"
        )
        Logger.error(failure_message)
        raise Fail(failure_message)
def service_check(self, env):
    """
    Check that the Schema Registry REST API answers.

    The schemaproviders endpoint (https when params.registry_ssl_enabled,
    http otherwise) is requested up to ``max_retries`` times. With security
    or SSL enabled the request goes through curl as the smoke user and any
    JSON reply counts as success; otherwise urllib2 is used and an HTTP 200
    counts as success. After a URLError the function sleeps ``num * 10``
    seconds before the next attempt.

    :param env: execution environment; receives the params module.
    :raises Fail: when no attempt succeeded.
    """
    import params
    env.set_params(params)

    # NOTE: registry_api, stdout, api_response, response_code, num and
    # max_retries are referenced by name inside the format strings below.
    if params.registry_ssl_enabled:
        registry_api = format(
            "https://{params.hostname}:{params.registry_ssl_port}/api/v1/schemaregistry/schemaproviders"
        )
    else:
        registry_api = format(
            "http://{params.hostname}:{params.registry_port}/api/v1/schemaregistry/schemaproviders"
        )
    Logger.info(registry_api)
    max_retries = 3
    success = False

    if params.security_enabled:
        kinit_cmd = format(
            "{kinit_path_local} -kt {params.smoke_user_keytab} {params.smokeuser_principal};"
        )
        Execute(kinit_cmd, user=params.smokeuser)

    for num in range(0, max_retries):
        try:
            Logger.info(format("Making http requests to {registry_api}"))

            if params.security_enabled or params.registry_ssl_enabled:
                get_app_info_cmd = "curl --negotiate -u : -ks --location-trusted --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + registry_api
                _, stdout, _ = get_user_call_output(
                    get_app_info_cmd,
                    user=params.smokeuser,
                    path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
                )
                try:
                    json.loads(stdout)
                except Exception:
                    # Not JSON yet: fall through to the next attempt.
                    Logger.error(
                        format(
                            "Response from REGISTRY API was not a valid JSON. Response: {stdout}"
                        ))
                else:
                    success = True
                    Logger.info(
                        format(
                            "Successfully made a API request to registry. {stdout}"
                        ))
                    break
            else:
                response = urllib2.urlopen(registry_api)
                api_response = response.read()
                response_code = response.getcode()
                Logger.info(
                    format("registry response http status {response_code}"))
                if response_code != 200:
                    Logger.error(
                        format("Failed to fetch response for {registry_api}"))
                    show_logs(params.registry_log_dir, params.registry_user)
                else:
                    success = True
                    Logger.info(
                        format(
                            "Successfully made a API request to registry. {api_response}"
                        ))
                    break
        except urllib2.URLError:
            Logger.error(
                format(
                    "Failed to make API request to Registry server at {registry_api},retrying.. {num} out {max_retries}"
                ))
            time.sleep(num * 10)  # linear back-off: 0s, 10s, 20s
            continue

    if not success:
        raise Fail(
            format(
                "Failed to make API request to Registry server at {registry_api} after {max_retries}"
            ))

    # Only report success once a request has actually succeeded.
    Logger.info("Registry check passed")
"dpprofiler.sensitivepartitioned.metric.name"] dpprofiler_senstitive_metric_name = dpprofiler_config[ "dpprofiler.senstitive.metric.name"] dpprofiler_profiler_autoregister = "true" livy_session_config = dpprofiler_config["livy.session.config"] dpprofiler_custom_config = dpprofiler_config["dpprofiler.custom.config"] if not dpprofiler_config["dpprofiler.profiler.autoregister"]: dpprofiler_profiler_autoregister = "false" dpprofiler_crypto_secret = \ get_user_call_output.get_user_call_output(format("date +%s | sha256sum | base64 | head -c 32 "), user=dpprofiler_user, is_checked_call=False)[1] livy_hosts = default("/clusterHostInfo/livy2_server_hosts", []) livy_url = "" if stack_version_formatted and check_stack_feature(StackFeature.SPARK_LIVY, stack_version_formatted) and \ len(livy_hosts) > 0: livy_livyserver_host = str(livy_hosts[0]) livy_livyserver_port = config['configurations']['livy2-conf'][ 'livy.server.port'] livy_url = "http://" + livy_livyserver_host + ":" + livy_livyserver_port dpprofiler_secured = "false" if config['configurations']['cluster-env']['security_enabled']:
def service_check(self, env):
    """
    Smoke-test YARN by running the distributed shell client and verifying the
    resulting application through the ResourceManagers' REST API.

    The distributed shell job (``ls`` in ``number_of_nm`` containers) is run
    as the smoke user; its output is searched for ``appTrackingUrl`` to find
    the application id, and every host in params.rm_hosts is asked for that
    application's state. Hosts whose reply is not JSON, or whose JSON lacks
    the app/state/finalStatus fields, are tolerated and skipped.

    :param env: execution environment; receives the params module.
    :raises Exception: when the tracking URL or application id cannot be
      found in the job output, when a ResourceManager reports the
      application as anything other than FINISHED/SUCCEEDED, or when no
      ResourceManager returned JSON at all.
    """
    import params
    env.set_params(params)

    if params.hdp_stack_version_major != "" and compare_versions(params.hdp_stack_version_major, "2.2") >= 0:
        path_to_distributed_shell_jar = (
            "/usr/hdp/current/hadoop-yarn-client/hadoop-yarn-applications-distributedshell.jar"
        )
    else:
        path_to_distributed_shell_jar = "/usr/lib/hadoop-yarn/hadoop-yarn-applications-distributedshell*.jar"

    yarn_distrubuted_shell_check_cmd = format(
        "yarn org.apache.hadoop.yarn.applications.distributedshell.Client "
        "-shell_command ls -num_containers {number_of_nm} -jar {path_to_distributed_shell_jar}"
    )

    if params.security_enabled:
        kinit_cmd = format("{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};")
        smoke_cmd = format("{kinit_cmd} {yarn_distrubuted_shell_check_cmd}")
    else:
        smoke_cmd = yarn_distrubuted_shell_check_cmd

    return_code, out = shell.checked_call(
        smoke_cmd, path="/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin", user=params.smokeuser
    )

    # re.search returns None when the job output carries no tracking URL;
    # fail with a clear message instead of an AttributeError.
    m = re.search(r"appTrackingUrl=(.*),\s", out)
    if m is None:
        raise Exception(
            "Could not find appTrackingUrl in the output of the YARN distributed shell job")
    app_url = m.group(1)

    # The application id is the last URL segment containing "application".
    application_name = None
    for item in str(app_url).split("/"):
        if "application" in item:
            application_name = item
    if application_name is None:
        raise Exception("Could not determine the application id from " + app_url)

    json_response_received = False
    for rm_host in params.rm_hosts:
        info_app_url = (
            params.scheme + "://" + rm_host + ":" + params.rm_active_port + "/ws/v1/cluster/apps/" + application_name
        )

        get_app_info_cmd = (
            "curl --negotiate -u : -ksL --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + info_app_url
        )

        return_code, stdout, _ = get_user_call_output(
            get_app_info_cmd, user=params.smokeuser, path="/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin"
        )

        # Best effort per host: a reply that is not JSON is skipped.
        try:
            json_response = json.loads(stdout)
        except Exception:
            continue
        json_response_received = True

        # A JSON reply without the expected fields is skipped as well.
        try:
            app_state = json_response["app"]["state"]
            app_final_status = json_response["app"]["finalStatus"]
        except (KeyError, TypeError):
            continue

        # This check used to sit inside a try whose `except Exception: pass`
        # swallowed the failure; it is raised outside any handler now so a
        # failed application actually fails the service check.
        if app_state != "FINISHED" or app_final_status != "SUCCEEDED":
            raise Exception(
                "Application " + app_url + " state/status is not valid. Should be FINISHED/SUCCEEDED."
            )

    if not json_response_received:
        raise Exception("Could not get json response from YARN API")
def curl_krb_request(tmp_dir, keytab, principal, url, cache_file_prefix,
                     krb_exec_search_paths, return_only_http_code, caller_label, user,
                     connection_timeout=CONNECTION_TIMEOUT_DEFAULT, ca_certs=None,
                     kinit_timer_ms=DEFAULT_KERBEROS_KINIT_TIMER_MS, method='', body='', header=''):
    """
    Makes a curl request using the kerberos credentials stored in a calculated cache file. The
    cache file is created by combining the supplied principal, keytab, user, and request name into
    a unique hash.

    This function will use the klist command to determine if the cache is expired and will perform
    a kinit if necessary. Additionally, it has an internal timer to force a kinit after a
    configurable amount of time. This is to prevent boundary issues where requests hit the edge
    of a ticket's lifetime.

    :param tmp_dir: the directory to use for storing the local kerberos cache for this request.
    :param keytab: the location of the keytab to use when performing a kinit
    :param principal: the principal to use when performing a kinit
    :param url: the URL to request
    :param cache_file_prefix: an identifier used to build the unique cache name for this request.
                              This ensures that multiple requests can use the same cache.
    :param krb_exec_search_paths: the search path to use for invoking kerberos binaries
    :param return_only_http_code: True to return only the HTTP code, False to return GET content
    :param caller_label: an identifier to give context into the caller of this module (used for logging)
    :param user: the user to invoke the curl command as
    :param connection_timeout: if specified, a connection timeout for curl, in seconds
                               (defaults to CONNECTION_TIMEOUT_DEFAULT)
    :param ca_certs: path to certificates; when empty, curl is run with ``-k``
    :param kinit_timer_ms: if specified, the time (in ms), before forcing a kinit even if the
                           klist cache is still valid.
    :param method: optional HTTP method passed to curl via ``-X``; ``body`` and ``header`` are
                   only applied when a method is given (not used with return_only_http_code)
    :param body: optional request body passed to curl via ``-d``
    :param header: optional single request header passed to curl via ``-H``
    :return: a tuple ``(response, error_msg, elapsed)`` where ``response`` is the HTTP code as an
             int (return_only_http_code) or the response body, or ``""`` when curl printed
             nothing; ``error_msg`` is curl's stderr or None; ``elapsed`` is the wall-clock
             duration of the request as a difference of ``time.time()`` values, i.e. in seconds.
    :raises Fail: re-raised when the curl invocation fails.
    """
    import uuid

    # backward compatibility with old code and management packs, etc. All new code need pass
    # ca_certs explicitly
    if ca_certs is None:
        try:
            from ambari_agent.AmbariConfig import AmbariConfig
            ca_certs = AmbariConfig.get_resolved_config().get_ca_cert_file_path()
        except Exception:
            # best effort only: without a resolved CA file curl falls back to '-k' below
            pass

    # start off false
    is_kinit_required = False

    # Create the kerberos credentials cache (ccache) file and set it in the environment to use
    # when executing curl. Use the md5 hash of the combination of the principal and keytab file
    # to generate a (relatively) unique cache filename so that we can use it as needed. Scope
    # this file by user in order to prevent sharing of cache files by multiple users.
    ccache_file_name = _md5("{0}|{1}".format(principal, keytab)).hexdigest()

    curl_krb_cache_path = os.path.join(tmp_dir, "curl_krb_cache")
    if not os.path.exists(curl_krb_cache_path):
        os.makedirs(curl_krb_cache_path)
    # world-writable with the sticky bit (same value as the legacy literal 01777)
    os.chmod(curl_krb_cache_path, 0o1777)

    ccache_file_path = "{0}{1}{2}_{3}_cc_{4}".format(
        curl_krb_cache_path, os.sep, cache_file_prefix, user, ccache_file_name)
    kerberos_env = {'KRB5CCNAME': ccache_file_path}

    # concurrent kinit's can cause the following error:
    # Internal credentials cache error while storing credentials while getting initial credentials
    kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
    kinit_lock.acquire()
    try:
        # If there are no tickets in the cache or they are expired, perform a kinit, else use what
        # is in the cache
        if krb_exec_search_paths:
            klist_path_local = get_klist_path(krb_exec_search_paths)
        else:
            klist_path_local = get_klist_path()

        # take a look at the last time kinit was run for the specified cache and force a new
        # kinit if it's time; this helps to avoid problems approaching ticket boundary when
        # executing a klist and then a curl
        last_kinit_time = _KINIT_CACHE_TIMES.get(ccache_file_name, 0)
        current_time = int(time.time())

        # The recorded kinit times are epoch seconds while the timer is configured in
        # milliseconds, so convert before comparing; subtracting the raw millisecond value
        # from a seconds timestamp made the forced kinit practically unreachable.
        if current_time - kinit_timer_ms / 1000.0 > last_kinit_time:
            is_kinit_required = True

        # if the time has not expired, double-check that the cache still has a valid ticket
        if not is_kinit_required:
            klist_command = "{0} -s {1}".format(klist_path_local, ccache_file_path)
            is_kinit_required = (shell.call(klist_command, user=user)[0] != 0)

        # if kinit is required, the perform the kinit
        if is_kinit_required:
            if krb_exec_search_paths:
                kinit_path_local = get_kinit_path(krb_exec_search_paths)
            else:
                kinit_path_local = get_kinit_path()

            logger.debug(
                "Enabling Kerberos authentication for %s via GSSAPI using ccache at %s",
                caller_label, ccache_file_path)

            # kinit; there's no need to set a ticket timeout as this will use the default
            # invalidation configured in the krb5.conf - regenerating keytabs will not prevent
            # an existing cache from working correctly
            shell.checked_call("{0} -c {1} -kt {2} {3} > /dev/null".format(
                kinit_path_local, ccache_file_path, keytab, principal), user=user)

            # record kinit time
            _KINIT_CACHE_TIMES[ccache_file_name] = current_time
        else:
            # no kinit needed, use the cache
            logger.debug(
                "Kerberos authentication for %s via GSSAPI already enabled using ccache at %s.",
                caller_label, ccache_file_path)
    finally:
        kinit_lock.release()

    # check if cookies dir exists, if not then create it
    cookies_dir = os.path.join(tmp_dir, "cookies")
    if not os.path.exists(cookies_dir):
        os.makedirs(cookies_dir)

    # a fresh cookie jar per request; it is removed again in the finally block below
    cookie_file_name = str(uuid.uuid4())
    cookie_file = os.path.join(cookies_dir, cookie_file_name)

    start_time = time.time()
    error_msg = None

    # setup timeouts for the request; ensure we use integers since that is what curl needs
    connection_timeout = int(connection_timeout)
    maximum_timeout = connection_timeout + 2

    ssl_options = ['-k']
    if ca_certs:
        ssl_options = ['--cacert', ca_certs]

    try:
        if return_only_http_code:
            _, curl_stdout, curl_stderr = get_user_call_output(
                ['curl', '--location-trusted'] + ssl_options + [
                    '--negotiate', '-u', ':', '-b', cookie_file, '-c', cookie_file,
                    '-w', '%{http_code}', url,
                    '--connect-timeout', str(connection_timeout),
                    '--max-time', str(maximum_timeout),
                    '-o', '/dev/null'
                ],
                user=user, env=kerberos_env)
        else:
            # returns response body
            curl_command = ['curl', '--location-trusted'] + ssl_options + [
                '--negotiate', '-u', ':', '-b', cookie_file, '-c', cookie_file, url,
                '--connect-timeout', str(connection_timeout),
                '--max-time', str(maximum_timeout)
            ]

            # header and body are only honoured together with an explicit method
            if method:
                if header:
                    curl_command.extend(['-H', header])
                curl_command.extend(['-X', method])
                if body:
                    curl_command.extend(['-d', body])

            _, curl_stdout, curl_stderr = get_user_call_output(
                curl_command, user=user, env=kerberos_env)
    except Fail:
        if logger.isEnabledFor(logging.DEBUG):
            logger.exception(
                "Unable to make a curl request for {0}.".format(caller_label))
        raise
    finally:
        if os.path.isfile(cookie_file):
            os.remove(cookie_file)

    # empty quotes evaluates to false
    if curl_stderr:
        error_msg = curl_stderr

    # NOTE: despite the name this is a duration in seconds; kept as-is because callers
    # receive it positionally.
    time_millis = time.time() - start_time

    # empty quotes evaluates to false
    if curl_stdout:
        if return_only_http_code:
            return (int(curl_stdout), error_msg, time_millis)
        else:
            return (curl_stdout, error_msg, time_millis)

    logger.debug("The curl response for %s is empty; standard error = %s",
                 caller_label, str(error_msg))
    return ("", error_msg, time_millis)
def hive_service(name, action='start', upgrade_type=None):
    """
    Start or stop a Hive daemon.

    :param name: the component to manage, either 'metastore' or 'hiveserver2'
    :param action: 'start' launches the daemon (skipped when its pid is already alive, except
      during a rolling upgrade) and then verifies the metastore DB connection for the known
      JDBC drivers; 'stop' kills the daemon, escalating to ``kill -9``, and deletes the pid
      file. Any other value is a no-op apart from reading the pid file.
    :param upgrade_type: when equal to UPGRADE_TYPE_ROLLING the "already running" guard is
      dropped on start
    :raises ValueError: if ``name`` is not a supported component
    """
    import params

    # NOTE: format() resolves placeholders such as {pid_file} and {pid} from the local
    # variables of this function, so those names must be kept.
    if name == 'metastore':
        pid_file = format("{hive_pid_dir}/{hive_metastore_pid}")
        cmd = format(
            "{start_metastore_path} {hive_log_dir}/hive-metastore.out {hive_log_dir}/hive-metastore-err.out {pid_file} {hive_server_conf_dir} {hive_log_dir}"
        )
    elif name == 'hiveserver2':
        pid_file = format("{hive_pid_dir}/{hive_pid}")
        cmd = format(
            "{start_hiveserver2_path} {hive_log_dir}/hive-server2.out {hive_log_dir}/hive-server2-err.out {pid_file} {hive_server_conf_dir} {hive_log_dir}"
        )
    else:
        # Without this guard pid_file/cmd stay undefined and the failure surfaces later
        # with a message that does not mention the bad component name.
        raise ValueError("Unsupported Hive component name: {0}".format(name))

    # Unchecked call: a missing pid file does not raise here, the captured stdout is used as-is.
    pid = get_user_call_output.get_user_call_output(
        format("cat {pid_file}"), user=params.hive_user, is_checked_call=False)[1]
    process_id_exists_command = format(
        "ls {pid_file} >/dev/null 2>&1 && ps -p {pid} >/dev/null 2>&1")

    if action == 'start':
        if name == 'hiveserver2':
            check_fs_root()

        daemon_cmd = cmd
        hadoop_home = params.hadoop_home
        hive_bin = "hive"

        # upgrading hiveserver2 (rolling_restart) means that there is an existing,
        # de-registering hiveserver2; the pid will still exist, but the new
        # hiveserver is spinning up on a new port, so the pid will be re-written
        if upgrade_type == UPGRADE_TYPE_ROLLING:
            process_id_exists_command = None

            if params.version:
                import os
                hadoop_home = format("/usr/iop/{version}/hadoop")
                hive_bin = os.path.join(params.hive_bin, hive_bin)

        Execute(daemon_cmd,
                user=params.hive_user,
                environment={
                    'HADOOP_HOME': hadoop_home,
                    'JAVA_HOME': params.java64_home,
                    'HIVE_BIN': hive_bin
                },
                path=params.execute_path,
                not_if=process_id_exists_command)

        # verify the metastore database is reachable for the JDBC drivers we know how to check
        if params.hive_jdbc_driver in ("com.mysql.jdbc.Driver",
                                       "org.postgresql.Driver",
                                       "oracle.jdbc.driver.OracleDriver"):
            db_connection_check_command = format(
                "{java64_home}/bin/java -cp {check_db_connection_jar}:{target} org.apache.ambari.server.DBConnectionVerification '{hive_jdbc_connection_url}' {hive_metastore_user_name} {hive_metastore_user_passwd!p} {hive_jdbc_driver}"
            )
            Execute(db_connection_check_command,
                    path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
                    tries=5,
                    try_sleep=10)

    elif action == 'stop':
        daemon_kill_cmd = format("{sudo} kill {pid}")
        daemon_hard_kill_cmd = format("{sudo} kill -9 {pid}")

        # graceful kill, skipped when the process is already gone
        Execute(daemon_kill_cmd,
                not_if=format("! ({process_id_exists_command})"))

        # hard kill only if the process is still alive after wait_time seconds
        wait_time = 5
        Execute(daemon_hard_kill_cmd,
                not_if=format(
                    "! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )"
                ))

        # check if stopped the process, else fail the task
        Execute(format("! ({process_id_exists_command})"),
                tries=20,
                try_sleep=3,
                )

        File(pid_file, action="delete")
def service(name, action='start'):
    """
    Start or stop a Storm daemon and maintain its pid file.

    On 'start' the daemon is launched in the background (unless the pid recorded in its pid
    file is still alive) and the pid file is then rewritten from the matching ``jps`` entry,
    retrying until the process shows up. On 'stop' every pid listed in the pid file is killed,
    escalating to ``kill -9``, and the pid file is deleted; nothing happens when the pid file
    does not exist.

    :param name: the daemon name; also the key into ``status_params.pid_files``
    :param action: 'start' or 'stop'
    """
    import params
    import status_params

    # NOTE: format() resolves placeholders from this function's local variables, so the
    # local names used inside the templates are significant.
    pid_file = status_params.pid_files[name]
    no_op_test = as_user(
        format("ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"),
        user=params.storm_user)

    # logviewer and drpc get twice as many attempts to show up in jps
    tries_count = 12 if name in ("logviewer", "drpc") else 6

    # pattern identifying the daemon's main class / jar in the `jps -l` listing
    if name == 'ui':
        process_grep = "backtype.storm.ui.core$"
    elif name == "rest_api":
        process_grep = format("{rest_lib_dir}/storm-rest-.*\.jar$")
    else:
        process_grep = format("storm.daemon.{name}$")

    find_proc = format("{jps_binary} -l | grep {process_grep}")
    write_pid = format("{find_proc} | awk {{'print $1'}} > {pid_file}")
    crt_pid_cmd = format("{find_proc} && {write_pid}")
    storm_env = format(
        "source {conf_dir}/storm-env.sh ; export PATH=$JAVA_HOME/bin:$PATH")

    if action == "start":
        if name == "rest_api":
            process_cmd = format(
                "{storm_env} ; java -jar {rest_lib_dir}/`ls {rest_lib_dir} | grep -wE storm-rest-[0-9.-]+\.jar` server"
            )
            cmd = format(
                "{process_cmd} {rest_api_conf_file} > {log_dir}/restapi.log 2>&1"
            )
        else:
            cmd = format(
                "{storm_env} ; storm {name} > {log_dir}/{name}.out 2>&1")

        # launch without waiting, then poll jps until the pid can be recorded
        Execute(cmd,
                not_if=no_op_test,
                user=params.storm_user,
                wait_for_finish=False,
                path=params.storm_bin_dir)
        Execute(crt_pid_cmd,
                user=params.storm_user,
                logoutput=True,
                tries=tries_count,
                try_sleep=10,
                path=params.storm_bin_dir)

    elif action == "stop":
        process_dont_exist = format("! ({no_op_test})")

        # nothing to stop (and nothing to clean up) without a pid file
        if not os.path.exists(pid_file):
            return

        pid = get_user_call_output.get_user_call_output(
            format("! test -f {pid_file} || cat {pid_file}"),
            user=params.storm_user)[1]

        # if multiple processes are running (for example user can start logviewer from console)
        # there can be more than one id
        pid = pid.replace("\n", " ")

        Execute(format("{sudo} kill {pid}"),
                not_if=process_dont_exist)
        Execute(format("{sudo} kill -9 {pid}"),
                not_if=format(
                    "sleep 2; {process_dont_exist} || sleep 20; {process_dont_exist}"
                ),
                ignore_failures=True)

        File(pid_file, action="delete")
def prepare_war(params):
    """
    Run the Oozie prepare-war command, but only when one of its marker files says it is needed.

    Two marker files are consulted:
      - <stack-root>/current/oozie-server/.prepare_war_cmd holds the command line of the last run
      - <stack-root>/current/oozie-server/.war_libext_content holds a listing of the libext folder
    prepare-war is executed when either marker is missing or its content differs from the
    expected value; both markers are rewritten after a successful run.

    :param params: the parameters module; ``oozie_user`` is read from it directly and the
      format() templates resolve their placeholders by name
    :raises Fail: if prepare-war exits non-zero or does not report a new WAR file
    """
    # NOTE: format() resolves placeholders from this function's local variables, so the
    # local names used inside the templates are significant.
    prepare_war_cmd_file = format("{oozie_home}/.prepare_war_cmd")
    libext_content_file = format("{oozie_home}/.war_libext_content")
    list_libext_command = format("ls -l {oozie_libext_dir}") + " | awk '{print $9, $5}' | awk 'NF > 0'"

    # DON'T CHANGE THE VALUE SINCE IT'S USED TO DETERMINE WHETHER TO RUN THE COMMAND OR NOT BY READING THE MARKER FILE.
    # Oozie tmp dir should be /var/tmp/oozie and is already created by a function above.
    command = format("cd {oozie_tmp_dir} && {oozie_setup_sh} prepare-war {oozie_secure}").strip()
    # oozie_setup_sh and oozie_setup_sh_current are different during Ambaripreupload
    command_to_file = format("cd {oozie_tmp_dir} && {oozie_setup_sh_current} prepare-war {oozie_secure}").strip()

    # First marker: the command line recorded by the previous run.
    run_prepare_war = False
    if not os.path.exists(prepare_war_cmd_file):
        run_prepare_war = True
        Logger.info(format("Will run prepare war cmd since marker file {prepare_war_cmd_file} is missing."))
    else:
        with open(prepare_war_cmd_file, "r") as f:
            cmd = f.readline().strip()

        if cmd != command_to_file:
            run_prepare_war = True
            Logger.info(format(
                "Will run prepare war cmd since marker file {prepare_war_cmd_file} has contents which differ.\n"
                "Expected: {command_to_file}.\nActual: {cmd}."))

    # The libext listing is always collected because it is written to the marker after a run.
    return_code, libext_content, error_output = get_user_call_output.get_user_call_output(
        list_libext_command, user=params.oozie_user)
    libext_content = libext_content.strip()

    # Second marker: only consulted when the first one did not already demand a run.
    if not run_prepare_war:
        if not os.path.exists(libext_content_file):
            run_prepare_war = True
            Logger.info(format("Will run prepare war cmd since marker file {libext_content_file} is missing."))
        else:
            with open(libext_content_file, "r") as f:
                old_content = f.read().strip()

            if old_content != libext_content:
                run_prepare_war = True
                Logger.info(format(
                    "Will run prepare war cmd since marker file {libext_content_file} has contents which differ.\n"
                    "Content of the folder {oozie_libext_dir} changed."))

    if not run_prepare_war:
        Logger.info(format("No need to run prepare-war since marker file {prepare_war_cmd_file} already exists."))
        return

    # Time-consuming to run
    return_code, output = shell.call(command, user=params.oozie_user)
    if output is None:
        output = ""

    # success requires a zero exit code AND the confirmation text in the output
    war_built = return_code == 0 and "New Oozie WAR file with added".lower() in output.lower()
    if not war_built:
        message = "Unexpected Oozie WAR preparation output {0}".format(output)
        Logger.error(message)
        raise Fail(message)

    # Generate marker files
    File(prepare_war_cmd_file,
         content=command_to_file,
         mode=0o644,
         )
    File(libext_content_file,
         content=libext_content,
         mode=0o644,
         )