class KerberosUtil(object):
    """Util class for Kerberos installation.

    Installs the MIT Kerberos server packages, lays down the configuration
    files from templates, initializes the KDC database, and creates the
    principals / keytab files needed by the hadoop services on every host.
    """

    def __init__(self, kdc_host, kdc_domain, krb_template_conf, node_list):
        self.kdc_host = kdc_host
        self.kdc_domain = kdc_domain
        self.krb_template_conf = krb_template_conf
        # BUG FIX: list.append() returns None, so the original
        # ``self.list_of_hosts = node_list.append(kdc_host)`` always left
        # list_of_hosts as None when a node list was given. Build a new
        # list instead (this also avoids mutating the caller's node_list).
        if node_list:
            self.list_of_hosts = list(node_list) + [kdc_host]
        else:
            self.list_of_hosts = [kdc_host]
        tinctest.logger.info("list_of_hosts - %s" % self.list_of_hosts)
        self.rpmutil = RPMUtil()
        self.service_cmd = "sudo /sbin/service"
        self.kadmin_cmd = "sudo /usr/sbin/kadmin.local -q "
        self.kdb5_cmd = "/usr/sbin/kdb5_util"
        self.login_user = self._get_login_user()  # get current logged-in user
        self.KRB_PKG_LIST = ["krb5-server", "krb5-libs", "krb5-workstation"]
        self.REALM = "HD.PIVOTAL.COM"
        self.KRB_CONF = "/etc/krb5.conf"
        self.KRB_CONF_TEMPLATE = "krb5.conf.t"
        self.KDC_CONF = "/var/kerberos/krb5kdc/kdc.conf"
        self.KDC_CONF_TEMPLATE = "kdc.conf"
        self.KADMIN_ACL_CONF = "/var/kerberos/krb5kdc/kadm5.acl"
        self.KADMIN_ACL_CONF_TEMPLATE = "kadm5.acl"
        self.PRINCIPALS = ["hdfs", "yarn", "mapred", "HTTP"]

    def _get_login_user(self):
        """Return the name of the user running this process (via whoami)."""
        res = {}
        run_shell_command("whoami", "Get logged-in user", res)
        return res['stdout'].split('\n')[0]

    def install_kerberos(self):
        """
        Iterates through the kerberos package list and installs
        them using yum in rpm util module
        """
        for pkg in self.KRB_PKG_LIST:
            if not self.rpmutil.is_pkg_installed(pkg):
                if not self.rpmutil.install_package_using_yum(pkg, True):
                    raise KerberosUtilException(
                        "Couln't install kerberos package - %s" % pkg)

    def _get_domain_name(self):
        """Return the domain portion of the (fully qualified) KDC hostname.

        Raises KerberosUtilException when the hostname has no dot at all.
        """
        hostname = self.kdc_host
        if hostname.find('.') >= 0:
            return hostname[hostname.find('.') + 1:]
        raise KerberosUtilException(
            "hostname is not fully qualified domain name : %s" % hostname)

    def install_kerberos_conf(self):
        """
        Update the kerberos configuration files according the env
        and copy in appropriate locations
        """
        transforms = {"%DOMAIN%": self.kdc_domain,
                      "%HOSTNAME%": self.kdc_host}
        input_file_path = local_path(
            self.krb_template_conf + "/" + self.KRB_CONF_TEMPLATE)
        # [:-2] strips the trailing ".t" of the template file name
        output_file_path = local_path(
            self.krb_template_conf + "/" + self.KRB_CONF_TEMPLATE[:-2])
        with open(input_file_path, 'r') as input:
            with open(output_file_path, 'w') as output:
                for line in input.readlines():
                    # substitute every placeholder before writing the line out
                    for key, value in transforms.iteritems():
                        line = re.sub(key, value, line)
                    output.write(line)
        cmd_str = "sudo cp %s %s" % (output_file_path, self.KRB_CONF)
        if not run_shell_command(cmd_str, "Copying krb5.conf"):
            raise KerberosUtilException("Couldn't copy krb5.conf")
        cmd_str = "sudo cp %s %s" % (
            local_path(self.krb_template_conf + "/" + self.KDC_CONF_TEMPLATE),
            self.KDC_CONF)
        if not run_shell_command(cmd_str, "Copying kdc.conf"):
            raise KerberosUtilException("Couldn't copy kdc.conf")
        cmd_str = "sudo cp %s %s" % (
            local_path(self.krb_template_conf + "/" +
                       self.KADMIN_ACL_CONF_TEMPLATE),
            self.KADMIN_ACL_CONF)
        if not run_shell_command(cmd_str, "Copying kadm5.acl"):
            raise KerberosUtilException("Couldn't copy kadm5.acl")

    def create_krb_database(self):
        """ Initializes kerberos database """
        cmd_str = "sudo %s -P changeme create -s" % self.kdb5_cmd
        if not run_shell_command(cmd_str, "Creating Kerberos database"):
            raise KerberosUtilException(
                "Exception occured while creating Kerberos Databse!!")

    def start_server(self):
        """ Starts Kerberos server """
        if not run_shell_command("%s krb5kdc restart" % self.service_cmd):
            raise KerberosUtilException(
                "Couln't start kerberos service : krb5kdc\nCheck out the logs in /var/log/krb5kdc.log")
        if not run_shell_command("%s kadmin restart" % self.service_cmd):
            raise KerberosUtilException(
                "Couln't start kerberos service : kadmin")

    def stop_server(self):
        """ Stops kerberos server """
        run_shell_command("%s krb5kdc stop" % self.service_cmd)
        run_shell_command("%s kadmin stop" % self.service_cmd)

    def add_krb_principals(self, hosts_list):
        """ Add principal to kerberos server """
        for host in hosts_list:
            for principal in self.PRINCIPALS:
                run_shell_command(
                    self.kadmin_cmd + "\"addprinc -randkey %s/%s@%s\""
                    % (principal, host, self.REALM))
        # creating principal for log-in user for KDC host only
        run_shell_command(
            self.kadmin_cmd + "\"addprinc -randkey %s/%s@%s\""
            % (self.login_user, self.kdc_host, self.REALM))

    def create_keytab_files(self, hosts_list):
        """
        Create all keytab files and move them to /keytab directory,
        since we're sudo'ing everything we'll create keytab directory
        right under /
        """
        for host in hosts_list:
            cmd_str = "ssh -o StrictHostKeyChecking=no %s \"sudo rm -rf /keytab; sudo mkdir /keytab; sudo chmod 777 /keytab\"" % host
            run_shell_command(cmd_str,
                              "Creating keytab dir for host : %s" % host)
            # list of principals except HTTP
            for principal in self.PRINCIPALS[:-1]:
                keytab = "%s.service.keytab" % principal
                cmd_str = "%s \"xst -k %s %s/%s@%s HTTP/%s@%s\"" % (
                    self.kadmin_cmd, keytab, principal, host, self.REALM,
                    host, self.REALM)
                run_shell_command(cmd_str)
                # remote copy the keytab file
                cmd_str = "sudo scp -o StrictHostKeyChecking=no %s %s:/keytab" % (
                    keytab, host)
                run_shell_command(cmd_str, "Copy keytab file to host")
                # change ownership of the keytab file w.r.t the service
                cmd_str = "ssh -o StrictHostKeyChecking=no %s \"sudo chown %s:hadoop /keytab/%s\"" % (
                    host, principal, keytab)
                run_shell_command(cmd_str, "Change ownership of keytab file")
            # change the access rights of the keytab files
            cmd_str = "ssh -o StrictHostKeyChecking=no %s \"sudo chmod 400 /keytab/*\"" % host
            run_shell_command(cmd_str, "Change access rights of keytab file")
            # remove the keytab files so that we won't have issues
            # creating keytab files for other hosts
            cmd_str = "sudo rm *.keytab"
            run_shell_command(cmd_str, "Remove the keytab files")
        # keytab for the logged-in user, created on the KDC host only
        keytab = "%s.service.keytab" % self.login_user
        cmd_str = "%s \"xst -k %s %s/%s@%s HTTP/%s@%s\"" % (
            self.kadmin_cmd, keytab, self.login_user, self.kdc_host,
            self.REALM, self.kdc_host, self.REALM)
        run_shell_command(
            cmd_str,
            "Create keytab file for logged-in user %s" % self.login_user)
        cmd_str = "sudo chown %s:%s %s" % (self.login_user, self.login_user,
                                           keytab)
        run_shell_command(cmd_str)
        cmd_str = "cp %s /keytab" % keytab
        run_shell_command(cmd_str)

    def get_kerberos_ticket(self, user, host=None):
        """ Gets kerberos ticket for the input user on host """
        res = {}
        # locate the kinit binary; its path differs between distributions
        run_shell_command("sudo find /usr -name kinit", "kinit command", res)
        kinit_cmd = res['stdout'].split('\n')[0]
        if not host:
            host = self.kdc_host
        # service users own their keytabs, so non-login users need sudo
        if user != self.login_user:
            kinit_cmd = "sudo " + kinit_cmd
        cmd_str = "ssh -o StrictHostKeyChecking=no %s \"%s -k -t /keytab/%s.service.keytab %s/%s@%s\"" % (
            host, kinit_cmd, user, user, host, self.REALM)
        run_shell_command(cmd_str, "kinit for user: %s " % user)

    def clean(self):
        """
        Cleanup process for:
        1. Destroying previous tickets
        2. Stoping the server
        3. Destroying kerberos database
        """
        cmd_str = "kdestroy; sudo kdestroy"
        run_shell_command(cmd_str, "Destroy the tickets")
        self.stop_server()
        cmd_str = "sudo %s destroy -f" % self.kdb5_cmd
        run_shell_command(cmd_str, "Clean up Kerberos")

    def configure_server(self):
        """ Init method for configuring kerberos server """
        self.clean()
        self.install_kerberos()
        self.install_kerberos_conf()
        self.create_krb_database()
        self.start_server()
        self.add_krb_principals(self.list_of_hosts)
        self.create_keytab_files(self.list_of_hosts)
class KerberosUtil(object):
    """Util class for Kerberos installation.

    Drives the full KDC setup: package install, config templating,
    database creation, server start, and principal/keytab provisioning
    for every host in the cluster.
    """

    def __init__(self, kdc_host, kdc_domain, krb_template_conf, node_list):
        self.kdc_host = kdc_host
        self.kdc_domain = kdc_domain
        self.krb_template_conf = krb_template_conf
        if node_list:
            # BUG FIX: the original code assigned the return value of
            # node_list.append(kdc_host), which is None — list_of_hosts
            # therefore never contained the hosts. Concatenate instead.
            self.list_of_hosts = node_list + [kdc_host]
        else:
            self.list_of_hosts = [kdc_host]
        tinctest.logger.info("list_of_hosts - %s" % self.list_of_hosts)
        self.rpmutil = RPMUtil()
        self.service_cmd = "sudo /sbin/service"
        self.kadmin_cmd = "sudo /usr/sbin/kadmin.local -q "
        self.kdb5_cmd = "/usr/sbin/kdb5_util"
        self.login_user = self._get_login_user()  # get current logged-in user
        self.KRB_PKG_LIST = ["krb5-server", "krb5-libs", "krb5-workstation"]
        self.REALM = "HD.PIVOTAL.COM"
        self.KRB_CONF = "/etc/krb5.conf"
        self.KRB_CONF_TEMPLATE = "krb5.conf.t"
        self.KDC_CONF = "/var/kerberos/krb5kdc/kdc.conf"
        self.KDC_CONF_TEMPLATE = "kdc.conf"
        self.KADMIN_ACL_CONF = "/var/kerberos/krb5kdc/kadm5.acl"
        self.KADMIN_ACL_CONF_TEMPLATE = "kadm5.acl"
        self.PRINCIPALS = ["hdfs", "yarn", "mapred", "HTTP"]

    def _get_login_user(self):
        """Return the current user name as reported by whoami."""
        res = {}
        run_shell_command("whoami", "Get logged-in user", res)
        return res['stdout'].split('\n')[0]

    def install_kerberos(self):
        """
        Iterates through the kerberos package list and installs
        them using yum in rpm util module
        """
        for pkg in self.KRB_PKG_LIST:
            if not self.rpmutil.is_pkg_installed(pkg):
                if not self.rpmutil.install_package_using_yum(pkg, True):
                    raise KerberosUtilException(
                        "Couln't install kerberos package - %s" % pkg)

    def _get_domain_name(self):
        """Return everything after the first dot of the KDC hostname."""
        hostname = self.kdc_host
        if hostname.find('.') >= 0:
            return hostname[hostname.find('.') + 1:]
        raise KerberosUtilException(
            "hostname is not fully qualified domain name : %s" % hostname)

    def install_kerberos_conf(self):
        """
        Update the kerberos configuration files according the env
        and copy in appropriate locations
        """
        transforms = {"%DOMAIN%": self.kdc_domain,
                      "%HOSTNAME%": self.kdc_host}
        input_file_path = local_path(self.krb_template_conf + "/" +
                                     self.KRB_CONF_TEMPLATE)
        # drop the ".t" template suffix for the rendered file name
        output_file_path = local_path(self.krb_template_conf + "/" +
                                      self.KRB_CONF_TEMPLATE[:-2])
        with open(input_file_path, 'r') as input:
            with open(output_file_path, 'w') as output:
                for line in input.readlines():
                    for key, value in transforms.iteritems():
                        line = re.sub(key, value, line)
                    output.write(line)
        cmd_str = "sudo cp %s %s" % (output_file_path, self.KRB_CONF)
        if not run_shell_command(cmd_str, "Copying krb5.conf"):
            raise KerberosUtilException("Couldn't copy krb5.conf")
        cmd_str = "sudo cp %s %s" % (
            local_path(self.krb_template_conf + "/" + self.KDC_CONF_TEMPLATE),
            self.KDC_CONF)
        if not run_shell_command(cmd_str, "Copying kdc.conf"):
            raise KerberosUtilException("Couldn't copy kdc.conf")
        cmd_str = "sudo cp %s %s" % (
            local_path(self.krb_template_conf + "/" +
                       self.KADMIN_ACL_CONF_TEMPLATE),
            self.KADMIN_ACL_CONF)
        if not run_shell_command(cmd_str, "Copying kadm5.acl"):
            raise KerberosUtilException("Couldn't copy kadm5.acl")

    def create_krb_database(self):
        """ Initializes kerberos database """
        cmd_str = "sudo %s -P changeme create -s" % self.kdb5_cmd
        if not run_shell_command(cmd_str, "Creating Kerberos database"):
            raise KerberosUtilException(
                "Exception occured while creating Kerberos Databse!!")

    def start_server(self):
        """ Starts Kerberos server """
        if not run_shell_command("%s krb5kdc restart" % self.service_cmd):
            raise KerberosUtilException(
                "Couln't start kerberos service : krb5kdc\nCheck out the logs in /var/log/krb5kdc.log"
            )
        if not run_shell_command("%s kadmin restart" % self.service_cmd):
            raise KerberosUtilException(
                "Couln't start kerberos service : kadmin")

    def stop_server(self):
        """ Stops kerberos server """
        run_shell_command("%s krb5kdc stop" % self.service_cmd)
        run_shell_command("%s kadmin stop" % self.service_cmd)

    def add_krb_principals(self, hosts_list):
        """ Add principal to kerberos server """
        for host in hosts_list:
            for principal in self.PRINCIPALS:
                run_shell_command(self.kadmin_cmd +
                                  "\"addprinc -randkey %s/%s@%s\"" %
                                  (principal, host, self.REALM))
        # creating principal for log-in user for KDC host only
        run_shell_command(self.kadmin_cmd +
                          "\"addprinc -randkey %s/%s@%s\"" %
                          (self.login_user, self.kdc_host, self.REALM))

    def create_keytab_files(self, hosts_list):
        """
        Create all keytab files and move them to /keytab directory,
        since we're sudo'ing everything we'll create keytab directory
        right under /
        """
        for host in hosts_list:
            cmd_str = "ssh -o StrictHostKeyChecking=no %s \"sudo rm -rf /keytab; sudo mkdir /keytab; sudo chmod 777 /keytab\"" % host
            run_shell_command(cmd_str,
                              "Creating keytab dir for host : %s" % host)
            # list of principals except HTTP
            for principal in self.PRINCIPALS[:-1]:
                keytab = "%s.service.keytab" % principal
                cmd_str = "%s \"xst -k %s %s/%s@%s HTTP/%s@%s\"" % (
                    self.kadmin_cmd, keytab, principal, host, self.REALM,
                    host, self.REALM)
                run_shell_command(cmd_str)
                # remote copy the keytab file
                cmd_str = "sudo scp -o StrictHostKeyChecking=no %s %s:/keytab" % (
                    keytab, host)
                run_shell_command(cmd_str, "Copy keytab file to host")
                # change ownership of the keytab file w.r.t the service
                cmd_str = "ssh -o StrictHostKeyChecking=no %s \"sudo chown %s:hadoop /keytab/%s\"" % (
                    host, principal, keytab)
                run_shell_command(cmd_str, "Change ownership of keytab file")
            # change the access rights of the keytab files
            cmd_str = "ssh -o StrictHostKeyChecking=no %s \"sudo chmod 400 /keytab/*\"" % host
            run_shell_command(cmd_str, "Change access rights of keytab file")
            # remove the keytab files so that we won't have issues
            # creating keytab files for other hosts
            cmd_str = "sudo rm *.keytab"
            run_shell_command(cmd_str, "Remove the keytab files")
        # finally, the logged-in user's keytab on the KDC host
        keytab = "%s.service.keytab" % self.login_user
        cmd_str = "%s \"xst -k %s %s/%s@%s HTTP/%s@%s\"" % (
            self.kadmin_cmd, keytab, self.login_user, self.kdc_host,
            self.REALM, self.kdc_host, self.REALM)
        run_shell_command(
            cmd_str,
            "Create keytab file for logged-in user %s" % self.login_user)
        cmd_str = "sudo chown %s:%s %s" % (self.login_user, self.login_user,
                                           keytab)
        run_shell_command(cmd_str)
        cmd_str = "cp %s /keytab" % keytab
        run_shell_command(cmd_str)

    def get_kerberos_ticket(self, user, host=None):
        """ Gets kerberos ticket for the input user on host """
        res = {}
        # kinit lives in different places depending on the distro; find it
        run_shell_command("sudo find /usr -name kinit", "kinit command", res)
        kinit_cmd = res['stdout'].split('\n')[0]
        if not host:
            host = self.kdc_host
        if user != self.login_user:
            kinit_cmd = "sudo " + kinit_cmd
        cmd_str = "ssh -o StrictHostKeyChecking=no %s \"%s -k -t /keytab/%s.service.keytab %s/%s@%s\"" % (
            host, kinit_cmd, user, user, host, self.REALM)
        run_shell_command(cmd_str, "kinit for user: %s " % user)

    def clean(self):
        """
        Cleanup process for:
        1. Destroying previous tickets
        2. Stoping the server
        3. Destroying kerberos database
        """
        cmd_str = "kdestroy; sudo kdestroy"
        run_shell_command(cmd_str, "Destroy the tickets")
        self.stop_server()
        cmd_str = "sudo %s destroy -f" % self.kdb5_cmd
        run_shell_command(cmd_str, "Clean up Kerberos")

    def configure_server(self):
        """ Init method for configuring kerberos server """
        self.clean()
        self.install_kerberos()
        self.install_kerberos_conf()
        self.create_krb_database()
        self.start_server()
        self.add_krb_principals(self.list_of_hosts)
        self.create_keytab_files(self.list_of_hosts)
class CDHRpmUtil(HadoopUtil):
    """Utility for installing PHD Single node clusters using RPMs"""

    def __init__(self, hadoop_artifact_url, hadoop_install_dir,
                 hadoop_data_dir, template_conf_dir, hostname='localhost',
                 secure_hadoop=False):
        HadoopUtil.__init__(self, hadoop_artifact_url, hadoop_install_dir,
                            hadoop_data_dir, hostname)
        self.rpmutil = RPMUtil()
        self.hostname = hostname
        self.hadoop_artifact_url = hadoop_artifact_url
        self.hadoop_install_dir = hadoop_install_dir
        self.hadoop_binary_loc = ''
        self.hadoop_data_dir = hadoop_data_dir
        self.template_conf_dir = local_path(template_conf_dir)
        self.secure_hadoop = secure_hadoop
        # Constants
        # under the hadoop template configuration directory
        # both the below directories should be present
        self.SECURE_DIR_NAME = "conf.secure"  # secure configuration files location
        self.NON_SECURE_DIR_NAME = "conf.pseudo"  # non-secure configuration files location
        self.DEPENDENCY_PKGS = [
            "fuse-",      # eg. fuse-2.8.3-4.el6.x86_64
            "fuse-libs",  # eg. fuse-libs-2.8.3-4.el6.x86_6
            "nc-"         # eg. 1.84-22.el6.x86_64
        ]
        self.PKGS_TO_REMOVE = "^hadoop-*|^bigtop-*|^zookeeper-*|^parquet-*"
        self.HADOOP_ENVS = {
            "HADOOP_HOME": "/usr/lib/hadoop/",
            "HADOOP_COMMON_HOME": "/usr/lib/hadoop/",
            "HADOOP_HDFS_HOME": "/usr/lib/hadoop-hdfs/",
            "HADOOP_MAPRED_HOME": "/usr/lib/hadoop-mapreduce/",
            "YARN_HOME": "/usr/lib/hadoop-yarn/",
            "HADOOP_TMP_DIR": "%s/hadoop-hdfs/cache/" % self.hadoop_data_dir,
            "MAPRED_TMP_DIR": "%s/hadoop-mapreduce/cache/" % self.hadoop_data_dir,
            "YARN_TMP_DIR": "%s/hadoop-yarn/cache/" % self.hadoop_data_dir,
            "HADOOP_CONF_DIR": "/etc/hadoop/conf",
            "HADOOP_LOG_DIR": "%s/hadoop-logs/hadoop-hdfs" % self.hadoop_data_dir,
            "MAPRED_LOG_DIR": "%s/hadoop-logs/hadoop-mapreduce" % self.hadoop_data_dir,
            "YARN_LOG_DIR": "%s/hadoop-logs/hadoop-yarn" % self.hadoop_data_dir
        }

    def _remove_installed_pkgs(self):
        """Erase every installed package matching PKGS_TO_REMOVE."""
        self.rpmutil.erase_all_packages(self.PKGS_TO_REMOVE)

    def _install_dependency_pkgs(self):
        """Install the fuse/nc dependency packages if not already present."""
        for pkg in self.DEPENDENCY_PKGS:
            if not self.rpmutil.is_pkg_installed("^" + pkg):
                self.rpmutil.install_package_using_yum(pkg,
                                                       is_regex_pkg_name=True)

    def cleanup(self):
        """
        Clean-up process to:
        1. kill all the hadoop daemon process from previous runs if any
        2. Remove the contents from the hadoop installation & configuration locations
        """
        self.stop_hadoop()
        cmd_str = "ps aux | awk '/\-Dhadoop/{print $2}' | xargs sudo kill -9"
        run_shell_command(cmd_str, "Kill zombie hadoop daemons")
        # remove every hadoop install/data/log location plus /etc/gphd
        cmd_str = "sudo rm -rf "
        for value in self.HADOOP_ENVS.itervalues():
            cmd_str = cmd_str + value + "* "
        cmd_str = cmd_str + "/etc/gphd"
        run_shell_command(cmd_str, "Clean up HDFS files")
        self._remove_installed_pkgs()

    def download_binary(self):
        """
        1. Downloads the hadoop binary
        2. Untars the binary into the specified installation location
        """
        # check if the installation exists or not
        # delete its contents if exist else create a new one
        if os.path.isdir(self.hadoop_install_dir):
            cmd_str = "sudo rm -rf %s/*" % self.hadoop_install_dir
        else:
            cmd_str = "mkdir -p %s" % self.hadoop_install_dir
        run_shell_command(cmd_str, "Check Hadoop install directory")
        res = {'rc': 0, 'stdout': '', 'stderr': ''}
        run_shell_command("basename %s" % self.hadoop_artifact_url,
                          "To get binary name", res)
        binary_name = res['stdout'].split('\n')[0]
        tinctest.logger.debug("Hadoop Binary - %s" % binary_name)
        binary_path = os.path.join(self.hadoop_install_dir, binary_name)
        tinctest.logger.debug("Hadoop Binary Path - %s" % binary_path)
        cmd_str = "wget -O %s %s" % (binary_path, self.hadoop_artifact_url)
        res = {}
        if not run_shell_command(cmd_str, "Download binary", res):
            raise Exception("Failed to download hadoop binary: %s"
                            % res['stderr'])
        res = {}
        cmd_str = "sudo yum --nogpgcheck localinstall %s -y" % binary_path
        if not run_shell_command(cmd_str, "Extract binary", res):
            raise Exception("Failed to extract hadoop binary: %s"
                            % res['stderr'])

    def install_binary(self):
        """
        Installs RPM binaries of:
        1. utility eg. bigtop utils
        2. zookeeper
        3. hadoop
        """
        binaries_list = [
            "zookeeper", "hadoop-hdfs-namenode",
            "hadoop-hdfs-secondarynamenode", "hadoop-hdfs-datanode",
            "hadoop-yarn-resourcemanager", "hadoop-mapreduce",
            "hadoop-yarn-nodemanager", "hadoop-mapreduce-historyserver",
            "hadoop-yarn-proxyserver"
        ]
        yum_cmd = "sudo yum clean all; sudo yum install -y "
        for binary in binaries_list:
            res = {}
            cmd_str = yum_cmd + binary
            if not run_shell_command(cmd_str, "Installing %s" % binary, res):
                raise Exception("Failed to install %s : %s"
                                % (binary, res['stderr']))

    def install_hadoop_configurations(self):
        """
        Based on type of installation secure or non-secure, installs the
        updated template configuration files and makes required changes
        to the env files.
        """
        ##TODO: Create separate directories for secure & non-secure
        ## in the hadoop conf dir and copy the update configs in respective directories
        # check the type of hadoop installation - secure or non secure
        if self.secure_hadoop:
            # SECURE_DIR_NAME is expected to be present under template configuration directory
            secure_conf = os.path.join(self.template_conf_dir,
                                       self.SECURE_DIR_NAME)
            super(CDHRpmUtil, self).install_hadoop_configurations(
                secure_conf, self.HADOOP_ENVS['HADOOP_CONF_DIR'])
            # update env files in /etc/default/hadoop*
            if self.hadoop_data_dir.endswith('/'):
                self.hadoop_data_dir = self.hadoop_data_dir[:-1]
            cmd_str = "for env_file in `ls /etc/default/hadoop*`;" \
                      "do " \
                      "sudo sed -r -i 's:\/var\/log(\/gphd)?:\%s\/hadoop-logs:g' ${env_file};" \
                      "done" % self.hadoop_data_dir
            run_shell_command(cmd_str,
                              "Update env files in /etc/default/hadoop*")
            # update hadoop-env.sh file
            hadoop_env_file = os.path.join(
                self.HADOOP_ENVS['HADOOP_CONF_DIR'], "hadoop-env.sh")
            if not os.path.exists(hadoop_env_file):
                tinctest.logger.info(
                    "hadoop-env.sh not found..creating a new one!")
                run_shell_command("sudo touch %s" % hadoop_env_file,
                                  "Create hadoop-env.sh file")
            # give write permissions on the file
            self.give_others_write_perm(hadoop_env_file)
            text = "\n### Added env variables\n" \
                   "export JAVA_HOME=%s\n" \
                   "export HADOOP_OPTS=\"-Djava.net.preferIPv4Stack=true " \
                   "-Djava.library.path=$HADOOP_HOME/lib/native/\"\n" % self.get_java_home()
            self.append_text_to_file(hadoop_env_file, text)
            # revert back to old permissions
            self.remove_others_write_perm(hadoop_env_file)
            # update env files hadoop-hdfs-datanode & hadoop
            hdfs_datanode_env = "/etc/default/hadoop-hdfs-datanode"
            self.give_others_write_perm(hdfs_datanode_env)
            text = "\n### Secure env variables\n" \
                   "export HADOOP_SECURE_DN_USER=hdfs\n" \
                   "export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/hdfs\n" \
                   "export HADOOP_PID_DIR=/var/run/hadoop-hdfs/\n" \
                   "export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}\n" \
                   "export JSVC_HOME=/usr/libexec/bigtop-utils\n"
            self.append_text_to_file(hdfs_datanode_env, text)
            self.remove_others_write_perm(hdfs_datanode_env)
            # change the permissions of container-executor
            container_bin_path = os.path.join(self.HADOOP_ENVS['YARN_HOME'],
                                              'bin/container-executor')
            cmd_str = "sudo chown root:yarn %s" % container_bin_path
            run_shell_command(cmd_str)
            cmd_str = "sudo chmod 050 %s" % container_bin_path
            run_shell_command(cmd_str)
            cmd_str = "sudo chmod u+s %s" % container_bin_path
            run_shell_command(cmd_str)
            cmd_str = "sudo chmod g+s %s" % container_bin_path
            run_shell_command(cmd_str)
        else:
            # NON_SECURE_DIR_NAME is expected to be present under template configuration directory
            non_secure_conf = os.path.join(self.template_conf_dir,
                                           self.NON_SECURE_DIR_NAME)
            super(CDHRpmUtil, self).install_hadoop_configurations(
                non_secure_conf, self.HADOOP_ENVS['HADOOP_CONF_DIR'])

    def start_hdfs(self):
        """Formats the namenode and starts all HDFS daemons."""
        # format namenode
        cmd_str = "sudo -u hdfs hdfs --config %s namenode -format" \
                  % self.HADOOP_ENVS['HADOOP_CONF_DIR']
        if not run_shell_command(cmd_str):
            raise Exception("Exception in namnode formatting")
        # start namenode
        cmd_str = "sudo /etc/init.d/hadoop-hdfs-namenode start"
        if not run_shell_command(cmd_str):
            raise Exception("Namenode not started")
        # BUG FIX: the original raised "Namenode not started" when the
        # *datanode* failed to start, misidentifying the failing daemon.
        cmd_str = "sudo /etc/init.d/hadoop-hdfs-datanode start"
        if not run_shell_command(cmd_str):
            raise Exception("Datanode not started")
        cmd_str = "sudo /etc/init.d/hadoop-hdfs-secondarynamenode start"
        if not run_shell_command(cmd_str):
            raise Exception("Secondary namenode not started")

    def set_hdfs_permissions(self):
        """Open up the HDFS directory permissions needed by the daemons."""
        if self.secure_hadoop:
            hdfs_cmd = "sudo hdfs dfs"
        else:
            hdfs_cmd = "sudo -u hdfs hdfs dfs"
        # set hdfs permissions
        run_shell_command("%s -chmod -R 777 /" % hdfs_cmd)
        run_shell_command("%s -mkdir /tmp" % hdfs_cmd)
        run_shell_command("%s -chmod 777 /tmp" % hdfs_cmd)
        run_shell_command("%s -mkdir -p /var/log/hadoop-yarn" % hdfs_cmd)
        run_shell_command("%s -mkdir -p /user" % hdfs_cmd)
        run_shell_command("%s -mkdir -p /user/history" % hdfs_cmd)
        run_shell_command("%s -chmod -R 1777 /user/history" % hdfs_cmd)
        run_shell_command("%s -chmod -R 777 /user/" % hdfs_cmd)

    def put_file_in_hdfs(self, input_path, hdfs_path):
        """Copy a local file into HDFS, creating parent directories."""
        if hdfs_path.rfind('/') > 0:
            hdfs_dir = hdfs_path[:hdfs_path.rfind('/')]
            cmd_str = "hdfs dfs -mkdir -p %s" % hdfs_dir
            run_shell_command(cmd_str,
                              "Creating parent HDFS dir for path %s"
                              % input_path)
        cmd_str = "hdfs dfs -put %s %s" % (input_path, hdfs_path)
        run_shell_command(cmd_str, "Copy to HDFS : file %s" % input_path)

    def remove_file_from_hdfs(self, hdfs_path):
        """Recursively remove the given path from HDFS."""
        cmd_str = "hdfs dfs -rm -r %s" % hdfs_path
        run_shell_command(cmd_str, "Remove %s from HDFS" % hdfs_path)

    def start_yarn(self):
        """Starts the YARN daemons and the mapreduce history server."""
        # start yarn daemons
        self.set_hdfs_permissions()
        # start resource manager
        cmd_str = "sudo /etc/init.d/hadoop-yarn-resourcemanager start"
        if not run_shell_command(cmd_str):
            raise Exception("Resource manager not started")
        # start node manager
        cmd_str = "sudo /etc/init.d/hadoop-yarn-nodemanager start"
        if not run_shell_command(cmd_str):
            raise Exception("Node manager not started")
        # start history server
        cmd_str = "sudo /etc/init.d/hadoop-mapreduce-historyserver start"
        if not run_shell_command(cmd_str):
            raise Exception("History server not started")

    def start_hadoop(self):
        """
        Starts the PHD cluster and checks the JPS status
        """
        self.start_hdfs()
        self.start_yarn()
        res = {}
        # run jps command & check for hadoop daemons
        cmd_str = "sudo jps"
        run_shell_command(cmd_str, "Check Hadoop Daemons", res)
        result = res['stdout']
        tinctest.logger.info(
            "\n**** Following Hadoop Daemons started **** \n%s" % result)
        tinctest.logger.info("*** Hadoop Started Successfully!!")

    def stop_hadoop(self):
        """
        Stops the PHD cluster
        """
        run_shell_command("sudo /etc/init.d/hadoop-mapreduce-historyserver stop",
                          "Stop history-server")
        run_shell_command("sudo /etc/init.d/hadoop-yarn-nodemanager stop",
                          "Stop Node manager")
        run_shell_command("sudo /etc/init.d/hadoop-yarn-resourcemanager stop",
                          "Stop resourcemanager")
        run_shell_command("sudo /etc/init.d/hadoop-hdfs-secondarynamenode stop",
                          "Stop secondarynamenode")
        run_shell_command("sudo /etc/init.d/hadoop-hdfs-datanode stop",
                          "Stop datanode")
        run_shell_command("sudo /etc/init.d/hadoop-hdfs-namenode stop",
                          "Stop namenode")

    def get_hadoop_env(self):
        """
        Returns a dictionary of hadoop environment variables like:
        1. HADOOP_HOME
        2. HADOOP_CONF_DIR
        3. HADOOP_COMMON_HOME
        4. HADOOP_HDFS_HOME
        5. YARN_HOME
        6. HADOOP_MAPRED_HOME
        """
        return self.HADOOP_ENVS

    def init_cluster(self):
        """
        Init point for starting up the PHD cluster
        """
        self.download_binary()
        self.cleanup()
        self.install_binary()
        self.install_hadoop_configurations()
        self.start_hadoop()
class CDHRpmUtil(HadoopUtil): """Utility for installing PHD Single node clusters using RPMs""" def __init__(self, hadoop_artifact_url, hadoop_install_dir, hadoop_data_dir, template_conf_dir, hostname='localhost', secure_hadoop=False): HadoopUtil.__init__(self, hadoop_artifact_url, hadoop_install_dir, hadoop_data_dir, hostname) self.rpmutil = RPMUtil() self.hostname = hostname self.hadoop_artifact_url = hadoop_artifact_url self.hadoop_install_dir = hadoop_install_dir self.hadoop_binary_loc = '' self.hadoop_data_dir = hadoop_data_dir self.template_conf_dir = local_path(template_conf_dir) self.secure_hadoop = secure_hadoop # Constants # under the hadoop template configuration directory # both the below directories should be present self.SECURE_DIR_NAME = "conf.secure" # secure configuration files location self.NON_SECURE_DIR_NAME = "conf.pseudo" # non-secure configuration files location self.DEPENDENCY_PKGS = [ "fuse-", # eg. fuse-2.8.3-4.el6.x86_64 "fuse-libs", # eg. fuse-libs-2.8.3-4.el6.x86_6 "nc-" # eg. 
1.84-22.el6.x86_64" ] self.PKGS_TO_REMOVE = "^hadoop-*|^bigtop-*|^zookeeper-*|^parquet-*" self.HADOOP_ENVS = { "HADOOP_HOME": "/usr/lib/hadoop/", "HADOOP_COMMON_HOME": "/usr/lib/hadoop/", "HADOOP_HDFS_HOME": "/usr/lib/hadoop-hdfs/", "HADOOP_MAPRED_HOME": "/usr/lib/hadoop-mapreduce/", "YARN_HOME": "/usr/lib/hadoop-yarn/", "HADOOP_TMP_DIR": "%s/hadoop-hdfs/cache/" % self.hadoop_data_dir, "MAPRED_TMP_DIR": "%s/hadoop-mapreduce/cache/" % self.hadoop_data_dir, "YARN_TMP_DIR": "%s/hadoop-yarn/cache/" % self.hadoop_data_dir, "HADOOP_CONF_DIR": "/etc/hadoop/conf", "HADOOP_LOG_DIR": "%s/hadoop-logs/hadoop-hdfs" % self.hadoop_data_dir, "MAPRED_LOG_DIR": "%s/hadoop-logs/hadoop-mapreduce" % self.hadoop_data_dir, "YARN_LOG_DIR": "%s/hadoop-logs/hadoop-yarn" % self.hadoop_data_dir } def _remove_installed_pkgs(self): self.rpmutil.erase_all_packages(self.PKGS_TO_REMOVE) def _install_dependency_pkgs(self): for pkg in self.DEPENDENCY_PKGS: if not self.rpmutil.is_pkg_installed("^" + pkg): self.rpmutil.install_package_using_yum(pkg, is_regex_pkg_name=True) def cleanup(self): """ Clean-up process to: 1. kill all the hadoop daemon process from previous runs if any 2. Remove the contents from the hadoop installation & configuration locations """ self.stop_hadoop() cmd_str = "ps aux | awk '/\-Dhadoop/{print $2}' | xargs sudo kill -9" run_shell_command(cmd_str, "Kill zombie hadoop daemons") cmd_str = "sudo rm -rf " for key, value in self.HADOOP_ENVS.iteritems(): cmd_str = cmd_str + value + "* " cmd_str = cmd_str + "/etc/gphd" run_shell_command(cmd_str, "Clean up HDFS files") self._remove_installed_pkgs() def download_binary(self): """ 1. Downloads the hadoop binary 2. 
Untars the binary into the specified installation location """ # check if the installation exists or not # delete its contents if exist else create a new one if os.path.isdir(self.hadoop_install_dir): cmd_str = "sudo rm -rf %s/*" % self.hadoop_install_dir else: cmd_str = "mkdir -p %s" % self.hadoop_install_dir run_shell_command(cmd_str, "Check Hadoop install directory") res = {'rc': 0, 'stdout': '', 'stderr': ''} run_shell_command("basename %s" % self.hadoop_artifact_url, "To get binary name", res) binary_name = res['stdout'].split('\n')[0] tinctest.logger.debug("Hadoop Binary - %s" % binary_name) binary_path = os.path.join(self.hadoop_install_dir, binary_name) tinctest.logger.debug("Hadoop Binary Path - %s" % binary_path) cmd_str = "wget -O %s %s" % (binary_path, self.hadoop_artifact_url) res = {} result = run_shell_command(cmd_str, "Download binary", res) if not result: raise Exception("Failed to download hadoop binary: %s" % res['stderr']) res = {} cmd_str = "sudo yum --nogpgcheck localinstall %s -y" % binary_path result = run_shell_command(cmd_str, "Extract binary", res) if not result: raise Exception("Failed to extract hadoop binary: %s" % res['stderr']) def install_binary(self): """ Installs RPM binaries of: 1. utility eg. bigtop utils 2. zookeeper 3. hadoop """ binaries_list = [ "zookeeper", "hadoop-hdfs-namenode", "hadoop-hdfs-secondarynamenode", \ "hadoop-hdfs-datanode", "hadoop-yarn-resourcemanager", "hadoop-mapreduce", \ "hadoop-yarn-nodemanager", "hadoop-mapreduce-historyserver", \ "hadoop-yarn-proxyserver" ] yum_cmd = "sudo yum clean all; sudo yum install -y " for binary in binaries_list: res = {} cmd_str = yum_cmd + binary if not run_shell_command(cmd_str, "Installing %s" % binary, res): raise Exception("Failed to install %s : %s" % (binary, res['stderr'])) def install_hadoop_configurations(self): """ Based on type of installation secure or non-secure, installs the updated template configuration files and makes required changes to the env files. 
""" ##TODO: Create separate directories for secure & non-secure ## in the hadoop conf dir and copy the update configs in respective directories # check the type of hadoop installation - secure or non secure if self.secure_hadoop: # SECURE_DIR_NAME is expected to be present under template configuration directory secure_conf = os.path.join(self.template_conf_dir, self.SECURE_DIR_NAME) super(CDHRpmUtil, self).install_hadoop_configurations( secure_conf, self.HADOOP_ENVS['HADOOP_CONF_DIR']) # update env files in /etc/default/hadoop* if self.hadoop_data_dir.endswith('/'): self.hadoop_data_dir = self.hadoop_data_dir[:-1] cmd_str = "for env_file in `ls /etc/default/hadoop*`;" \ "do " \ "sudo sed -r -i 's:\/var\/log(\/gphd)?:\%s\/hadoop-logs:g' ${env_file};" \ "done" %self.hadoop_data_dir run_shell_command(cmd_str, "Update env files in /etc/default/hadoop*") # update hadoop-env.sh file hadoop_env_file = os.path.join(self.HADOOP_ENVS['HADOOP_CONF_DIR'], "hadoop-env.sh") if not os.path.exists(hadoop_env_file): tinctest.logger.info( "hadoop-env.sh not found..creating a new one!") run_shell_command("sudo touch %s" % hadoop_env_file, "Create hadoop-env.sh file") # give write permissions on the file self.give_others_write_perm(hadoop_env_file) text = "\n### Added env variables\n" \ "export JAVA_HOME=%s\n" \ "export HADOOP_OPTS=\"-Djava.net.preferIPv4Stack=true " \ "-Djava.library.path=$HADOOP_HOME/lib/native/\"\n" %self.get_java_home() self.append_text_to_file(hadoop_env_file, text) # revert back to old permissions self.remove_others_write_perm(hadoop_env_file) # update env files hadoop-hdfs-datanode & hadoop hdfs_datanode_env = "/etc/default/hadoop-hdfs-datanode" self.give_others_write_perm(hdfs_datanode_env) text = "\n### Secure env variables\n" \ "export HADOOP_SECURE_DN_USER=hdfs\n" \ "export HADOOP_SECURE_DN_LOG_DIR=${HADOOP_LOG_DIR}/hdfs\n" \ "export HADOOP_PID_DIR=/var/run/hadoop-hdfs/\n" \ "export HADOOP_SECURE_DN_PID_DIR=${HADOOP_PID_DIR}\n" \ "export 
JSVC_HOME=/usr/libexec/bigtop-utils\n" self.append_text_to_file(hdfs_datanode_env, text) self.remove_others_write_perm(hdfs_datanode_env) # change the permissions of container-executor container_bin_path = os.path.join(self.HADOOP_ENVS['YARN_HOME'], 'bin/container-executor') cmd_str = "sudo chown root:yarn %s" % container_bin_path run_shell_command(cmd_str) cmd_str = "sudo chmod 050 %s" % container_bin_path run_shell_command(cmd_str) cmd_str = "sudo chmod u+s %s" % container_bin_path run_shell_command(cmd_str) cmd_str = "sudo chmod g+s %s" % container_bin_path run_shell_command(cmd_str) else: # NON_SECURE_DIR_NAME is expected to be present under template configuration directory non_secure_conf = os.path.join(self.template_conf_dir, self.NON_SECURE_DIR_NAME) super(CDHRpmUtil, self).install_hadoop_configurations( non_secure_conf, self.HADOOP_ENVS['HADOOP_CONF_DIR']) def start_hdfs(self): # format namenode cmd_str = "sudo -u hdfs hdfs --config %s namenode -format" % self.HADOOP_ENVS[ 'HADOOP_CONF_DIR'] namenode_formatted = run_shell_command(cmd_str) if not namenode_formatted: raise Exception("Exception in namnode formatting") # start namenode cmd_str = "sudo /etc/init.d/hadoop-hdfs-namenode start" namenode_started = run_shell_command(cmd_str) if not namenode_started: raise Exception("Namenode not started") cmd_str = "sudo /etc/init.d/hadoop-hdfs-datanode start" namenode_started = run_shell_command(cmd_str) if not namenode_started: raise Exception("Namenode not started") cmd_str = "sudo /etc/init.d/hadoop-hdfs-secondarynamenode start" namenode_started = run_shell_command(cmd_str) if not namenode_started: raise Exception("Secondary namenode not started") def set_hdfs_permissions(self): if self.secure_hadoop: hdfs_cmd = "sudo hdfs dfs" else: hdfs_cmd = "sudo -u hdfs hdfs dfs" # set hdfs permissions cmd_str = "%s -chmod -R 777 /" % hdfs_cmd run_shell_command(cmd_str) cmd_str = "%s -mkdir /tmp" % hdfs_cmd run_shell_command(cmd_str) cmd_str = "%s -chmod 777 /tmp" % hdfs_cmd 
run_shell_command(cmd_str) cmd_str = "%s -mkdir -p /var/log/hadoop-yarn" % hdfs_cmd run_shell_command(cmd_str) cmd_str = "%s -mkdir -p /user" % hdfs_cmd run_shell_command(cmd_str) cmd_str = "%s -mkdir -p /user/history" % hdfs_cmd run_shell_command(cmd_str) cmd_str = "%s -chmod -R 1777 /user/history" % hdfs_cmd run_shell_command(cmd_str) cmd_str = "%s -chmod -R 777 /user/" % hdfs_cmd run_shell_command(cmd_str) def put_file_in_hdfs(self, input_path, hdfs_path): if hdfs_path.rfind('/') > 0: hdfs_dir = hdfs_path[:hdfs_path.rfind('/')] cmd_str = "hdfs dfs -mkdir -p %s" % hdfs_dir run_shell_command( cmd_str, "Creating parent HDFS dir for path %s" % input_path) cmd_str = "hdfs dfs -put %s %s" % (input_path, hdfs_path) run_shell_command(cmd_str, "Copy to HDFS : file %s" % input_path) def remove_file_from_hdfs(self, hdfs_path): cmd_str = "hdfs dfs -rm -r %s" % hdfs_path run_shell_command(cmd_str, "Remove %s from HDFS" % hdfs_path) def start_yarn(self): # start yarn daemons # start resource manager self.set_hdfs_permissions() cmd_str = "sudo /etc/init.d/hadoop-yarn-resourcemanager start" namenode_started = run_shell_command(cmd_str) if not namenode_started: raise Exception("Resource manager not started") # start node manager cmd_str = "sudo /etc/init.d/hadoop-yarn-nodemanager start" namenode_started = run_shell_command(cmd_str) if not namenode_started: raise Exception("Node manager not started") # start history server cmd_str = "sudo /etc/init.d/hadoop-mapreduce-historyserver start" namenode_started = run_shell_command(cmd_str) if not namenode_started: raise Exception("History server not started") def start_hadoop(self): """ Starts the PHD cluster and checks the JPS status """ self.start_hdfs() self.start_yarn() res = {} # run jps command & check for hadoop daemons cmd_str = "sudo jps" run_shell_command(cmd_str, "Check Hadoop Daemons", res) result = res['stdout'] tinctest.logger.info( "\n**** Following Hadoop Daemons started **** \n%s" % result) tinctest.logger.info("*** 
Hadoop Started Successfully!!") def stop_hadoop(self): """ Stops the PHD cluster """ run_shell_command( "sudo /etc/init.d/hadoop-mapreduce-historyserver stop", "Stop history-server") run_shell_command("sudo /etc/init.d/hadoop-yarn-nodemanager stop", "Stop Node manager") run_shell_command("sudo /etc/init.d/hadoop-yarn-resourcemanager stop", "Stop resourcemanager") run_shell_command( "sudo /etc/init.d/hadoop-hdfs-secondarynamenode stop", "Stop secondarynamenode") run_shell_command("sudo /etc/init.d/hadoop-hdfs-datanode stop", "Stop datanode") run_shell_command("sudo /etc/init.d/hadoop-hdfs-namenode stop", "Stop namenode") def get_hadoop_env(self): """ Returns a dictionary of hadoop environment variables like: 1. HADOOP_HOME 2. HADOOP_CONF_DIR 3. HADOOP_COMMON_HOME 4. HADOOP_HDFS_HOME 5. YARN_HOME 6. HADOOP_MAPRED_HOME """ return self.HADOOP_ENVS def init_cluster(self): """ Init point for starting up the PHD cluster """ self.download_binary() self.cleanup() self.install_binary() self.install_hadoop_configurations() self.start_hadoop()