def setup(cls, S3_AWS_ACCESS_KEY=None, S3_AWS_SECRET=None):
    '''
    Install awscli on all cluster nodes and write the AWS CLI config and
    credentials files for the root user on the gateway node.
    '''
    Machine.installPackageWithPip(packages="awscli", hosts=Hadoop.getAllNodes(), logoutput=True)
    aws_home = "/root/.aws"
    if not os.path.exists(aws_home):
        Machine.makedirs(ADMIN_USER, GATEWAY_NODE, "/root/.aws", ADMIN_PWD)
    util.writeToFile("[default]\nregion = us-west-2\noutput=json", os.path.join(ARTIFACTS_DIR, "config"))
    if S3_AWS_ACCESS_KEY:
        cls._aws_access_key_id = S3_AWS_ACCESS_KEY
    else:
        cls._aws_access_key_id = Config.get('machine', 'S3_AWS_ACCESS_KEY')
    if S3_AWS_SECRET:
        cls._aws_secret_access_key = S3_AWS_SECRET
    else:
        cls._aws_secret_access_key = Config.get('machine', 'S3_AWS_SECRET')
    util.writeToFile(
        "[default]\naws_access_key_id = %s\naws_secret_access_key = %s" %
        (cls._aws_access_key_id, cls._aws_secret_access_key),
        os.path.join(ARTIFACTS_DIR, "credentials"))
    Machine.runas(ADMIN_USER, "chown %s '%s/config'" % (ADMIN_USER, ARTIFACTS_DIR), GATEWAY_NODE, ADMIN_PWD)
    Machine.runas(ADMIN_USER, "chown %s '%s/credentials'" % (ADMIN_USER, ARTIFACTS_DIR), GATEWAY_NODE, ADMIN_PWD)
    Machine.copy(os.path.join(ARTIFACTS_DIR, "config"), aws_home, ADMIN_USER, ADMIN_PWD)
    Machine.copy(os.path.join(ARTIFACTS_DIR, "credentials"), aws_home, ADMIN_USER, ADMIN_PWD)
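# A minimal usage sketch, assuming setup() is exposed as a classmethod on an S3 helper
# class (named S3Setup here purely for illustration); the class name and the credential
# values are hypothetical, only the parameters come from the signature above.
def _example_s3_setup():
    # Fall back to the 'machine' section of the framework config when no keys are passed.
    S3Setup.setup()
    # Or pass credentials explicitly, e.g. when testing against a non-default account.
    S3Setup.setup(S3_AWS_ACCESS_KEY="AKIA-EXAMPLE", S3_AWS_SECRET="example-secret")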
def getAttemptIdsForJobIdAndStoreInFile(jobId, myTask="map"):
    artifactsDir = CommonHadoopEnv.getArtifactsDir()
    saveFilePath = os.path.join(artifactsDir, "AttemptIdFile")
    listAttemptCmd = " job -list-attempt-ids " + jobId + " " + myTask + " running "
    out = Hadoop.run(listAttemptCmd)
    buf = StringIO.StringIO(out[1])
    util.writeToFile(out[1], saveFilePath)
def run(self):
    """
    Move files to HDFS Input Dir after each interval period for n times.
    """
    for count in range(0, self.times):
        text = "hello world \n Testing HDFS Word count Spark application"
        random_name = ''.join(random.choice(string.lowercase) for i in range(5))
        filename = os.path.join(Config.getEnv('ARTIFACTS_DIR'), random_name)
        util.writeToFile(text, filename, isAppend=False)
        max_retry = 3
        retry = 0
        while retry < max_retry:
            try:
                if "hdfs://ns2" in self.hdfs_input_dir:
                    cp_status = HDFS.copyFromLocal(filename, "hdfs://ns2/tmp", enableDebug=True)
                else:
                    cp_status = HDFS.copyFromLocal(filename, "/tmp", enableDebug=True)
                assert cp_status[0] == 0, "Failed to copy file to HDFS 'tmp'"
                logger.info("copyFromLocal command finished for %s" % filename)
                if "hdfs://ns2" in self.hdfs_input_dir:
                    mv_status = HDFS.mv(None, "hdfs://ns2/tmp/" + random_name, self.hdfs_input_dir, config=None)
                else:
                    mv_status = HDFS.mv(None, "/tmp/" + random_name, self.hdfs_input_dir, config=None)
                assert mv_status[0] == 0, "Failed to move file from 'tmp' to test directory"
            except Exception:
                retry = retry + 1
                if retry < max_retry:
                    logger.info(
                        "File copy into HDFS test directory failed on attempt %s, "
                        "retrying after 120s sleep interval" % retry)
                    time.sleep(120)
                else:
                    logger.error("Failed to copy file into HDFS test directory, expect failures in HDFSWordCount")
            else:
                break
        logger.info("%s moved to %s" % (filename, self.hdfs_input_dir))
        logger.info("sleeping for %s seconds" % self.interval)
        time.sleep(self.interval)
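# A minimal sketch of how this worker might be driven, assuming the enclosing class
# (called HDFSInputMover here purely for illustration) is a threading.Thread subclass whose
# constructor sets hdfs_input_dir, interval and times; those attribute names come from
# run() above, the class name and constructor signature are assumptions.
def _example_hdfs_input_mover():
    mover = HDFSInputMover(hdfs_input_dir="/user/hrt_qa/streaming_input", interval=60, times=10)
    mover.start()   # run() pushes one small file into the input dir every 60 seconds
    mover.join()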
def background_job_when_master_upgrade(cls):
    '''
    Start a background application which runs while component master service gets upgraded
    :return:
    '''
    from beaver.component.hive import Hive
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    UpgradePerNode.reportProgress(
        "[INFO][Hive][BGJob] Background Job test setup when upgrading Hive started")
    logger.info("Creating hive tables for short background jobs")
    query = "drop table if exists shortlr_hive_verify;\n"
    query += "create table shortlr_hive_verify (userid string, age int);\n"
    query += "drop table if exists shortlr_bline_verify;\n"
    query += "create table shortlr_bline_verify (userid string, age int);\n"
    short_bgjob_setupfile = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'shortlrsetup.sql')
    util.writeToFile(query, short_bgjob_setupfile)
    exit_code, stdout = Hive.run("-f " + short_bgjob_setupfile)
    if exit_code != 0:
        UpgradePerNode.reportProgress(
            "[FAILED][Hive][BGJob] Background Job test setup when Hive upgrades failed due to exitcode = %d" % exit_code)

    logger.info("Running the Background Job when upgrading Hive")
    UpgradePerNode.reportProgress(
        "[INFO][Hive][BGJob] Long running job for Hive component upgrades started")

    setqueue = ""
    if Hive.isTezEnabled():
        setqueue = "set tez.queue.name=%s; " % cls._yarn_queue
    else:
        setqueue = "set mapred.job.queue.name=%s; " % cls._yarn_queue

    logger.info("**** Running Hive CLI Test ****")
    query = setqueue + " insert overwrite table shortlr_hive_verify select userid, avg(age) from %s group by userid order by userid;" % cls._bgjtest_tbl
    cls._shortbgj_hive_process = Hive.runQuery(query, background=True)

    # Sleeping for 10 seconds to make sure that query initializes before Metastore is restarted
    time.sleep(10)

    logger.info("**** Running Beeline CLI Test ****")
    query = setqueue + "\ninsert overwrite table shortlr_bline_verify select userid, avg(age) from %s group by userid order by userid;" % cls._bgjtest_tbl
    cls._shortbgj_bline_process = Hive.runQueryOnBeeline(query, readFromFile=True, background=True)

    UpgradePerNode.reportProgress(
        "[INFO][Hive][BGJob] Background Job test setup when Hive upgrades finished")
def modifyConfigChaosMonkey(changes, confDir, updatedConfDir, nodes, isFirstUpdate=True):
    if Machine.type() == 'Windows':
        # clean up the backup
        Machine.rm(None, Machine.getfqdn(), BACKUP_CONFIG_LOCATION, isdir=True, passwd=None)
        util.copyDir(confDir, BACKUP_CONFIG_LOCATION)
    if isFirstUpdate:
        tmpConfDir = os.path.join(ARTIFACTS_DIR, 'tmpModifyConfDir_' + str(int(round(time.time() * 1000))))
        Config.set(PYTHON_CONFIG_NAME, TMP_CONF_DIR_VAR, tmpConfDir, overwrite=True)
    tmpConfDir = Config.get(PYTHON_CONFIG_NAME, TMP_CONF_DIR_VAR)
    if isFirstUpdate:
        util.copyDir(confDir, tmpConfDir)
    for filename, values in changes.items():
        filepath = os.path.join(tmpConfDir, filename)
        if os.path.isfile(filepath):
            logger.info("Modifying file: %s", filepath)
            _fname, fext = os.path.splitext(filepath)
            if fext == ".xml":
                # Drop the last line (the closing tag) so the raw XML snippet can be appended.
                with open(filepath, 'r') as f:
                    lines = f.readlines()
                del lines[-1]
                with open(filepath, 'w') as f:
                    f.writelines(lines)
            util.writeToFile(values, filepath, isAppend=True)
    # in windows world copy the configs back to the src location
    if Machine.type() == 'Windows':
        for node in nodes:
            for filename in changes.keys():
                Machine.copyFromLocal(
                    None, node, os.path.join(tmpConfDir, filename), os.path.join(confDir, filename), passwd=None)
    else:
        for node in nodes:
            Machine.rm(None, node, updatedConfDir, isdir=True)
            Machine.copyFromLocal(None, node, tmpConfDir, updatedConfDir)
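# A minimal usage sketch for modifyConfigChaosMonkey, assuming the layout it expects:
# each value is a raw text snippet appended to the named file (for .xml files the closing
# tag is stripped first, so the snippet should re-close the document). The property shown
# and the config paths are purely illustrative.
def _example_modify_config_chaos_monkey():
    changes = {
        'hdfs-site.xml': (
            "<property><name>dfs.namenode.heartbeat.recheck-interval</name>"
            "<value>30000</value></property>\n</configuration>\n")
    }
    modifyConfigChaosMonkey(changes, "/etc/hadoop/conf", "/tmp/hadoopConf", Hadoop.getAllNodes())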
def reportProgress(cls, message, is_info_message):
    message = message + "\n"
    timestamp = datetime.datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S,%f')[:-3]
    if is_info_message:
        logger.info(message)
        util.writeToFile(timestamp + "|INFO|" + message.encode('utf-8'), cls.UPGRADE_STATUS_LOG_FILE, isAppend=True)
    else:
        logger.error(message)
        util.writeToFile(timestamp + "|ERROR|" + message.encode('utf-8'), cls.UPGRADE_STATUS_LOG_FILE, isAppend=True)
def doSetup(cls, hdfs_test_dir, tbl_name, num_of_rows, type):
    from beaver.component.hive import Hive
    from beaver.component.rollingupgrade.ruUpgrade import UpgradePerNode
    logger.info("Generating test table dataset with %d rows" % num_of_rows)
    test_data_file = os.path.join(Config.getEnv('ARTIFACTS_DIR'), tbl_name + ".dat")
    f = open(test_data_file, 'w')
    userid = 100000
    for i in xrange(num_of_rows):
        # each userid gets 3 to 8 rows with a random age
        for j in range(random.randint(3, 8)):
            f.write("%d|%d\n" % (userid + i, random.randint(10, 80)))
    f.close()

    hdfs_tbl_dir = hdfs_test_dir + "/" + tbl_name
    logger.info("Copying the test dataset to HDFS directory '%s'" % hdfs_tbl_dir)
    HDFS.createDirectory(hdfs_test_dir, user=cls._hdfs_user, perm='777', force=True)
    HDFS.createDirectory(hdfs_tbl_dir, perm='777')
    HDFS.copyFromLocal(test_data_file, hdfs_tbl_dir)
    HDFS.chmod(cls._hdfs_user, '777', hdfs_tbl_dir)

    logger.info("Creating table '%s' and verification tables" % tbl_name)
    query = "drop table if exists %s;\n" % tbl_name
    query += "create external table %s (userid string, age int) row format delimited fields terminated by '|' stored as textfile location '%s';\n" % (
        tbl_name, hdfs_tbl_dir)
    query += "drop table if exists %s_hive_verify;\n" % tbl_name
    query += "create table %s_hive_verify (userid string, age int);\n" % tbl_name
    if type == "Long running":
        for i in range(cls._num_of_webhcat_bgj):
            query += "drop table if exists %s_wh_%d;\n" % (tbl_name, i + 1)
            query += "create table %s_wh_%d (userid string, age int);\n" % (tbl_name, i + 1)
    hivesetupfile = os.path.join(Config.getEnv('ARTIFACTS_DIR'), "hivesetup.sql")
    util.writeToFile(query, hivesetupfile)
    exit_code, stdout = Hive.run("-f " + hivesetupfile, logoutput=False)
    if type:
        msg = "%s job setup for Hive component" % type
        if exit_code != 0:
            UpgradePerNode.reportProgress("[FAILED][Hive][Setup] %s failed due to exitcode = %d" % (msg, exit_code))
        else:
            UpgradePerNode.reportProgress("[PASSED][Hive][Setup] %s finished successfully" % msg)
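# A minimal sketch of invoking doSetup, assuming it is exposed as a classmethod on the Hive
# rolling-upgrade test class (named HiveUpgradeTest here purely for illustration); the HDFS
# directory, table name and row count are illustrative values, only the parameter order and
# the "Long running" type string come from the code above.
def _example_hive_do_setup():
    HiveUpgradeTest.doSetup("/tmp/hive_ru_test", "lr_test_tbl", num_of_rows=1000, type="Long running")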
def reportProgress(cls, message):
    '''
    Method to report Upgrade status messages in Local File and HDFS File
    :param message: Message to be appended in Local status file and HDFS status file
    '''
    tmp_append_file = os.path.join(Config.getEnv('ARTIFACTS_DIR'), 'tmp_append')
    if Machine.pathExists(None, None, tmp_append_file, None):
        Machine.rm(None, None, tmp_append_file, isdir=False, passwd=None)
    #if not HDFS.fileExists(cls._PROGRESS_STAUS_HDFS_FILE, user=None):
    #    HDFS.touchz(cls._PROGRESS_STAUS_HDFS_FILE)
    message = "\n" + message + "\n"
    logger.info(message)
    util.writeToFile(message, cls._PROGRESS_STATUS_LOCAL_FILE, isAppend=True)
    util.writeToFile(message, tmp_append_file, isAppend=False)
def runas(cls, user, cmd, host="", cwd=None, env=None, logoutput=True, passwd=None):
    if Machine.isLinux():
        if user is None and passwd is None:
            user = Machine.getAdminUser()
        keypairLocation = "/home/hrt_qa/.ssh/id_rsa_log_server_hrt_qa"
        if Machine.isHumboldt():
            # for secure cluster in humboldt the path is different
            if not os.path.exists(keypairLocation):
                keypairLocation = "/home/HDINSIGHT/hrt_qa/.ssh/id_rsa_log_server_hrt_qa"
            # pylint: disable=line-too-long
            cmd = "rsync -e 'ssh -i %s -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p %s -o PreferredAuthentications=publickey' %s" % (
                keypairLocation, ssh_port_number, cmd)
            # pylint: enable=line-too-long
        else:
            # pylint: disable=line-too-long
            cmd = "rsync -e \"ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p %s -o PreferredAuthentications=publickey\" %s" % (
                ssh_port_number, cmd)
            # pylint: enable=line-too-long
        if logoutput:
            logger.info("RSync.runas cmd=%s", cmd)
        return Machine.runas(user, cmd, host, cwd, env, logoutput, passwd, retry_count, retry_sleep_time)
    else:
        rsyncHome = "/cygdrive/c/testtools/cwrsync"
        rsyncLocation = "c:\\testtools\\cwrsync\\rsync.exe"
        # must use c:\\ so remote powershell can pick this up.
        keypairLocation = "c:\\testtools\\id_rsa_log_server_hrt_qa"
        # pylint: disable=line-too-long
        cmd = "%s -e \"%s/ssh -i %s -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -p %s -o PreferredAuthentications=publickey\" %s" % \
            (rsyncLocation, rsyncHome, keypairLocation, ssh_port_number, cmd)
        # pylint: enable=line-too-long
        if logoutput:
            logger.info("RSync.runas cmd=%s", cmd)
        if not Machine.isSameHost(host, None):
            # remote: there is no way to run this on the fly, so create a bat file
            # for the command and run that file instead.
            batchFilename = 'rsync-%s.bat' % str(int(time.time()))
            tmpLocalFile = os.path.join(Config.getEnv('ARTIFACTS_DIR'), batchFilename)
            logger.info("write to %s. contents=%s", tmpLocalFile, cmd)
            util.writeToFile(cmd, tmpLocalFile)
            destPath = os.path.join(Machine.getTempDir(), batchFilename)
            Machine.copyFromLocal(Machine.getAdminUser(), host, tmpLocalFile, destPath, Machine.getAdminPasswd())
            cmd = destPath
        return Machine.runas(
            Machine.getAdminUser(),
            cmd,
            host,
            cwd,
            env,
            logoutput,
            passwd=Machine.getAdminPasswd(),
            retry_count=retry_count,
            retry_sleep_time=retry_sleep_time)
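# A minimal sketch of calling RSync.runas, assuming cmd carries everything after the
# "rsync -e <ssh opts>" prefix that runas() builds (i.e. the rsync flags, source path and
# user@host:dest target). The log-server host and paths below are purely illustrative.
def _example_rsync_push_logs():
    RSync.runas(
        None,
        "-rz /grid/0/log/hadoop/ hrt_qa@logserver.example.com:/logs/%s/" % Machine.getfqdn(),
        logoutput=True)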
def modifyConfig(  # pylint: disable=redefined-builtin
        changes, confDir, updatedConfDir, nodes, isFirstUpdate=True, makeCurrConfBackupInWindows=True, id=None):
    '''
    Modifies hadoop config or config with similar structure.
    Returns None.

    Linux:
    1. Create tmpModifyConfDir_<time> in artifacts dir based on source config directory in gateway
    2. Modify contents in created directory.
    3. Copy the directory to /tmp/hadoopConf in target machines

    Windows:
    1. If makeCurrConfBackupInWindows is True, backup current config first.
       Copy current config to artifacts/HDPStackBackupConfig
    2. Create tmpModifyConfDir_<time> in gateway.
    3. Modify contents in created directory.
    4. Copy the directory to target machines. Replace config in default locations in remote machines.

    Calling modifyConfig twice makes the changes cumulative.
    '''
    backuploc = getBackupConfigLocation(id=id)
    if Machine.type() == 'Windows' and makeCurrConfBackupInWindows:
        # clean up the backup
        Machine.rm(None, Machine.getfqdn(), backuploc, isdir=True, passwd=None)
        util.copyReadableFilesFromDir(confDir, backuploc)
    if isFirstUpdate:
        tmpConfDir = os.path.join(ARTIFACTS_DIR, 'tmpModifyConfDir_' + str(int(round(time.time() * 1000))))
        Config.set(PYTHON_CONFIG_NAME, TMP_CONF_DIR_VAR, tmpConfDir, overwrite=True)
    tmpConfDir = Config.get(PYTHON_CONFIG_NAME, TMP_CONF_DIR_VAR)
    if isFirstUpdate:
        util.copyReadableFilesFromDir(confDir, tmpConfDir)
    for filename, values in changes.items():
        filepath = os.path.join(tmpConfDir, filename)
        if os.path.isfile(filepath):
            logger.info("Modifying file: %s", filepath)
            _fname, fext = os.path.splitext(filepath)
            if fext == ".xml":
                util.writePropertiesToConfigXMLFile(filepath, filepath, values)
            elif fext == ".json":
                util.writePropertiesToConfigJSONFile(filepath, filepath, values, ["global"], "site.hbase-site.")
            elif fext in (".properties", ".cfg"):
                util.writePropertiesToFile(filepath, filepath, values)
            elif fext == ".conf":
                util.writePropertiesToConfFile(filepath, filepath, values)
            elif fext == ".ini":
                # e.g. 'shiro.ini': {'section:prop': 'val'}
                util.writePropertiesToIniFile(filepath, filepath, values)
            elif fext in (".sh", ".cmd"):
                text = ""
                for value in values:
                    text += "\n" + value
                util.writeToFile(text, filepath, isAppend=True)
            elif fext == ".yaml":
                text = ""
                for k, v in values.iteritems():
                    text += "\n" + k + " : " + v
                util.writeToFile(text, filepath, isAppend=True)
            elif fext is None or fext == "" or fext == ".include":
                text = "\n".join(values)
                util.writeToFile(text, filepath, isAppend=True)
    # in windows world copy the configs back to the src location
    if Machine.type() == 'Windows':
        for node in nodes:
            for filename in changes.keys():
                Machine.copyFromLocal(
                    None, node, os.path.join(tmpConfDir, filename), os.path.join(confDir, filename), passwd=None)
    else:
        for node in nodes:
            Machine.rm(
                user=Machine.getAdminUser(), host=node, filepath=updatedConfDir, isdir=True,
                passwd=Machine.getAdminPasswd())
            Machine.copyFromLocal(None, node, tmpConfDir, updatedConfDir)
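# A minimal usage sketch for modifyConfig, assuming the value shapes implied by the handlers
# above: a property dict for .xml/.properties/.conf files, a list of lines for .sh files and
# a {'section:prop': 'value'} dict for .ini files. The concrete properties and source config
# directory below are illustrative; /tmp/hadoopConf is the target named in the docstring.
def _example_modify_config():
    changes = {
        'yarn-site.xml': {'yarn.nodemanager.delete.debug-delay-sec': '3600'},
        'hadoop-env.sh': ['export HADOOP_HEAPSIZE=2048'],
    }
    modifyConfig(changes, "/etc/hadoop/conf", "/tmp/hadoopConf", Hadoop.getAllNodes(), isFirstUpdate=True)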
def modifyConfigRemote(changes, OriginalConfDir, ConfDir, nodes, id=None):  # pylint: disable=redefined-builtin
    '''
    Modifies hadoop config or config with similar structure.
    Returns None.

    Linux:
    1. Create tmpModifyConfDir_<time> in artifacts dir based on source config directory in gateway
    2. Modify contents in created directory.
    3. Copy the directory to /tmp/hadoopConf in target machines
    '''
    _backuploc = getBackupConfigLocation(id=id)
    tmpConfDir = os.path.join(ARTIFACTS_DIR, 'tmpModifyConfDir_' + str(int(round(time.time() * 1000))))
    Config.set(PYTHON_CONFIG_NAME, TMP_CONF_DIR_VAR, tmpConfDir, overwrite=True)
    tmpConfDir = Config.get(PYTHON_CONFIG_NAME, TMP_CONF_DIR_VAR)
    for node in nodes:
        Machine.rm(Machine.getAdminUser(), node, ConfDir, isdir=True)
        Machine.rm(Machine.getAdminUser(), Machine.getfqdn(), tmpConfDir, isdir=True)
        logger.info("*** COPY ORIGINAL CONFIGS FROM REMOTE TO LOCAL ***")
        Machine.copyToLocal(None, node, OriginalConfDir, tmpConfDir)
        #if node == Machine.getfqdn():
        #    Machine.copy(OriginalConfDir, tmpConfDir)
        for filename, values in changes.items():
            filepath = os.path.join(tmpConfDir, filename)
            if os.path.isfile(filepath):
                logger.info("Modifying file locally: %s", filepath)
                _fname, fext = os.path.splitext(filepath)
                if fext == ".xml":
                    util.writePropertiesToConfigXMLFile(filepath, filepath, values)
                elif fext == ".json":
                    util.writePropertiesToConfigJSONFile(filepath, filepath, values, ["global"], "site.hbase-site.")
                elif fext in (".properties", ".cfg"):
                    util.writePropertiesToFile(filepath, filepath, values)
                elif fext == ".conf":
                    util.writePropertiesToConfFile(filepath, filepath, values)
                elif fext in (".sh", ".cmd"):
                    text = ""
                    for value in values:
                        text += "\n" + value
                    util.writeToFile(text, filepath, isAppend=True)
                elif fext == ".yaml":
                    text = ""
                    for k, v in values.iteritems():
                        text += "\n" + k + " : " + v
                    util.writeToFile(text, filepath, isAppend=True)
                elif fext is None or fext == "" or fext == ".include":
                    text = "\n".join(values)
                    util.writeToFile(text, filepath, isAppend=True)
        logger.info("****** Copy back the configs to remote ******")
        #if node != Machine.getfqdn():
        Machine.copyFromLocal(None, node, tmpConfDir, ConfDir)
        Machine.chmod('777', ConfDir, recursive=True, host=node)
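# A minimal sketch of pushing a properties-file change straight onto remote nodes with
# modifyConfigRemote; the config paths and the property shown are illustrative, only the
# argument order comes from the signature above. Note that OriginalConfDir is read from
# each node and the modified copy lands in ConfDir, so the two should differ.
def _example_modify_config_remote():
    changes = {'log4j.properties': {'log4j.rootLogger': 'DEBUG, console'}}
    modifyConfigRemote(changes, "/etc/zeppelin/conf", "/tmp/zeppelinConf", Hadoop.getAllNodes())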