def generateAndSendXmlFile(self):
    """
    Generate a single-node deployment XML file for each new standby
    host and send it to that host.
    """
    self.logger.debug("Start to generate and send XML file.\n")

    tempXmlFile = "%s/clusterconfig.xml" % self.tempFileDir
    cmd = "mkdir -p %s; touch %s; cat /dev/null > %s" % \
          (self.tempFileDir, tempXmlFile, tempXmlFile)
    (status, output) = subprocess.getstatusoutput(cmd)
    cmd = "chown -R %s:%s %s" % (self.user, self.group, self.tempFileDir)
    (status, output) = subprocess.getstatusoutput(cmd)

    newHosts = self.context.newHostList
    for host in newHosts:
        # create single deploy xml file for each standby node
        xmlContent = self.__generateXml(host)
        with os.fdopen(
                os.open(tempXmlFile, os.O_WRONLY | os.O_CREAT,
                        stat.S_IWUSR | stat.S_IRUSR), 'w') as fo:
            fo.write(xmlContent)
        # send single deploy xml file to each standby node
        sshTool = SshTool([host])
        retmap, output = sshTool.getSshStatusOutput(
            "mkdir -p %s" % self.tempFileDir, [host], self.envFile)
        retmap, output = sshTool.getSshStatusOutput(
            "chown %s:%s %s" % (self.user, self.group, self.tempFileDir),
            [host], self.envFile)
        sshTool.scpFiles(tempXmlFile, tempXmlFile, [host], self.envFile)
        self.cleanSshToolFile(sshTool)

    self.logger.debug("End to generate and send XML file.\n")
def sendSoftToHosts(self):
    """
    create the software directory and send the package to each new node
    """
    self.logger.debug("Start to send soft to each standby node.\n")
    hostNames = self.context.newHostList
    hostList = hostNames
    sshTool = SshTool(hostNames)
    srcFile = self.context.packagepath
    targetDir = os.path.realpath(os.path.join(srcFile, "../"))
    # mkdir package dir and send package to remote nodes.
    sshTool.executeCommand("mkdir -p %s" % srcFile, "",
                           DefaultValue.SUCCESS, hostList)
    sshTool.scpFiles(srcFile, targetDir, hostList)
    # change mode of package dir to set privileges for users
    tPathList = os.path.split(targetDir)
    path2ChangeMode = targetDir
    if len(tPathList) > 2:
        path2ChangeMode = os.path.join(tPathList[0], tPathList[1])
    changeModCmd = "chmod -R a+x {srcFile}".format(srcFile=path2ChangeMode)
    sshTool.executeCommand(changeModCmd, "", DefaultValue.SUCCESS, hostList)
    self.logger.debug("End to send soft to each standby node.\n")
    self.cleanSshToolFile(sshTool)
def generateClusterStaticFile(self):
    """
    generate static_config_files and send them to all hosts
    """
    self.logger.debug("Start to generate and send cluster static file.\n")

    primaryHosts = self.getPrimaryHostName()
    command = "gs_om -t generateconf -X %s --distribute" % self.context.xmlFile
    sshTool = SshTool([primaryHosts])
    resultMap, outputCollect = sshTool.getSshStatusOutput(
        command, [primaryHosts], self.envFile)
    self.logger.debug(outputCollect)
    self.cleanSshToolFile(sshTool)

    nodeNameList = self.context.nodeNameList
    for hostName in nodeNameList:
        hostSsh = SshTool([hostName])
        toolPath = self.context.clusterInfoDict["toolPath"]
        appPath = self.context.clusterInfoDict["appPath"]
        srcFile = "%s/script/static_config_files/cluster_static_config_%s" \
                  % (toolPath, hostName)
        if not os.path.exists(srcFile):
            GaussLog.exitWithError(
                "Generated static file [%s] not found." % srcFile)
        targetFile = "%s/bin/cluster_static_config" % appPath
        hostSsh.scpFiles(srcFile, targetFile, [hostName], self.envFile)
        self.cleanSshToolFile(hostSsh)

    self.logger.debug("End to generate and send cluster static file.\n")
    time.sleep(10)

    # A single-node database needs to start the cluster after expansion.
    if self.isSingleNodeInstance:
        self.logger.debug("Single-Node instance need restart.\n")
        self.commonGsCtl.queryOmCluster(primaryHosts, self.envFile)

        # if the primary database is not normal, restart it
        primaryHost = self.getPrimaryHostName()
        dataNode = self.context.clusterInfoDict[primaryHost]["dataNode"]
        insType, dbStat = self.commonGsCtl.queryInstanceStatus(
            primaryHost, dataNode, self.envFile)
        if insType != MODE_PRIMARY:
            self.commonGsCtl.startInstanceWithMode(
                primaryHost, dataNode, MODE_PRIMARY, self.envFile)

        # if the standby state is not normal, rebuild the standby database
        standbyHosts = self.context.newHostList
        for host in standbyHosts:
            hostName = self.context.backIpNameMap[host]
            dataNode = self.context.clusterInfoDict[hostName]["dataNode"]
            insType, dbStat = self.commonGsCtl.queryInstanceStatus(
                hostName, dataNode, self.envFile)
            if dbStat != STAT_NORMAL:
                self.commonGsCtl.startInstanceWithMode(
                    hostName, dataNode, MODE_STANDBY, self.envFile)

        self.commonGsCtl.startOmCluster(primaryHosts, self.envFile)
def modifyStaticConf(self):
    """
    Modify the cluster static conf and save it
    """
    self.logger.log(
        "[gs_dropnode]Start to modify the cluster static conf.")
    staticConfigPath = "%s/bin/cluster_static_config" % self.appPath
    # back up first; this only needs to be done on the primary node
    tmpDir = DefaultValue.getEnvironmentParameterValue(
        "PGHOST", self.user, self.userProfile)
    cmd = "cp %s %s/%s_BACKUP" % (staticConfigPath, tmpDir,
                                  'cluster_static_config')
    (status, output) = subprocess.getstatusoutput(cmd)
    if status:
        self.logger.debug(
            "[gs_dropnode]Backup cluster_static_config failed: " + output)
    backIpDict = self.context.backIpNameMap
    backIpDict_values = list(backIpDict.values())
    backIpDict_keys = list(backIpDict.keys())
    for ipLoop in self.context.hostIpListForDel:
        nameLoop = backIpDict_keys[backIpDict_values.index(ipLoop)]
        dnLoop = self.context.clusterInfo.getDbNodeByName(nameLoop)
        self.context.clusterInfo.dbNodes.remove(dnLoop)
    for dbNode in self.context.clusterInfo.dbNodes:
        if dbNode.name == self.localhostname:
            self.context.clusterInfo.saveToStaticConfig(
                staticConfigPath, dbNode.id)
            continue
        staticConfigPath_dn = "%s/cluster_static_config_%s" % (tmpDir,
                                                               dbNode.name)
        self.context.clusterInfo.saveToStaticConfig(
            staticConfigPath_dn, dbNode.id)
    self.logger.debug(
        "[gs_dropnode]Start to scp the cluster static conf to all other "
        "nodes.")
    if not self.context.flagOnlyPrimary:
        sshtool = SshTool(self.context.clusterInfo.getClusterNodeNames())
        cmd = "%s/script/gs_om -t refreshconf" % self.gphomepath
        (status, output) = subprocess.getstatusoutput(cmd)
        self.logger.debug(
            "[gs_dropnode]Output of refresh dynamic conf: %s." % output)
        for hostName in self.context.hostMapForExist.keys():
            hostSsh = SshTool([hostName])
            if hostName != self.localhostname:
                staticConfigPath_name = "%s/cluster_static_config_%s" % (
                    tmpDir, hostName)
                hostSsh.scpFiles(staticConfigPath_name, staticConfigPath,
                                 [hostName], self.envFile)
                try:
                    os.unlink(staticConfigPath_name)
                except FileNotFoundError:
                    pass
            self.cleanSshToolFile(hostSsh)
    self.logger.log("[gs_dropnode]End of modifying the cluster static conf.")
def setGUCOnClusterHosts(self, hostNames=[]):
    """
    guc config on all hosts
    """
    gucDict = self.getGUCConfig()
    tempShFile = "%s/guc.sh" % self.tempFileDir

    if len(hostNames) == 0:
        hostNames = self.context.nodeNameList

    for host in hostNames:
        command = "source %s ; " % self.envFile + gucDict[host]
        self.logger.debug(command)
        sshTool = SshTool([host])

        # create temporary dir to save guc command bashfile.
        mkdirCmd = "mkdir -m a+x -p %s; chown %s:%s %s" % \
                   (self.tempFileDir, self.user, self.group, self.tempFileDir)
        retmap, output = sshTool.getSshStatusOutput(
            mkdirCmd, [host], self.envFile)
        subprocess.getstatusoutput("mkdir -m a+x -p %s; touch %s; "
                                   "cat /dev/null > %s" %
                                   (self.tempFileDir, tempShFile, tempShFile))
        with os.fdopen(
                os.open(tempShFile, os.O_WRONLY | os.O_CREAT,
                        stat.S_IWUSR | stat.S_IRUSR), 'w') as fo:
            fo.write("#bash\n")
            fo.write(command)

        # send guc command bashfile to each host and execute it.
        sshTool.scpFiles(tempShFile, tempShFile, [host], self.envFile)
        resultMap, outputCollect = sshTool.getSshStatusOutput(
            "sh %s" % tempShFile, [host], self.envFile)
        self.logger.debug(outputCollect)
        self.cleanSshToolFile(sshTool)
class OmImpl: """ init the command options save command line parameter values """ def __init__(self, OperationManager): """ function: constructor """ # global self.context = OperationManager self.logger = OperationManager.logger self.user = OperationManager.user self.newClusterInfo = None self.oldClusterInfo = None self.utilsPath = None self.mpprcFile = "" self.nodeId = OperationManager.g_opts.nodeId self.time_out = OperationManager.g_opts.time_out self.mode = OperationManager.g_opts.mode self.clusterInfo = OperationManager.clusterInfo self.dataDir = OperationManager.g_opts.dataDir self.sshTool = None def doStopCluster(self): """ function: do stop cluster input: NA output: NA """ pass def doClusterStatus(self): """ function: get cluster input: NA output: NA """ pass def doStart(self): """ function:Start cluster or node input:NA output:NA """ self.doStartCluster() def doStop(self): """ function:Stop cluster or node input:NA output:NA """ self.logger.debug("Operating: Stopping.") self.doStopCluster() def getNodeStatus(self, nodename): """ function: get node status input: nodename output: NA """ try: # Create a temporary file to save cluster status tmpDir = DefaultValue.getTmpDirFromEnv() tmpFile = os.path.join(tmpDir, "gauss_cluster_status.dat_" + \ str(datetime.now().strftime( '%Y%m%d%H%M%S')) + "_" + str( os.getpid())) # Perform the start operation # Writes the execution result to a temporary file cmd = ClusterCommand.getQueryStatusCmd(self.context.g_opts.user, "", tmpFile, True) (status, output) = subprocess.getstatusoutput(cmd) if (status != 0): self.logger.debug("The cmd is %s " % cmd) raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % \ cmd + "Error: \n%s" % output) # Initialize cluster status information for the temporary file clusterStatus = DbClusterStatus() clusterStatus.initFromFile(tmpFile) # Get node status nodeStatusInfo = None for dbNode in clusterStatus.dbNodes: if (dbNode.name == nodename): nodeStatusInfo = dbNode if (nodeStatusInfo and nodeStatusInfo.isNodeHealthy()): nodeStatus = clusterStatus.OM_NODE_STATUS_NORMAL else: nodeStatus = clusterStatus.OM_NODE_STATUS_ABNORMAL DefaultValue.cleanTmpFile(tmpFile) return nodeStatus except Exception as e: DefaultValue.cleanTmpFile(tmpFile) self.logger.debug("Failed to get node status. Error: \n%s." 
% str(e)) return "Abnormal" def doStatus(self): """ function:Get the status of cluster or node input:NA output:NA """ hostName = DefaultValue.GetHostIpOrName() sshtool = SshTool(self.context.clusterInfo.getClusterNodeNames()) nodeId = 0 if (self.context.g_opts.nodeName != ""): for dbnode in self.context.clusterInfo.dbNodes: if dbnode.name == self.context.g_opts.nodeName: nodeId = dbnode.id if (nodeId == 0): raise Exception(ErrorCode.GAUSS_516["GAUSS_51619"] % self.context.g_opts.nodeName) cmd = queryCmd() if (self.context.g_opts.outFile != ""): cmd.outputFile = self.context.g_opts.outFile if (self.context.g_opts.show_detail): if (self.context.clusterInfo.clusterType == DefaultValue.CLUSTER_TYPE_SINGLE_PRIMARY_MULTI_STANDBY): cmd.dataPathQuery = True cmd.azNameQuery = True else: cmd.dataPathQuery = True else: if (nodeId > 0): self.context.clusterInfo.queryNodeInfo(sshtool, hostName, nodeId, cmd.outputFile) return if (self.context.g_opts.showAll): self.context.clusterInfo.queryNodeInfo(sshtool, hostName, nodeId, cmd.outputFile) return cmd.clusterStateQuery = True self.context.clusterInfo.queryClsInfo(hostName, sshtool, self.context.mpprcFile, cmd) def doRebuildConf(self): """ generating static configuration files for all nodes input:NA output:NA """ try: self.logger.log( "Generating static configuration files for all nodes.") # Initialize the cluster information according to the XML file self.context.clusterInfo = dbClusterInfo() self.context.clusterInfo.initFromXml(self.context.g_opts.confFile) # 1.create a tmp dir self.logger.log( "Creating temp directory to store static configuration files.") dirName = os.path.dirname(os.path.realpath(__file__)) tmpDirName = os.path.realpath("%s/../../static_config_files" % dirName) cmd = "mkdir -p -m %s '%s'" % (DefaultValue.KEY_DIRECTORY_MODE, tmpDirName) (status, output) = subprocess.getstatusoutput(cmd) if (status != 0): raise Exception(ErrorCode.GAUSS_502["GAUSS_50208"] % "temporary directory" + "\nCommand:%s\nError: %s" % (cmd, output)) self.logger.log("Successfully created the temp directory.") # create static files self.logger.log("Generating static configuration files.") for dbNode in self.context.clusterInfo.dbNodes: staticConfigPath = "%s/cluster_static_config_%s" % ( tmpDirName, dbNode.name) self.context.clusterInfo.saveToStaticConfig( staticConfigPath, dbNode.id) self.logger.log( "Successfully generated static configuration files.") self.logger.log( "Static configuration files for all nodes are saved in %s." 
% tmpDirName) # check if need send static config files if not self.context.g_opts.distribute: self.logger.debug( "No need to distribute static configuration files " "to installation directory.") return # distribute static config file self.logger.log( "Distributing static configuration files to all nodes.") for dbNode in self.context.clusterInfo.dbNodes: if (dbNode.name != DefaultValue.GetHostIpOrName()): cmd = \ "pscp -H %s '%s'/cluster_static_config_%s '%s'" \ "/bin/cluster_static_config" % ( dbNode.name, tmpDirName, dbNode.name, self.context.clusterInfo.appPath) else: cmd = \ "cp '%s'/cluster_static_config_%s '%s'" \ "/bin/cluster_static_config" % ( tmpDirName, dbNode.name, self.context.clusterInfo.appPath) (status, output) = subprocess.getstatusoutput(cmd) if (status != 0): raise Exception(ErrorCode.GAUSS_502["GAUSS_50216"] % "static configuration file" + "Node: %s.\nCommand: \n%s\nError: \n%s" % (dbNode.name, cmd, output)) self.logger.log( "Successfully distributed static configuration files.") except Exception as e: g_file.removeDirectory(tmpDirName) raise Exception(str(e)) ########################################################################## # doReplaceSSLCert start ########################################################################## def doReplaceSSLCert(self): """ function: replace ssl cert files input: NA output: NA """ try: # Initialize the cluster information according to the xml file self.context.clusterInfo = dbClusterInfo() self.context.clusterInfo.initFromStaticConfig( g_OSlib.getPathOwner(self.context.g_opts.certFile)[0]) self.sshTool = SshTool( self.context.clusterInfo.getClusterNodeNames(), self.logger.logFile) except Exception as e: raise Exception(str(e)) try: self.logger.log("Starting ssl cert files replace.", "addStep") tempDir = os.path.join(DefaultValue.getTmpDirFromEnv(), "tempCertDir") # unzip files to temp directory if (os.path.exists(tempDir)): g_file.removeDirectory(tempDir) g_file.createDirectory(tempDir, True, DefaultValue.KEY_DIRECTORY_MODE) g_file.decompressZipFiles(self.context.g_opts.certFile, tempDir) realCertList = DefaultValue.CERT_FILES_LIST clientCertList = DefaultValue.CLIENT_CERT_LIST # check file exists for clientCert in clientCertList: sslFile = os.path.join(tempDir, clientCert) if (not os.path.isfile(sslFile)): raise Exception( (ErrorCode.GAUSS_502["GAUSS_50201"] % sslFile) + \ "Missing SSL client cert file in ZIP file.") certList = [] dnDict = self.getDnNodeDict() for cert in realCertList: sslFile = os.path.join(tempDir, cert) if (not os.path.isfile(sslFile) and cert != DefaultValue.SSL_CRL_FILE): raise Exception( (ErrorCode.GAUSS_502["GAUSS_50201"] % sslFile) + \ "Missing SSL server cert file in ZIP file.") if (os.path.isfile(sslFile)): certList.append(cert) # distribute cert files to datanodes self.doDNBackup() self.distributeDNCert(certList, dnDict) # clear temp directory g_file.removeDirectory(tempDir) if (not self.context.g_opts.localMode): self.logger.log( "Successfully distributed cert files on all nodes.") except Exception as e: g_file.removeDirectory(tempDir) raise Exception(str(e)) def isDnEmpty(self, nodeName=""): """ function: Is there exists empty file in dbnodes directory. input: node name output: True/False """ allDnNodeDict = self.getDnNodeDict() nodeDnDir = allDnNodeDict[nodeName] emptyCert = os.path.join(nodeDnDir, EMPTY_CERT) status = self.sshTool.checkRemoteFileExist( nodeName, emptyCert, self.context.g_opts.mpprcFile) return status def doDNBackup(self): """ function: backup SSL cert files on single_inst cluster. 
input: backupFlag is a flag of exist DB in node output: NA """ self.logger.log("Backing up old ssl cert files.") backupList = DefaultValue.CERT_FILES_LIST[:] allDnNodeDict = self.getDnNodeDict() normalNodeList = [] tarBackupList = [] if (self.context.g_opts.localMode): self.logger.debug("Backing up database node SSL cert files.") nodeDnDir = allDnNodeDict[DefaultValue.GetHostIpOrName()] backupFlagFile = os.path.join(nodeDnDir, "certFlag") if (os.path.isfile(backupFlagFile)): self.logger.log("There is no need to backup ssl cert files.") return os.mknod(backupFlagFile, DefaultValue.KEY_FILE_PERMISSION) for certFile in backupList: realCertFile = os.path.join(nodeDnDir, certFile) if (os.path.isfile(realCertFile)): tarBackupList.append(certFile) if (len(tarBackupList) == 0): os.mknod(os.path.join(nodeDnDir, EMPTY_CERT)) cmd = " %s && " % g_Platform.getCdCmd(nodeDnDir) cmd += g_Platform.getCompressFilesCmd( DefaultValue.CERT_BACKUP_FILE, EMPTY_CERT) else: cmd = " %s && " % g_Platform.getCdCmd(nodeDnDir) cmd += "tar -zcvf %s" % (DefaultValue.CERT_BACKUP_FILE) for certFile in tarBackupList: cmd += " %s" % certFile (status, output) = DefaultValue.retryGetstatusoutput(cmd) if (status != 0): raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "Failed backup gds cert files on local node." + "Error: \n%s" % output) # Clear empty file if (os.path.isfile(os.path.join(nodeDnDir, EMPTY_CERT))): os.remove(os.path.join(nodeDnDir, EMPTY_CERT)) self.logger.log("Successfully executed local backup.") return # 1 check backup flag file on all dbnodes. for node in allDnNodeDict.keys(): nodeDnDir = allDnNodeDict[node] backupFlagFile = os.path.join(nodeDnDir, "certFlag") status = self.sshTool.checkRemoteFileExist( node, backupFlagFile, self.context.g_opts.mpprcFile) if not status: normalNodeList.append(node) # 2 if exists flag file on anyone node, there will be return. if (len(normalNodeList) != len(allDnNodeDict.keys())): self.logger.log("There is no need to backup on all dbnodes.") return # 3 backup cert files on all dbnodes. for node in allDnNodeDict.keys(): nodeDnDir = allDnNodeDict[node] backupFlagFile = os.path.join(nodeDnDir, "certFlag") backupTar = os.path.join(nodeDnDir, DefaultValue.CERT_BACKUP_FILE) sshcmd = g_file.SHELL_CMD_DICT["overWriteFile"] % ( "backupflagfile", backupFlagFile) sshcmd += " && " + g_file.SHELL_CMD_DICT["changeMode"] % ( DefaultValue.KEY_FILE_MODE, backupFlagFile) self.sshTool.executeCommand(sshcmd, "Make a flag file of backup.", DefaultValue.SUCCESS, [node], self.context.g_opts.mpprcFile) for certFile in backupList: realCertFile = os.path.join(nodeDnDir, certFile) status = self.sshTool.checkRemoteFileExist( node, realCertFile, self.context.g_opts.mpprcFile) if status: tarBackupList.append(certFile) # if no cert files, # there will be create a file for '.tar' file. 
if (len(tarBackupList) == 0): sshcmd = g_Platform.getCreateFileCmd( os.path.join(nodeDnDir, EMPTY_CERT)) self.sshTool.executeCommand(sshcmd, "Backup empty cert file.", DefaultValue.SUCCESS, [node], self.context.g_opts.mpprcFile) sshcmd = " %s && " % g_Platform.getCdCmd(nodeDnDir) sshcmd += g_Platform.getCompressFilesCmd( DefaultValue.CERT_BACKUP_FILE, EMPTY_CERT) else: sshcmd = " %s && " % g_Platform.getCdCmd(nodeDnDir) sshcmd += "tar -zcvf %s" % (DefaultValue.CERT_BACKUP_FILE) for certDir in tarBackupList: sshcmd += " %s" % certDir self.sshTool.executeCommand(sshcmd, "Backup cert file.", DefaultValue.SUCCESS, [node], self.context.g_opts.mpprcFile) # Clear empty file if (self.isDnEmpty(node)): sshcmd = g_file.SHELL_CMD_DICT["deleteFile"] % (os.path.join( nodeDnDir, EMPTY_CERT), os.path.join( nodeDnDir, EMPTY_CERT)) self.sshTool.executeCommand(sshcmd, "Clear empty file.", DefaultValue.SUCCESS, [node], self.context.g_opts.mpprcFile) self.logger.log("Successfully backup SSL cert files on [%s]." % node) sshcmd = g_file.SHELL_CMD_DICT["changeMode"] % ( DefaultValue.KEY_FILE_MODE, backupTar) self.sshTool.executeCommand(sshcmd, "Chmod back up cert", DefaultValue.SUCCESS, [node], self.context.g_opts.mpprcFile) def doDNSSLCertRollback(self): """ function: rollback SSL cert file in DN instance directory input: NA output: NA """ self.context.clusterInfo = dbClusterInfo() self.context.clusterInfo.initFromStaticConfig( pwd.getpwuid(os.getuid()).pw_name) self.sshTool = SshTool(self.context.clusterInfo.getClusterNodeNames(), self.logger.logFile) backupList = DefaultValue.CERT_FILES_LIST[:] allDnNodeDict = self.getDnNodeDict() noBackupList = [] temp = "tempDir" if self.context.g_opts.localMode: if ((DefaultValue.GetHostIpOrName() in allDnNodeDict.keys()) and os.path.isfile( os.path.join( allDnNodeDict[DefaultValue.GetHostIpOrName()], DefaultValue.CERT_BACKUP_FILE))): localDnDir = allDnNodeDict[DefaultValue.GetHostIpOrName()] tempDir = os.path.join(localDnDir, temp) if (os.path.exists(tempDir)): g_file.removeDirectory(tempDir) os.mkdir(tempDir, DefaultValue.KEY_DIRECTORY_PERMISSION) for certFile in backupList: realCertFile = os.path.join(localDnDir, certFile) if (os.path.exists(realCertFile)): g_file.moveFile(realCertFile, tempDir) cmd = "cd '%s' && if [ -f '%s' ];then tar -zxvf %s;fi" % \ (localDnDir, DefaultValue.CERT_BACKUP_FILE, DefaultValue.CERT_BACKUP_FILE) (status, output) = subprocess.getstatusoutput(cmd) if (status != 0): cmd = "cp '%s'/* '%s' && rm -rf '%s'" % ( tempDir, localDnDir, tempDir) (status, output) = subprocess.getstatusoutput(cmd) raise Exception((ErrorCode.GAUSS_514["GAUSS_51400"] % cmd) + "Failed uncompression SSL backup file." + "Error: \n%s" % output) # remove temp directory if (os.path.exists(tempDir)): g_file.removeDirectory(tempDir) # set guc option if (os.path.isfile( os.path.join(localDnDir, DefaultValue.SSL_CRL_FILE))): cmd = \ "gs_guc set -D %s " \ "-c \"ssl_crl_file=\'%s\'\"" \ % (localDnDir, DefaultValue.SSL_CRL_FILE) else: cmd = \ "gs_guc set -D %s " \ "-c \"ssl_crl_file=\'\'\"" % localDnDir (status, output) = subprocess.getstatusoutput(cmd) if (status != 0): raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + "Error: \n%s" % output) if (os.path.isfile(os.path.join(localDnDir, EMPTY_CERT))): os.remove(os.path.join(localDnDir, EMPTY_CERT)) self.logger.log( "Successfully rollback SSL cert files with local mode.") return else: self.logger.log("There is not exists backup files.") return # 1.check backup file "gsql_cert_backup.tar.gz" on all dbnodes. 
for node in allDnNodeDict.keys(): backupGzFile = os.path.join(allDnNodeDict[node], DefaultValue.CERT_BACKUP_FILE) status = self.sshTool.checkRemoteFileExist( node, backupGzFile, self.context.g_opts.mpprcFile) if not status: noBackupList.append(node) if (len(noBackupList) > 0): raise Exception((ErrorCode.GAUSS_502["GAUSS_50201"] % DefaultValue.CERT_BACKUP_FILE) + "Can't rollback SSL cert files on %s." % noBackupList) # 2.perform rollback on all dbnodes. for node in allDnNodeDict.keys(): backupGzFile = os.path.join(allDnNodeDict[node], DefaultValue.CERT_BACKUP_FILE) # 2-1.move SSL cert files in dn directory to temp directory. sshcmd = "cd '%s' && if [ -d '%s' ];then rm -rf '%s'" \ " && mkdir '%s';else mkdir '%s';fi" % \ (allDnNodeDict[node], temp, temp, temp, temp) self.sshTool.executeCommand(sshcmd, "Make temp directory.", DefaultValue.SUCCESS, \ [node], self.context.g_opts.mpprcFile) for certFile in backupList: realCertFile = os.path.join(allDnNodeDict[node], certFile) sshcmd = " %s && " % g_Platform.getCdCmd( os.path.join(allDnNodeDict[node], temp)) sshcmd += g_file.SHELL_CMD_DICT["renameFile"] % ( realCertFile, realCertFile, "./") self.sshTool.executeCommand( sshcmd, "Backup cert files to temp directory.", DefaultValue.SUCCESS, [node], self.context.g_opts.mpprcFile) # 2-2.uncompression "gsql_cert_backup.tar.gz" file sshcmd = "cd '%s' && if [ -f '%s' ];then tar -zxvf %s;fi" % \ (allDnNodeDict[node], DefaultValue.CERT_BACKUP_FILE, DefaultValue.CERT_BACKUP_FILE) self.sshTool.executeCommand(sshcmd, "Unzip backup file.", DefaultValue.SUCCESS, [node], self.context.g_opts.mpprcFile) # 2-3.clear temp directory sshcmd = " %s && " % g_Platform.getCdCmd(allDnNodeDict[node]) sshcmd += g_file.SHELL_CMD_DICT["deleteDir"] % (temp, temp) self.sshTool.executeCommand(sshcmd, "Clear backup cert files.", DefaultValue.SUCCESS, [node], self.context.g_opts.mpprcFile) # 2-4.is have "sslcrl-file.crl",config 'ssl_crl_file' option status = self.sshTool.checkRemoteFileExist( node, os.path.join(allDnNodeDict[node], DefaultValue.SSL_CRL_FILE), self.context.g_opts.mpprcFile) # exists 'sslcrl-file.crl' file ,config option of 'postgresql.conf' if (status): if node == DefaultValue.GetHostIpOrName(): sshcmd = \ "gs_guc set -D %s " \ "-c \"ssl_crl_file='%s'\"" \ % (allDnNodeDict[node], DefaultValue.SSL_CRL_FILE) else: sshcmd = "gs_guc set -D %s " \ "-c \"ssl_crl_file=\\\\\\'%s\\\\\\'\"" \ % (allDnNodeDict[node], DefaultValue.SSL_CRL_FILE) self.sshTool.executeCommand(sshcmd, "Exist 'ssl_crl_file'", DefaultValue.SUCCESS, [node], self.context.g_opts.mpprcFile) else: if (node == DefaultValue.GetHostIpOrName()): sshcmd = "gs_guc set " \ "-D %s -c \"ssl_crl_file=''\"" % ( allDnNodeDict[node]) else: sshcmd = "gs_guc set " \ "-D %s -c \"ssl_crl_file=\\\\\\'\\\\\\'\"" \ % (allDnNodeDict[node]) self.sshTool.executeCommand(sshcmd, "No exist 'ssl_crl_file'", DefaultValue.SUCCESS, [node], self.context.g_opts.mpprcFile) # Clear empty file. if (self.isDnEmpty(node)): sshcmd = g_file.SHELL_CMD_DICT["deleteFile"] % (os.path.join( allDnNodeDict[node], EMPTY_CERT), os.path.join(allDnNodeDict[node], EMPTY_CERT)) self.sshTool.executeCommand(sshcmd, "Clear empty file.", DefaultValue.SUCCESS, [node], self.context.g_opts.mpprcFile) self.logger.log("Successfully rollback SSL cert files on [%s]." 
% node) def getDnNodeDict(self): """ function: get dbnodes information input: NA output: dictionary """ clusterDnNodes = {} if (not self.context.clusterInfo.isSingleInstCluster()): return clusterDnNodes for node in self.context.clusterInfo.dbNodes: if (len(node.datanodes) > 0): clusterDnNodes[ node.datanodes[0].hostname] = node.datanodes[0].datadir self.logger.debug("Successfully get database node dict.") return clusterDnNodes def distributeDNCert(self, certList, dnDict=None): """ function: distribute ssl cert files on single_inst cluster input: certList: cert files list dnDict: dictionary output: NA """ tempDir = "tempCertDir" gphost = DefaultValue.getTmpDirFromEnv() if dnDict is None: dnDict = {} dnName = dnDict.keys() certPathList = [] self.logger.debug(certList) for num in iter(certList): sslPath = os.path.join(os.path.join(gphost, tempDir), num) certPathList.append(sslPath) # local mode if self.context.g_opts.localMode: localDnDir = dnDict[DefaultValue.GetHostIpOrName()] for num in range(len(certList)): # distribute gsql SSL cert if (os.path.isfile(os.path.join(localDnDir, certList[num]))): os.remove(os.path.join(localDnDir, certList[num])) if (os.path.isfile(certPathList[num])): g_file.cpFile(certPathList[num], os.path.join(localDnDir, certList[num])) g_file.changeMode(DefaultValue.KEY_FILE_MODE, os.path.join(localDnDir, certList[num])) # remove 'sslcrl-file.crl' file if (DefaultValue.SSL_CRL_FILE not in certList and os.path.isfile( os.path.join(localDnDir, DefaultValue.SSL_CRL_FILE))): os.remove(os.path.join(localDnDir, DefaultValue.SSL_CRL_FILE)) # config 'sslcrl-file.crl' option in 'postgresql.conf' if (os.path.isfile( os.path.join(localDnDir, DefaultValue.SSL_CRL_FILE))): cmd = "gs_guc set " \ "-D %s -c \"ssl_crl_file=\'%s\'\"" % \ (localDnDir, DefaultValue.SSL_CRL_FILE) (status, output) = subprocess.getstatusoutput(cmd) if (status != 0): raise Exception((ErrorCode.GAUSS_514["GAUSS_51400"] % cmd) + "Failed set 'ssl_crl_file' option." + "Error: \n%s" % output) else: cmd = "gs_guc set -D %s -c \"ssl_crl_file=\'\'\"" \ % localDnDir (status, output) = subprocess.getstatusoutput(cmd) if (status != 0): raise Exception((ErrorCode.GAUSS_514["GAUSS_51400"] % cmd) + "Failed set 'ssl_crl_file' option." + "Error: \n%s" % output) # remove backup flag file 'certFlag' if (os.path.isfile(os.path.join(localDnDir, 'certFlag'))): os.remove(os.path.join(localDnDir, 'certFlag')) self.logger.log( "Replace SSL cert files with local mode successfully.") return # not local mode for node in dnName: for num in range(len(certList)): sshcmd = g_file.SHELL_CMD_DICT["deleteFile"] % (os.path.join( dnDict[node], certList[num]), os.path.join(dnDict[node], certList[num])) self.sshTool.executeCommand(sshcmd, "Delete read only cert file.", DefaultValue.SUCCESS, [node], self.context.g_opts.mpprcFile) if (os.path.exists(certPathList[num])): self.sshTool.scpFiles(certPathList[num], dnDict[node], [node]) # change permission of cert file 600, # there no need to is exists file, # because the files must be exist. 
sshcmd = g_file.SHELL_CMD_DICT["changeMode"] % ( DefaultValue.KEY_FILE_MODE, os.path.join(dnDict[node], certList[num])) self.sshTool.executeCommand(sshcmd, "Change file permisstion.'", DefaultValue.SUCCESS, [node], self.context.g_opts.mpprcFile) if (DefaultValue.SSL_CRL_FILE in certList): if (node == DefaultValue.GetHostIpOrName()): sshcmd = "gs_guc set " \ "-D %s -c \"ssl_crl_file='%s'\"" \ % (dnDict[node], DefaultValue.SSL_CRL_FILE) else: sshcmd = "gs_guc set " \ " -D %s -c \"ssl_crl_file=\\\\\\'%s\\\\\\'\"" \ % (dnDict[node], DefaultValue.SSL_CRL_FILE) self.sshTool.executeCommand(sshcmd, "Find 'ssl_crl_file'", DefaultValue.SUCCESS, [node], self.context.g_opts.mpprcFile) else: # no ssl cert file there will delete old cert file, # and config option ssl_crl_file = '' sshcmd = g_file.SHELL_CMD_DICT["deleteFile"] % ( os.path.join(dnDict[node], DefaultValue.SSL_CRL_FILE), os.path.join(dnDict[node], DefaultValue.SSL_CRL_FILE)) self.sshTool.executeCommand(sshcmd, "Find 'ssl_crl_file'", DefaultValue.SUCCESS, [node], self.context.g_opts.mpprcFile) if (node == DefaultValue.GetHostIpOrName()): sshcmd = "gs_guc set " \ "-D %s -c \"ssl_crl_file=\'\'\"" % (dnDict[node]) else: sshcmd = \ "gs_guc set " \ "-D %s " \ "-c \"ssl_crl_file=\\\\\\'\\\\\\'\"" % (dnDict[node]) self.sshTool.executeCommand(sshcmd, "Find 'ssl_crl_file'", DefaultValue.SUCCESS, [node], self.context.g_opts.mpprcFile) # remove file 'sslcrl-file.crl' sshcmd = g_file.SHELL_CMD_DICT["deleteFile"] % ( os.path.join(dnDict[node], DefaultValue.SSL_CRL_FILE), os.path.join(dnDict[node], DefaultValue.SSL_CRL_FILE)) self.sshTool.executeCommand(sshcmd, "Delete read only cert file.", DefaultValue.SUCCESS, [node], self.context.g_opts.mpprcFile) # remove backup flag file 'certFlag' sshcmd = g_file.SHELL_CMD_DICT["deleteFile"] % (os.path.join( dnDict[node], "certFlag"), os.path.join(dnDict[node], "certFlag")) self.sshTool.executeCommand(sshcmd, "Delete backup flag file.", DefaultValue.SUCCESS, [node], self.context.g_opts.mpprcFile) self.logger.log("%s replace SSL cert files successfully." % node) ########################################################################### # Kerberos Flow ########################################################################### def doKerberos(self): """ function: operation kerberos input: NA output: NA """ try: if self.context.g_opts.kerberosMode == "install": self.logger.log("Starting install Kerberos.", "addStep") cmd = "%s -m %s -U %s --%s" % \ (OMCommand.getLocalScript("Local_Kerberos"), "install", self.context.g_opts.clusterUser, self.context.g_opts.kerberosType) # local mode (status, output) = subprocess.getstatusoutput(cmd) if (status != 0): raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % "Command: %s. Error:\n%s" % (cmd, output)) self.logger.log("Successfully install Kerberos.") elif self.context.g_opts.kerberosMode == "uninstall": self.logger.log("Starting uninstall Kerberos.", "addStep") cmd = "%s -m %s -U %s" % \ (OMCommand.getLocalScript("Local_Kerberos"), "uninstall", self.context.g_opts.clusterUser) # local mode (status, output) = subprocess.getstatusoutput(cmd) if status != 0: raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % "Command: %s. 
Error:\n%s" % (cmd, output)) self.logger.log("Successfully uninstall Kerberos.") except Exception as e: raise Exception(str(e)) def checkRemoteFileExist(self, filepath): """ funciton:check file exist on remote node input:filepath output:dictionary """ existNodes = [] for nodeName in self.context.clusterInfo.getClusterNodeNames(): if (nodeName == DefaultValue.GetHostIpOrName()): continue if (self.sshTool.checkRemoteFileExist(nodeName, filepath, "")): existNodes.append(nodeName) return existNodes def recursivePath(self, filepath): """ function: recursive path input: filepath output: NA """ fileList = os.listdir(filepath) for fileName in fileList: fileName = os.path.join(filepath, fileName) # change the owner of files g_file.changeOwner(self.context.g_opts.user, fileName) if (os.path.isfile(fileName)): # change fileName permission g_file.changeMode(DefaultValue.KEY_FILE_MODE, fileName) else: # change directory permission g_file.changeMode(DefaultValue.KEY_DIRECTORY_MODE, fileName, True) self.recursivePath(fileName) def checkNode(self): """ function: check if the current node is to be uninstalled input : NA output: NA """ pass def stopCluster(self): """ function:Stop cluster input:NA output:NA """ pass def startCluster(self): """ function:Start cluster input:NA output:NA """ pass
def generateClusterStaticFile(self):
    """
    generate static_config_files and send them to all hosts
    """
    self.logger.debug("Start to generate and send cluster static file.\n")

    primaryHost = self.getPrimaryHostName()
    result = self.commonGsCtl.queryOmCluster(primaryHost, self.envFile)
    for nodeName in self.context.nodeNameList:
        nodeInfo = self.context.clusterInfoDict[nodeName]
        nodeIp = nodeInfo["backIp"]
        dataNode = nodeInfo["dataNode"]
        exist_reg = r"(.*)%s[\s]*%s(.*)%s(.*)" % (nodeName, nodeIp, dataNode)
        if not re.search(exist_reg, result) and \
                nodeIp not in self.context.newHostList:
            self.logger.debug(
                "The node ip [%s] will not be added to cluster." % nodeIp)
            dbNode = self.context.clusterInfo.getDbNodeByName(nodeName)
            self.context.clusterInfo.dbNodes.remove(dbNode)

    toolPath = self.context.clusterInfoDict["toolPath"]
    appPath = self.context.clusterInfoDict["appPath"]

    static_config_dir = "%s/script/static_config_files" % toolPath
    if not os.path.exists(static_config_dir):
        os.makedirs(static_config_dir)

    for dbNode in self.context.clusterInfo.dbNodes:
        hostName = dbNode.name
        staticConfigPath = \
            "%s/script/static_config_files/cluster_static_config_%s" % \
            (toolPath, hostName)
        self.context.clusterInfo.saveToStaticConfig(
            staticConfigPath, dbNode.id)
        srcFile = staticConfigPath
        if not os.path.exists(srcFile):
            GaussLog.exitWithError(
                "Generated static file [%s] not found." % srcFile)
        hostSsh = SshTool([hostName])
        targetFile = "%s/bin/cluster_static_config" % appPath
        hostSsh.scpFiles(srcFile, targetFile, [hostName], self.envFile)
        self.cleanSshToolFile(hostSsh)

    self.logger.debug("End to generate and send cluster static file.\n")
    time.sleep(10)

    # A single-node database needs to start the cluster after expansion.
    if self.isSingleNodeInstance:
        primaryHost = self.getPrimaryHostName()
        self.logger.debug("Single-Node instance need restart.\n")
        self.commonGsCtl.queryOmCluster(primaryHost, self.envFile)

        # if the primary database is not normal, restart it
        dataNode = self.context.clusterInfoDict[primaryHost]["dataNode"]
        insType, dbStat = self.commonGsCtl.queryInstanceStatus(
            primaryHost, dataNode, self.envFile)
        if insType != MODE_PRIMARY:
            self.commonGsCtl.startInstanceWithMode(
                primaryHost, dataNode, MODE_PRIMARY, self.envFile)

        # if the standby state is not normal, rebuild the standby database
        standbyHosts = self.context.newHostList
        for host in standbyHosts:
            hostName = self.context.backIpNameMap[host]
            dataNode = self.context.clusterInfoDict[hostName]["dataNode"]
            insType, dbStat = self.commonGsCtl.queryInstanceStatus(
                hostName, dataNode, self.envFile)
            if dbStat != STAT_NORMAL:
                self.commonGsCtl.startInstanceWithMode(
                    hostName, dataNode, MODE_STANDBY, self.envFile)

        self.commonGsCtl.startOmCluster(primaryHost, self.envFile)
class ParallelBaseOM(object): """ Base class of parallel command """ ACTION_INSTALL = "install" ACTION_CONFIG = "config" ACTION_START = "start" ACTION_REDISTRIBUTE = "redistribute" ACTION_HEALTHCHECK = "healthcheck" HEALTH_CHECK_BEFORE = "before" HEALTH_CHECK_AFTER = "after" """ Base class for parallel command """ def __init__(self): ''' Constructor ''' self.logger = None self.clusterInfo = None self.oldClusterInfo = None self.sshTool = None self.action = "" # Cluster config file. self.xmlFile = "" self.oldXmlFile = "" self.logType = DefaultValue.LOCAL_LOG_FILE self.logFile = "" self.localLog = "" self.user = "" self.group = "" self.mpprcFile = "" # Temporary catalog for install self.operateStepDir = TempfileManagement.getTempDir( "%s_step" % self.__class__.__name__.lower()) # Temporary files for install step self.operateStepFile = "%s/%s_step.dat" % ( self.operateStepDir, self.__class__.__name__.lower()) self.initStep = "" self.dws_mode = False self.rollbackCommands = [] self.etcdCons = [] self.cmCons = [] self.gtmCons = [] self.cnCons = [] self.dnCons = [] # localMode is same as isSingle in all OM script, expect for # gs_preinstall. # in gs_preinstall, localMode means local mode for master-standby # cluster. # in gs_preinstall, localMode also means local mode for single # cluster(will not create os user). # in gs_preinstall, isSingle means single cluster, it will create # os user. # not isSingle and not localMode : master-standby cluster global # mode(will create os user). # not isSingle and localMode : master-standby cluster local # mode(will not create os user). # isSingle and not localMode : single cluster(will create os user). # isSingle and localMode : single cluster(will not create os user). self.localMode = False self.isSingle = False # Indicates whether there is a logical cluster. # If elastic_group exists, the current cluster is a logical cluster. # Otherwise, it is a large physical cluster. 
self.isElasticGroup = False self.isAddElasticGroup = False self.lcGroup_name = "" # Lock the cluster mode, there are two modes: exclusive lock and # wait lock mode, # the default exclusive lock self.lockMode = "exclusiveLock" # SinglePrimaryMultiStandby support binary upgrade, inplace upgrade self.isSinglePrimaryMultiStandby = False # Adapt to 200 and 300 self.productVersion = None def initComponent(self): """ function: Init component input : NA output: NA """ for nodeInfo in self.clusterInfo.dbNodes: self.initKernelComponent(nodeInfo) def initComponentAttributes(self, component): """ function: Init component attributes on current node input : Object component output: NA """ component.logger = self.logger component.binPath = "%s/bin" % self.clusterInfo.appPath component.dwsMode = self.dws_mode def initKernelComponent(self, nodeInfo): """ function: Init kernel component input : Object nodeInfo output: NA """ for inst in nodeInfo.datanodes: component = DN_OLAP() # init component cluster type component.clusterType = self.clusterInfo.clusterType component.instInfo = inst self.initComponentAttributes(component) self.dnCons.append(component) def initLogger(self, module=""): """ function: Init logger input : module output: NA """ # log level LOG_DEBUG = 1 self.logger = GaussLog(self.logFile, module, LOG_DEBUG) dirName = os.path.dirname(self.logFile) self.localLog = os.path.join(dirName, DefaultValue.LOCAL_LOG_FILE) def initClusterInfo(self, refreshCN=True): """ function: Init cluster info input : NA output: NA """ try: self.clusterInfo = dbClusterInfo() if (refreshCN): static_config_file = "%s/bin/cluster_static_config" % \ DefaultValue.getInstallDir(self.user) self.clusterInfo.initFromXml(self.xmlFile, static_config_file) else: self.clusterInfo.initFromXml(self.xmlFile) except Exception as e: raise Exception(str(e)) self.logger.debug("Instance information of cluster:\n%s." % str(self.clusterInfo)) def initClusterInfoFromStaticFile(self, user, flag=True): """ function: Function to init clusterInfo from static file input : user output: NA """ try: self.clusterInfo = dbClusterInfo() self.clusterInfo.initFromStaticConfig(user) except Exception as e: raise Exception(str(e)) if flag: self.logger.debug("Instance information of cluster:\n%s." % str(self.clusterInfo)) def initSshTool(self, nodeNames, timeout=0): """ function: Init ssh tool input : nodeNames, timeout output: NA """ self.sshTool = SshTool(nodeNames, self.logger.logFile, timeout) def check_cluster_version_consistency(self, clusterNodes, newNodes=None): """ """ self.logger.log("Check cluster version consistency.") if newNodes is None: newNodes = [] dic_version_info = {} # check version.cfg on every node. 
gp_home = DefaultValue.getEnv("GPHOME") gauss_home = DefaultValue.getEnv("GAUSSHOME") if not (os.path.exists(gp_home) and os.path.exists(gauss_home)): GaussLog.exitWithError(ErrorCode.GAUSS_502["GAUSS_50201"] % ("%s", "or %s") % (gp_home, gauss_home)) for ip in clusterNodes: if ip in newNodes: cmd = "pssh -s -H %s 'cat %s/version.cfg'" % \ (ip, DefaultValue.getEnv("GPHOME")) else: cmd = "pssh -s -H %s 'cat %s/bin/upgrade_version'" % \ (ip, DefaultValue.getEnv("GAUSSHOME")) status, output = subprocess.getstatusoutput(cmd) if (status != 0): raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % cmd + " Error:\n%s" % str(output)) if len(output.strip().split()) < 3: raise Exception(ErrorCode.GAUSS_516["GAUSS_51623"]) dic_version_info[ip] = ",".join(output.strip().split()[1:]) self.logger.debug("The cluster version on every node.") for check_ip, version_info in dic_version_info.items(): self.logger.debug("%s : %s" % (check_ip, version_info)) if len(set(dic_version_info.values())) != 1: L_inconsistent = list(set(dic_version_info.values())) self.logger.debug("The package version on some nodes are " "inconsistent\n%s" % str(L_inconsistent)) raise Exception("The package version on some nodes are " "inconsistent,%s" % str(L_inconsistent)) self.logger.log("Successfully checked cluster version.") def checkBaseFile(self, checkXml=True): """ function: Check xml file and log file input : checkXml output: NA """ if (checkXml): if (self.xmlFile == ""): raise Exception(ErrorCode.GAUSS_500["GAUSS_50001"] % 'X' + ".") if (not os.path.exists(self.xmlFile)): raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % ("configuration file [%s]" % self.xmlFile)) if (not os.path.isabs(self.xmlFile)): raise Exception(ErrorCode.GAUSS_502["GAUSS_50213"] % ("configuration file [%s]" % self.xmlFile)) else: self.xmlFile = "" if (self.logFile == ""): self.logFile = DefaultValue.getOMLogPath(self.logType, self.user, "", self.xmlFile) if (not os.path.isabs(self.logFile)): raise Exception(ErrorCode.GAUSS_502["GAUSS_50213"] % "log") def initSignalHandler(self): """ function: Function to init signal handler input : NA output: NA """ signal.signal(signal.SIGINT, signal.SIG_IGN) signal.signal(signal.SIGQUIT, signal.SIG_IGN) signal.signal(signal.SIGTERM, signal.SIG_IGN) signal.signal(signal.SIGALRM, signal.SIG_IGN) signal.signal(signal.SIGHUP, signal.SIG_IGN) signal.signal(signal.SIGUSR1, signal.SIG_IGN) signal.signal(signal.SIGUSR2, signal.SIG_IGN) def print_signal_stack(self, frame): """ function: Function to print signal stack input : frame output: NA """ if (self.logger is None): return try: import inspect stacks = inspect.getouterframes(frame) for curr in range(len(stacks)): stack = stacks[curr] self.logger.debug("Stack level: %d. File: %s. Function: " "%s. LineNo: %d." % (curr, stack[1], stack[3], stack[2])) self.logger.debug("Code: %s." % (stack[4][0].strip().strip("\n"))) except Exception as e: self.logger.debug("Failed to print signal stack. Error: \n%s" % str(e)) def raise_handler(self, signal_num, frame): """ function: Function to raise handler input : signal_num, frame output: NA """ if (self.logger is not None): self.logger.debug("Received signal[%d]." 
% (signal_num)) self.print_signal_stack(frame) raise Exception(ErrorCode.GAUSS_516["GAUSS_51614"] % (signal_num)) def setupTimeoutHandler(self): """ function: Function to set up time out handler input : NA output: NA """ signal.signal(signal.SIGALRM, self.timeout_handler) def setTimer(self, timeout): """ function: Function to set timer input : timeout output: NA """ self.logger.debug("Set timer. The timeout: %d." % timeout) signal.signal(signal.SIGALRM, self.timeout_handler) signal.alarm(timeout) def resetTimer(self): """ function: Reset timer input : NA output: NA """ signal.signal(signal.SIGALRM, signal.SIG_IGN) self.logger.debug("Reset timer. Left time: %d." % signal.alarm(0)) def timeout_handler(self, signal_num, frame): """ function: Received the timeout signal input : signal_num, frame output: NA """ if (self.logger is not None): self.logger.debug("Received the timeout signal: [%d]." % (signal_num)) self.print_signal_stack(frame) raise Timeout("Time out.") def waitProcessStop(self, processKeywords, hostname): """ function: Wait the process stop input : process name output: NA """ count = 0 while (True): psCmd = "ps ux|grep -v grep |awk '{print \$11}'|grep '%s' " % \ processKeywords.strip() (status, output) = self.sshTool.getSshStatusOutput(psCmd, [hostname]) # Determine whether the process can be found. if (status[hostname] != DefaultValue.SUCCESS): self.logger.debug("The %s process stopped." % processKeywords) break count += 1 if (count % 20 == 0): self.logger.debug("The %s process exists." % processKeywords) time.sleep(3) def managerOperateStepDir(self, action='create', nodes=None): """ function: manager operate step directory input : NA output: currentStep """ if nodes is None: nodes = [] try: # Creating the backup directory if (action == "create"): cmd = "(if [ ! -d '%s' ];then mkdir -p '%s' -m %s;fi)" % ( self.operateStepDir, self.operateStepDir, DefaultValue.KEY_DIRECTORY_MODE) else: cmd = "(if [ -d '%s' ];then rm -rf '%s';fi)" % ( self.operateStepDir, self.operateStepDir) DefaultValue.execCommandWithMode(cmd, "%s temporary directory" % action, self.sshTool, self.localMode or self.isSingle, "", nodes) except Exception as e: raise Exception(str(e)) def readOperateStep(self): """ function: read operate step signal input : NA output: currentStep """ currentStep = self.initStep if not os.path.exists(self.operateStepFile): self.logger.debug("The %s does not exits." % self.operateStepFile) return currentStep if not os.path.isfile(self.operateStepFile): self.logger.debug("The %s must be a file." 
% self.operateStepFile) return currentStep with open(self.operateStepFile, "r") as fp: line = fp.readline().strip() if line is not None and line != "": currentStep = line return currentStep def writeOperateStep(self, stepName, nodes=None): """ function: write operate step signal input : step output: NA """ if nodes is None: nodes = [] try: # write the step into INSTALL_STEP # open the INSTALL_STEP with open(self.operateStepFile, "w") as g_DB: # write the INSTALL_STEP g_DB.write(stepName) g_DB.write(os.linesep) g_DB.flush() # change the INSTALL_STEP permissions g_file.changeMode(DefaultValue.KEY_FILE_MODE, self.operateStepFile) # distribute file to all nodes cmd = "mkdir -p -m %s '%s'" % (DefaultValue.KEY_DIRECTORY_MODE, self.operateStepDir) DefaultValue.execCommandWithMode( cmd, "create backup directory " "on all nodes", self.sshTool, self.localMode or self.isSingle, "", nodes) if not self.localMode and not self.isSingle: self.sshTool.scpFiles(self.operateStepFile, self.operateStepDir, nodes) except Exception as e: # failed to write the step into INSTALL_STEP raise Exception(str(e)) def distributeFiles(self): """ function: distribute package to every host input : NA output: NA """ self.logger.debug("Distributing files.") try: # get the all nodes hosts = self.clusterInfo.getClusterNodeNames() if DefaultValue.GetHostIpOrName() not in hosts: raise Exception(ErrorCode.GAUSS_516["GAUSS_51619"] % DefaultValue.GetHostIpOrName()) hosts.remove(DefaultValue.GetHostIpOrName()) # Send xml file to every host DefaultValue.distributeXmlConfFile(self.sshTool, self.xmlFile, hosts, self.mpprcFile) # Successfully distributed files self.logger.debug("Successfully distributed files.") except Exception as e: # failed to distribute package to every host raise Exception(str(e)) def checkPreInstall(self, user, flag, nodes=None): """ function: check if have done preinstall on given nodes input : user, nodes output: NA """ if nodes is None: nodes = [] try: cmd = "%s -U %s -t %s" % ( OMCommand.getLocalScript("Local_Check_PreInstall"), user, flag) DefaultValue.execCommandWithMode(cmd, "check preinstall", self.sshTool, self.localMode or self.isSingle, "", nodes) except Exception as e: raise Exception(str(e)) def checkNodeInstall(self, nodes=None, checkParams=None, strictUserCheck=True): """ function: Check node install input : nodes, checkParams, strictUserCheck output: NA """ if nodes is None: nodes = [] if checkParams is None: checkParams = [] validParam = ["shared_buffers", "max_connections"] cooGucParam = "" for param in checkParams: entry = param.split("=") if (len(entry) != 2): raise Exception(ErrorCode.GAUSS_500["GAUSS_50009"]) if (entry[0].strip() in validParam): cooGucParam += " -C \\\"%s\\\"" % param self.logger.log("Checking installation environment on all nodes.") cmd = "%s -U %s:%s -R %s %s -l %s -X '%s'" % ( OMCommand.getLocalScript("Local_Check_Install"), self.user, self.group, self.clusterInfo.appPath, cooGucParam, self.localLog, self.xmlFile) if (not strictUserCheck): cmd += " -O" self.logger.debug("Checking the install command: %s." 
% cmd) DefaultValue.execCommandWithMode(cmd, "check installation environment", self.sshTool, self.localMode or self.isSingle, "", nodes) def cleanNodeConfig(self, nodes=None, datadirs=None): """ function: Clean instance input : nodes, datadirs output: NA """ self.logger.log("Deleting instances from all nodes.") if nodes is None: nodes = [] if datadirs is None: datadirs = [] cmdParam = "" for datadir in datadirs: cmdParam += " -D %s " % datadir cmd = "%s -U %s %s -l %s" % (OMCommand.getLocalScript( "Local_Clean_Instance"), self.user, cmdParam, self.localLog) DefaultValue.execCommandWithMode(cmd, "clean instance", self.sshTool, self.localMode or self.isSingle, "", nodes) self.logger.log("Successfully deleted instances from all nodes.") @staticmethod def getPrepareKeysCmd(key_file, user, confFile, destPath, logfile, userProfile="", localMode=False): """ function: get etcd communication keys command input: key_file, user, confFile, destPath, localMode:do not scp keys output: NA """ if (not os.path.exists(key_file)): raise Exception(ErrorCode.GAUSS_502["GAUSS_50201"] % key_file) if (not userProfile): userProfile = DefaultValue.getMpprcFile() # create the directory on all nodes cmd = "source %s; %s -U %s -X %s --src-file=%s --dest-path=%s -l %s" \ % (userProfile, OMCommand.getLocalScript("Local_PrepareKeys"), user, confFile, key_file, destPath, logfile) # if local mode, only prepare keys, do not scp keys to cluster nodes if (localMode): cmd += " -L" return cmd def getClusterRings(self, clusterInfo): """ function: get clusterRings from cluster info input: DbclusterInfo() instance output: list """ hostPerNodeList = self.getDNHostnamesPerNode(clusterInfo) # Loop the hostname list on each node where the master and slave # of the DB instance. for i in range(len(hostPerNodeList)): # Loop the list after the i-th list for perNodelist in hostPerNodeList[i + 1:len(hostPerNodeList)]: # Define a tag flag = 0 # Loop the elements of each perNodelist for hostNameElement in perNodelist: # If elements on the i-th node, each element of the # list are joined in hostPerNodeList[i if hostNameElement in hostPerNodeList[i]: flag = 1 for element in perNodelist: if element not in hostPerNodeList[i]: hostPerNodeList[i].append(element) if (flag == 1): hostPerNodeList.remove(perNodelist) return hostPerNodeList def getDNHostnamesPerNode(self, clusterInfo): """ function: get DB hostnames per node input: DbclusterInfo() instance output: list """ hostPerNodeList = [] for dbNode in clusterInfo.dbNodes: nodeDnlist = [] # loop per node for dnInst in dbNode.datanodes: if (dnInst.instanceType == DefaultValue.MASTER_INSTANCE): if dnInst.hostname not in nodeDnlist: nodeDnlist.append(dnInst.hostname) # get other standby and dummy hostname instances = clusterInfo.getPeerInstance(dnInst) for inst in instances: if inst.hostname not in nodeDnlist: nodeDnlist.append(inst.hostname) if nodeDnlist != []: hostPerNodeList.append(nodeDnlist) return hostPerNodeList # for olap function def checkIsElasticGroupExist(self, dbNodes): """ function: Check if elastic_group exists. 
input : NA output: NA """ self.logger.debug("Checking if elastic group exists.") self.isElasticGroup = False coorNode = [] # traverse old nodes for dbNode in dbNodes: if (len(dbNode.coordinators) >= 1): coorNode.append(dbNode.coordinators[0]) break # check elastic group CHECK_GROUP_SQL = "SELECT count(*) FROM pg_catalog.pgxc_group " \ "WHERE group_name='elastic_group' " \ "and group_kind='e'; " (checkstatus, checkoutput) = ClusterCommand.remoteSQLCommand( CHECK_GROUP_SQL, self.user, coorNode[0].hostname, coorNode[0].port) if (checkstatus != 0 or not checkoutput.isdigit()): raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % "node group" + " Error:\n%s" % str(checkoutput)) elif (checkoutput.strip() == '1'): self.isElasticGroup = True elif (checkoutput.strip() == '0'): self.isElasticGroup = False else: raise Exception(ErrorCode.GAUSS_502["GAUSS_50219"] % "the number of node group") self.logger.debug("Successfully checked if elastic group exists.") def checkHostnameIsLoop(self, nodenameList): """ function: check if hostname is looped input : NA output: NA """ isRing = True # 1.get ring information in the cluster clusterRings = self.getClusterRings(self.clusterInfo) nodeRing = "" nodenameRings = [] # 2.Check if the node is in the ring for num in iter(clusterRings): ringNodeList = [] for nodename in nodenameList: if (nodename in num): ringNodeList.append(nodename) if (len(ringNodeList) != 0 and len(ringNodeList) == len(num)): nodenameRings.append(ringNodeList) if (len(ringNodeList) != 0 and len(ringNodeList) != len(num)): isRing = False break else: continue if not isRing: raise Exception(ErrorCode.GAUSS_500["GAUSS_50004"] % "h" + " The hostname (%s) specified by the -h parameter " "must be looped." % nodeRing) return (clusterRings, nodenameRings) def getDNinstanceByNodeName(self, hostname, isMaster=True): """ function: Get the DB instance of the node based on the node name. input : hostname isMaster: get master DB instance output: NA """ masterdnInsts = [] standbydnInsts = [] # notice for dbNode in self.clusterInfo.dbNodes: if (dbNode.name == hostname): for dbInst in dbNode.datanodes: # get master DB instance if (dbInst.instanceType == DefaultValue.MASTER_INSTANCE): masterdnInsts.append(dbInst) # get standby or dummy DB instance else: standbydnInsts.append(dbInst) if (isMaster): return masterdnInsts else: return standbydnInsts def getSQLResultList(self, sql, user, hostname, port, database="postgres"): """ """ (status, output) = ClusterCommand.remoteSQLCommand(sql, user, hostname, port, False, database) if status != 0 or ClusterCommand.findErrorInSql(output): raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % sql + " Error:\n%s" % str(output)) # split the output string with '\n' resultList = output.split("\n") return resultList def getCooInst(self): """ function: get CN instance input : NA output: CN instance """ coorInst = [] # get CN on nodes for dbNode in self.clusterInfo.dbNodes: if (len(dbNode.coordinators) >= 1): coorInst.append(dbNode.coordinators[0]) # check if contain CN on nodes if (len(coorInst) == 0): raise Exception(ErrorCode.GAUSS_526["GAUSS_52602"]) else: return coorInst def getGroupName(self, fieldName, fieldVaule): """ function: Get nodegroup name by field name and field vaule. 
input : field name and field vaule output: node group name """ # 1.get CN instance info from cluster cooInst = self.getCooInst() # 2.obtain the node group OBTAIN_SQL = "select group_name from pgxc_group where %s = %s; " % \ (fieldName, fieldVaule) # execute the sql command (status, output) = ClusterCommand.remoteSQLCommand(OBTAIN_SQL, self.user, cooInst[0].hostname, cooInst[0].port, ignoreError=False) if (status != 0): raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % OBTAIN_SQL + " Error:\n%s" % str(output)) return output.strip() def killKernalSnapshotThread(self, dnInst): """ function: kill snapshot thread in Kernel, avoid dead lock with redistribution) input : NA output: NA """ self.logger.debug("Stopping snapshot thread in database node Kernel.") killSnapshotSQL = "select * from kill_snapshot();" (status, output) = ClusterCommand.remoteSQLCommand( killSnapshotSQL, self.user, dnInst.hostname, dnInst.port, False, DefaultValue.DEFAULT_DB_NAME) if (status != 0): raise Exception(ErrorCode.GAUSS_514["GAUSS_51400"] % killSnapshotSQL + " Error:\n%s" % str(output)) self.logger.debug("Successfully stopped snapshot " "thread in database node Kernel.") def createServerCa(self, hostList=None): """ function: create grpc ca file input : NA output: NA """ self.logger.debug("Generating CA files.") if hostList is None: hostList = [] appPath = DefaultValue.getInstallDir(self.user) caPath = os.path.join(appPath, "share/sslcert/om") self.logger.debug("The ca file dir is: %s." % caPath) if (len(hostList) == 0): for dbNode in self.clusterInfo.dbNodes: hostList.append(dbNode.name) # Create CA dir and prepare files for using. self.logger.debug("Create CA file directory.") try: DefaultValue.createCADir(self.sshTool, caPath, hostList) self.logger.debug("Add hostname to config file.") DefaultValue.createServerCA(DefaultValue.SERVER_CA, caPath, self.logger) # Clean useless files, and change permission of ca file to 600. DefaultValue.cleanServerCaDir(caPath) self.logger.debug("Scp CA files to all nodes.") except Exception as e: certFile = caPath + "/demoCA/cacert.pem" if os.path.exists(certFile): g_file.removeFile(certFile) DefaultValue.cleanServerCaDir(caPath) raise Exception(str(e)) if not self.isSingle: # localhost no need scp files for certFile in DefaultValue.SERVER_CERT_LIST: scpFile = os.path.join(caPath, "%s" % certFile) self.sshTool.scpFiles(scpFile, caPath, hostList) self.logger.debug("Successfully generated server CA files.") def createGrpcCa(self, hostList=None): """ function: create grpc ca file input : NA output: NA """ self.logger.debug("Generating grpc CA files.") if hostList is None: hostList = [] appPath = DefaultValue.getInstallDir(self.user) caPath = os.path.join(appPath, "share/sslcert/grpc") self.logger.debug("The ca file dir is: %s." % caPath) if (len(hostList) == 0): for dbNode in self.clusterInfo.dbNodes: hostList.append(dbNode.name) # Create CA dir and prepare files for using. self.logger.debug("Create CA file directory.") try: DefaultValue.createCADir(self.sshTool, caPath, hostList) self.logger.debug("Add hostname to config file.") configPath = os.path.join(appPath, "share/sslcert/grpc/openssl.cnf") self.logger.debug("The ca file dir is: %s." % caPath) # Add hostname to openssl.cnf file. DefaultValue.changeOpenSslConf(configPath, hostList) self.logger.debug("Generate CA files.") DefaultValue.createCA(DefaultValue.GRPC_CA, caPath) # Clean useless files, and change permission of ca file to 600. 
DefaultValue.cleanCaDir(caPath) self.logger.debug("Scp CA files to all nodes.") except Exception as e: certFile = caPath + "/demoCA/cacertnew.pem" if os.path.exists(certFile): g_file.removeFile(certFile) DefaultValue.cleanCaDir(caPath) raise Exception(str(e)) for certFile in DefaultValue.GRPC_CERT_LIST: scpFile = os.path.join(caPath, "%s" % certFile) self.sshTool.scpFiles(scpFile, caPath, hostList) self.logger.debug("Successfully generated grpc CA files.") def genCipherAndRandFile(self, hostList=None, initPwd=None): self.logger.debug("Encrypting cipher and rand files.") if hostList is None: hostList = [] appPath = DefaultValue.getInstallDir(self.user) binPath = os.path.join(appPath, "bin") retry = 0 while True: if not initPwd: sshpwd = getpass.getpass("Please enter password for database:") sshpwd_check = getpass.getpass("Please repeat for database:") else: sshpwd = sshpwd_check = initPwd if sshpwd_check != sshpwd: sshpwd = "" sshpwd_check = "" self.logger.error(ErrorCode.GAUSS_503["GAUSS_50306"] % "database" + "The two passwords are different, " "please enter password again.") else: cmd = "%s/gs_guc encrypt -M server -K %s -D %s " % ( binPath, sshpwd, binPath) (status, output) = subprocess.getstatusoutput(cmd) sshpwd = "" sshpwd_check = "" initPwd = "" if status != 0: self.logger.error(ErrorCode.GAUSS_503["GAUSS_50322"] % "database" + "Error:\n %s" % output) else: break if retry >= 2: raise Exception(ErrorCode.GAUSS_503["GAUSS_50322"] % "database") retry += 1 g_file.changeMode(DefaultValue.KEY_FILE_MODE, "'%s'/server.key.cipher" % binPath) g_file.changeMode(DefaultValue.KEY_FILE_MODE, "'%s'/server.key.rand" % binPath) if len(hostList) == 0: for dbNode in self.clusterInfo.dbNodes: hostList.append(dbNode.name) if not self.isSingle: # localhost no need scp files for certFile in DefaultValue.BIN_CERT_LIST: scpFile = os.path.join(binPath, "%s" % certFile) self.sshTool.scpFiles(scpFile, binPath, hostList) self.logger.debug("Successfully encrypted cipher and rand files.")