def doCheck(self):
    global dbList
    self.result.rst = ResultStatus.OK
    sqldb = "select datname from pg_database;"
    output = SharedFuncs.runSqlCmd(sqldb, self.user, "", self.port,
                                   self.tmpPath, "postgres", self.mpprcFile)
    dbList = output.split("\n")
    dbList.remove("template0")
    # pg_table_size(1259) is the on-disk size of pg_class (OID 1259);
    # dividing by the row count and an expected average row width yields
    # a dilation (bloat) ratio
    sql = "select (pg_table_size(1259)/count(*)/247.172)::numeric(10,3)" \
          " from pg_class;"
    result = []
    for db in dbList:
        # Calculate the dilation ratio with the sql cmd
        output = SharedFuncs.runSqlCmd(sql, self.user, "", self.port,
                                       self.tmpPath, db, self.mpprcFile)
        if (float(output) > self.Threshold_NG):
            self.result.rst = ResultStatus.NG
            result.append(db)
        elif (float(output) > self.Threshold_Warning):
            result.append(db)
            if (self.result.rst == ResultStatus.OK):
                self.result.rst = ResultStatus.WARNING
    if (self.result.rst == ResultStatus.OK):
        self.result.val = "no system table dilation"
    else:
        self.result.val = "system table dilation found in" \
                          " databases:\n%s" % "\n".join(result)
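# The constants in the dilation query above are easy to misread, so here is
# an annotated restatement (a sketch; 1259 is the reserved OID of pg_class,
# and 247.172 is treated as the expected average row width in bytes -- an
# assumption inferred from the query, not documented in this section):
PG_CLASS_OID = 1259           # pg_table_size(1259) == on-disk size of pg_class
EXPECTED_ROW_WIDTH = 247.172  # assumed average bytes per pg_class row

def dilationRatio(tableSizeBytes, rowCount):
    """ratio ~= 1.0 means compact storage; values well above 1.0 indicate
    system-table bloat (compared against the NG/WARNING thresholds above)."""
    return tableSizeBytes / rowCount / EXPECTED_ROW_WIDTH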
def getLocalPrimaryDNid(self, nodeInfo):
    """
    function: Get the instance ids of local primary DNs
    input: nodeInfo
    output: list of primary DN instance ids
    """
    tmpFile = os.path.join(self.tmpPath, "gauss_dn_status.dat")
    primaryDNidList = []
    try:
        # Use cm_ctl to query the current node instance
        cmd = ClusterCommand.getQueryStatusCmd(self.user, nodeInfo.name,
                                               tmpFile)
        SharedFuncs.runShellCmd(cmd, self.user, self.mpprcFile)
        # Match query results against the cluster configuration
        clusterStatus = DbClusterStatus()
        clusterStatus.initFromFile(tmpFile)
        if (os.path.exists(tmpFile)):
            os.remove(tmpFile)
        # Find the primary DB instances
        for dbNode in clusterStatus.dbNodes:
            for instance in dbNode.datanodes:
                if instance.status == 'Primary':
                    primaryDNidList.append(instance.instanceId)
        return primaryDNidList
    except Exception as e:
        if (os.path.exists(tmpFile)):
            os.remove(tmpFile)
        raise Exception(str(e))
def doCheck(self):
    cmd = "gs_om -t status"
    output = SharedFuncs.runShellCmd(cmd, self.user, self.mpprcFile)
    if (output.find("Normal") < 0 and output.find("Degraded") < 0):
        self.result.rst = ResultStatus.NG
        self.result.val = "The database cannot be connected."
        return
    instanceList = []
    AbnormalInst = []
    primaryDnList = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
    nodeInfo = self.cluster.getDbNodeByName(self.host)
    localDnList = nodeInfo.datanodes
    for dn in localDnList:
        if (dn.instanceId in primaryDnList):
            instanceList.append(dn)
    sqlcmd = "select pg_sleep(1);"
    for instance in instanceList:
        cmd = "gsql -m -d postgres -p %s -c '%s'" % (instance.port, sqlcmd)
        if (self.mpprcFile):
            cmd = "source '%s' && %s" % (self.mpprcFile, cmd)
        if (os.getuid() == 0):
            cmd = "su - %s -c \"%s\" " % (self.user, cmd)
        self.result.raw += "\n%s" % cmd
        (status, output) = subprocess.getstatusoutput(cmd)
        # ">= 0" (not "> 0"): str.find returns -1 when absent, 0 at offset 0
        if (status != 0 or output.find("connect to server failed") >= 0):
            AbnormalInst.append(instance.instanceId)
            self.result.val += "The instance %s cannot be connected.\n" \
                               % instance.instanceId
            self.result.raw += "\nError: %s" % output
    if AbnormalInst:
        self.result.rst = ResultStatus.NG
    else:
        self.result.rst = ResultStatus.OK
        self.result.val = "The database connection is normal."
def doCheck(self):
    sql1 = """select a.relname, b.attname, n.nspname||'.'||a.relname
                from pg_class a, pg_attribute b, pg_namespace n
               where a.oid = b.attrelid and b.attisdropped
                 and n.oid = a.relnamespace;"""
    sqldb = "select datname from pg_database;"
    output = SharedFuncs.runSqlCmd(sqldb, self.user, "", self.port,
                                   self.tmpPath, "postgres", self.mpprcFile)
    dbList = output.split("\n")
    dbList.remove("template0")
    result = ""
    for db in dbList:
        # run the query in each database, not only in postgres
        output1 = SharedFuncs.runSqlSimplely(sql1, self.user, "", self.port,
                                             self.tmpPath, db,
                                             self.mpprcFile)
        if (output1.find("(0 rows)") < 0):
            result += "%s:\n%s\n" % (db, output1)
    if (result):
        self.result.val = "Alter table drop column operation " \
                          "was performed in:\n%s" % result
        self.result.rst = ResultStatus.NG
        self.result.raw = sql1
    else:
        self.result.val = "No alter table drop column operation"
        self.result.rst = ResultStatus.OK
def doCheck(self):
    parRes = ""
    # generate the hostfile with the server node names
    self.genhostfile(self.nodes)
    # shell script name
    shellName = "getClusterInfo.sh"
    # the path of the script
    shellName = os.path.join(SHELLPATH, shellName)
    # check permission
    self.checkFilePermission(shellName)
    g_file.replaceFileLineContent('omm', self.user, shellName)
    g_file.replaceFileLineContent(
        '\/opt\/huawei\/Bigdata\/mppdb\/.mppdbgs_profile',
        self.mpprcFile.replace('/', '\/'), shellName)
    # the shell command
    executeCmd = "cd %s && sh %s -p %s" % (SHELLPATH, shellName, self.port)
    self.result.raw = executeCmd
    # Call the shell script
    SharedFuncs.runShellCmd(executeCmd, self.user, self.mpprcFile)
    self.result.rst = ResultStatus.OK
    packageName = os.path.join(self.outPath,
                               "checkcollector_%s" % self.context.checkID)
    # create the zip package
    g_file.compressZipFiles(packageName, os.path.join(SHELLPATH, 'out'))
    # report the result information
    parRes += "The inspection(checkcollector) has been completed!\n"
    parRes += "Please decompress it first." \
              " The log is saved in '%s.zip'" % packageName
    self.result.val = parRes
def doCheck(self):
    global g_setDict
    databaseListSql = "select datname from pg_database where datname != " \
                      "'template0';"
    sqlCmd = "select group_name from pgxc_group where length(group_name)" \
             " != length(group_name::bytea, 'SQL_ASCII');"
    output = SharedFuncs.runSqlCmd(databaseListSql, self.user, "",
                                   self.port, self.tmpPath, "postgres",
                                   self.mpprcFile)
    dbList = output.split("\n")
    resultStr = ""
    for databaseName in dbList:
        output = SharedFuncs.runSqlCmd(sqlCmd, self.user, "", self.port,
                                       self.tmpPath, databaseName,
                                       self.mpprcFile, True)
        if not output:
            continue
        g_setDict[databaseName] = output
        resultStr += "The node group name of %s contains non-SQL_ASCII " \
                     "characters.\n" % databaseName
    if (resultStr):
        self.result.rst = ResultStatus.NG
        self.result.val = resultStr
    else:
        self.result.rst = ResultStatus.OK
        self.result.val = "The node group names in all databases contain" \
                          " only SQL_ASCII characters."
def doSet(self):
    # os.getuid() must be called; comparing the function object to 0 is
    # always False
    if os.getuid() == 0:
        cmd = "crontab -l -u '%s'" % self.crontabUser
    else:
        cmd = "crontab -l"
    (status, output) = subprocess.getstatusoutput(cmd)
    if status != 0 or output.find('om_monitor') < 0:
        self.result.val = "No gauss process in crontab.\n"
        return
    tmpCrondFileName = "gauss_crond_tmp"
    tmpCrondFile = os.path.join(self.tmpPath, tmpCrondFileName)
    try:
        SharedFuncs.createFile(tmpCrondFile, self.tmpPath)
        SharedFuncs.writeFile(tmpCrondFile, output, self.tmpPath)
        cmd = "sed -i '/om_monitor/d' %s" % tmpCrondFile
        SharedFuncs.runShellCmd(cmd)
        cmd = "crontab %s " % tmpCrondFile
        if os.getuid() == 0:
            # "-c" is required for su to execute the command string
            cmd = "su - %s -c '%s'" % (self.crontabUser, cmd)
        (status, output) = subprocess.getstatusoutput(cmd)
        if status != 0:
            self.result.val = "Failed to clean om_monitor in crontab." \
                              " Error: %s\n" % output + "The cmd is %s " \
                              % cmd
        else:
            self.result.val = "Successfully cleaned om_monitor" \
                              " in crontab.\n"
        SharedFuncs.cleanFile(tmpCrondFile)
    except Exception as e:
        if os.path.exists(tmpCrondFile):
            SharedFuncs.cleanFile(tmpCrondFile)
        raise Exception(str(e))
def doCheck(self):
    procadj = {}
    result = ""
    prolist = ['om_monitor', 'cm_agent', 'gaussdb', 'cm_server', 'gtm',
               'etcd']
    gausshome = self.cluster.appPath
    gaussdbpath = os.path.join(gausshome, "bin/gaussdb")
    for process in prolist:
        # match the gaussdb binary by its full path to avoid false hits
        if (process == 'gaussdb'):
            getpidcmd = "ps ux| grep '%s'|grep -v 'grep'|awk '{print $2}'" \
                        % gaussdbpath
        else:
            getpidcmd = "ps ux| grep '%s'|grep -v 'grep'|awk '{print $2}'" \
                        % process
        pids = SharedFuncs.runShellCmd(getpidcmd)
        for pid in pids.splitlines():
            getAdjcmd = "cat /proc/%s/oom_adj" % pid
            adjValue = SharedFuncs.runShellCmd(getAdjcmd)
            if (int(adjValue) < 0):
                tmpkey = "%s_%s" % (process, pid)
                procadj[tmpkey] = adjValue
    if (procadj):
        self.result.rst = ResultStatus.NG
        for key, value in procadj.items():
            result += "%s : %s \n" % (key, value)
        self.result.val = "There are processes with oom_adj value " \
                          "less than 0\n%s" % result
    else:
        self.result.rst = ResultStatus.OK
        self.result.val = "All key processes' oom_adj values" \
                          " are not less than 0"
def output(self, outPath): u""" [HOST] {host} [NAM] {name} [RST] {rst} [VAL] {val} [RAW] {raw} """ val = self.val if self.val else "" raw = self.raw if self.raw else "" try: content = self.output.__doc__.format(name=self.name, rst=self.rst, host=self.host, val=val, raw=raw) except Exception: content = self.output.__doc__.encode('utf-8').format( name=self.name, rst=self.rst, host=self.host, val=val, raw=raw).decode('utf-8', 'ignore') fileName = "%s_%s_%s.out" % (self.name, self.host, self.checkID) # output the result to local path SharedFuncs.writeFile(fileName, content, outPath, DefaultValue.KEY_FILE_MODE, self.user)
def checkSysTable(self):
    primaryDNidList = []
    nodeInfo = self.cluster.getDbNodeByName(self.host)
    CN = nodeInfo.coordinators
    masterDnList = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
    for DnInstance in nodeInfo.datanodes:
        if (DnInstance.instanceId in masterDnList):
            primaryDNidList.append(DnInstance)
    if (len(CN) < 1 and len(primaryDNidList) < 1):
        raise CheckNAException(
            "There is no primary database node instance in the "
            "current node.")
    # test the database connection
    for Instance in (CN + primaryDNidList):
        if not Instance:
            continue
        sqlcmd = "select pg_sleep(1);"
        SharedFuncs.runSqlCmd(sqlcmd, self.user, "", Instance.port,
                              self.tmpPath, self.database, self.mpprcFile)
    outputList = []
    pool = ThreadPool(DefaultValue.getCpuSet())
    results = pool.map(self.checkSingleSysTable, CN + primaryDNidList)
    pool.close()
    pool.join()
    for result in results:
        if (result):
            outputList.append(result)
    outputList.sort()
    return outputList
def doCheck(self):
    ipList = []
    routingBinary = self.getBinaryRouting(self.routing)
    if g_Platform.isPlatFormEulerOSOrRHEL7X():
        cmd = "/sbin/ifconfig -a |grep -E '\<inet\>'| awk '{print $2}'"
    else:
        cmd = "/sbin/ifconfig -a |grep 'inet addr'|" \
              " awk '{print $2}'| awk -F ':' '{print $2}'"
    output = SharedFuncs.runShellCmd(cmd)
    for eachLine in output.split('\n'):
        if (SharedFuncs.validate_ipv4(eachLine)):
            maskAddr = SharedFuncs.getMaskByIP(eachLine)
            ipMask = "%s:%s" % (eachLine, maskAddr)
            ipList.append(ipMask)
    self.result.raw = "Routing: %s [bit]%s\nlocalIP:\n%s" % (
        self.routing, routingBinary, "\n".join(ipList))
    commIP = []
    for ipMask in ipList:
        ipBinary = self.getBinaryRouting(ipMask)
        if (ipBinary == routingBinary):
            commIP.append(ipMask)
    # more than one local IP in the business network segment is ambiguous
    if (len(commIP) > 1):
        self.result.rst = ResultStatus.WARNING
    else:
        self.result.rst = ResultStatus.OK
    self.result.val = "Business network segment IP: " + ", ".join(commIP)
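# getBinaryRouting() is used above but not defined in this section. A
# minimal sketch of what it presumably does -- reduce an "ip:mask" pair to
# its binary network prefix so two values can be compared for segment
# membership (assumes a contiguous netmask; illustrative only):
def getBinaryRoutingSketch(ipMask):
    ip, mask = ipMask.split(':')
    ipBits = ''.join(format(int(octet), '08b') for octet in ip.split('.'))
    maskBits = ''.join(format(int(octet), '08b') for octet in mask.split('.'))
    # keep only the bits covered by the netmask
    return ipBits[:maskBits.count('1')]

assert getBinaryRoutingSketch("10.1.2.3:255.255.0.0") == \
       getBinaryRoutingSketch("10.1.9.9:255.255.0.0")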
def doCheck(self):
    sqlcmd1 = "show max_connections;"
    sqlcmd2 = "SELECT count(*) FROM pg_stat_activity;"
    self.result.raw = sqlcmd1 + sqlcmd2
    output1 = SharedFuncs.runSqlCmd(sqlcmd1, self.user, "", self.port,
                                    self.tmpPath, "postgres",
                                    self.mpprcFile)
    output2 = SharedFuncs.runSqlCmd(sqlcmd2, self.user, "", self.port,
                                    self.tmpPath, "postgres",
                                    self.mpprcFile)
    if (not (output1.isdigit() and output2.isdigit())):
        self.result.rst = ResultStatus.ERROR
        self.result.val = "max_connections: %s\nCurConnCount: %s" % (
            output1, output2)
        return
    maxConnections = float(output1)
    usedConnections = float(output2)
    if (maxConnections > 0 and usedConnections > 0):
        # true division: floor division ("//") would always yield 0.0 here
        OccupancyRate = usedConnections / maxConnections
        self.result.val = "%.2f%%" % (OccupancyRate * 100)
        if (OccupancyRate < 0.9):
            self.result.rst = ResultStatus.OK
        else:
            self.result.rst = ResultStatus.NG
    else:
        self.result.rst = ResultStatus.ERROR
        self.result.val = "max_connections: %s\nCurConnCount: %s" % (
            maxConnections, usedConnections)
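# Spot-check of the 90% threshold arithmetic above (sketch):
maxConn, used = 500.0, 450.0
rate = used / maxConn                       # 0.9
assert "%.2f%%" % (rate * 100) == "90.00%"
assert not rate < 0.9                       # exactly 90% is already NG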
def doCheck(self):
    if (self.cluster):
        LocalNodeInfo = self.cluster.getDbNodeByName(self.host)
        backIP = LocalNodeInfo.backIps[0]
    elif (self.ipAddr):
        backIP = self.ipAddr
    else:
        backIP = SharedFuncs.getIpByHostName(self.host)
    networkCardNumList = SharedFuncs.CheckNetWorkBonding(backIP)
    # CheckNetWorkBonding() returns this literal on failure
    # (spelling preserved to match the helper)
    if networkCardNumList == "Shell command faild":
        return
    networkCardNums = []
    if (len(networkCardNumList) != 1):
        networkCardNums = networkCardNumList[1:]
    else:
        networkCardNums.append(networkCardNumList[0])
    flag = True
    for networkCardNum in networkCardNums:
        cmd = "/sbin/ethtool -i %s" % networkCardNum
        output = SharedFuncs.runShellCmd(cmd)
        self.result.raw += "[%s]\n%s\n" % (networkCardNum, output)
        NICVer = ""
        PCIAddr = ""
        for eachLine in output.split("\n"):
            if (eachLine.startswith("version:")):
                NICVer = eachLine
            if (eachLine.startswith('bus-info:')):
                # e.g. "bus-info: 0000:02:00.0" -> PCI address "02:00.0"
                if (len(eachLine.split(':')) == 4):
                    PCIAddr = eachLine.split(':')[2] + ':' + \
                              eachLine.split(':')[3]
        if (NICVer):
            self.result.val += "%s\n" % NICVer
        else:
            self.result.val += "Failed to get NIC %s 'version' info\n" \
                               % networkCardNum
            flag = False
        if (PCIAddr):
            cmd = "lspci |grep %s" % PCIAddr
            (status, output) = subprocess.getstatusoutput(cmd)
            self.result.raw += "%s\n" % output
            if status == 0 and len(output.split(':')) >= 3:
                modelInfo = ':'.join(output.split(':')[2:]).split('(')[0]
                self.result.val += "model: %s\n" % modelInfo.strip()
            else:
                self.result.val += "Failed to get NIC %s model" \
                                   " 'bus-info' info\n" % networkCardNum
                self.result.val += "The cmd is %s " % cmd
                flag = False
        else:
            self.result.val += "Failed to get NIC %s model" \
                               " 'bus-info' info\n" % networkCardNum
            flag = False
    if (flag):
        self.result.rst = ResultStatus.OK
    else:
        self.result.rst = ResultStatus.NG
def getTcpUsedPort(self):
    if (self.ipAddr):
        serviceIP = self.ipAddr
    else:
        serviceIP = SharedFuncs.getIpByHostName(self.host)
    # column 4 of "netstat -ano" is the local address; count endpoints on
    # the service IP that appear exactly once
    cmd = "netstat -ano|awk '{print $4}'|grep '%s'|sort|uniq -c|" \
          "grep ' 1 '|wc -l" % serviceIP
    tcpUsed = SharedFuncs.runShellCmd(cmd)
    return int(tcpUsed)
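# A rough pure-Python restatement of the shell pipeline above (a sketch;
# netstatOutput stands in for the raw "netstat -ano" text):
from collections import Counter

def countSingleUseEndpoints(netstatOutput, serviceIP):
    addrs = [fields[3] for fields in
             (line.split() for line in netstatOutput.splitlines())
             if len(fields) > 3 and serviceIP in fields[3]]
    # "sort | uniq -c | grep ' 1 '" keeps endpoints seen exactly once
    return sum(1 for count in Counter(addrs).values() if count == 1)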
def getTestFile(self):
    machine = platform.machine()
    testSpeedFile = "%s/lib/checknetspeed/speed_test" \
                    % self.context.basePath
    if machine == "x86_64":
        cmd = "cp -p %s_x86 %s" % (testSpeedFile, testSpeedFile)
    # debian: deepin; Maipo: NeoKylin
    elif machine == "aarch64":
        cmd = "cp -p %s_arm %s" % (testSpeedFile, testSpeedFile)
    else:
        raise Exception(ErrorCode.GAUSS_530["GAUSS_53017"] % machine)
    SharedFuncs.runShellCmd(cmd)
def doCheck(self):
    sql1 = """select distinct rt.relname
                from PG_ATTRDEF ad,
                     (select c.oid, c.relname
                        from pg_class c, pgxc_class xc
                       where c.oid = xc.pcrelid
                         and c.relkind = 'r'
                         and xc.pclocatortype = 'R') as rt(oid, relname)
               where ad.adrelid = rt.oid
                 and ad.adsrc like '%nextval%';"""
    sql2 = """select relname
                from pg_class c, pg_namespace n
               where relkind = 'S'
                 and c.relnamespace = n.oid
                 and n.nspname like 'pg_temp%';"""
    sqldb = "select datname from pg_database;"
    output = SharedFuncs.runSqlCmd(sqldb, self.user, "", self.port,
                                   self.tmpPath, "postgres", self.mpprcFile)
    dbList = output.split("\n")
    dbList.remove("template0")
    result = ""
    for db in dbList:
        output1 = SharedFuncs.runSqlCmd(sql1, self.user, "", self.port,
                                        self.tmpPath, db, self.mpprcFile)
        tmptablist = []
        if (output1):
            for tab in output1.splitlines():
                # only report tables that actually contain data
                tmpsql = "select * from %s limit 1" % tab
                tmpout = SharedFuncs.runSqlCmd(tmpsql, self.user, "",
                                               self.port, self.tmpPath, db,
                                               self.mpprcFile)
                if (tmpout):
                    tmptablist.append(tab)
        output2 = SharedFuncs.runSqlCmd(sql2, self.user, "", self.port,
                                        self.tmpPath, db, self.mpprcFile)
        if (output2):
            for tab in output2.splitlines():
                if (tab not in tmptablist):
                    tmptablist.append(tab)
        if (tmptablist):
            result += "%s:\n%s\n" % (db, "\n".join(tmptablist))
    if (result):
        self.result.val = "There are default expressions that " \
                          "contain nextval(sequence):\n%s" % result
        self.result.rst = ResultStatus.NG
    else:
        self.result.val = "No default expression " \
                          "contains nextval(sequence)"
        self.result.rst = ResultStatus.OK
def SetIOSchedulers(self, devname, expectedScheduler):
    """
    function : Set the IO scheduler of a block device and persist the
               setting in the OS init file
    input : devname (block device name), expectedScheduler (scheduler name)
    output : NA
    """
    (THPFile, initFile) = SharedFuncs.getTHPandOSInitFile()
    cmd = " echo %s >> /sys/block/%s/queue/scheduler" % (expectedScheduler,
                                                         devname)
    # persist the setting so that it survives a reboot
    cmd += " && echo \"echo %s >> /sys/block/%s/queue/scheduler\" >> %s" \
           % (expectedScheduler, devname, initFile)
    SharedFuncs.runShellCmd(cmd)
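# Assumed usage (a sketch; "sdb" and "deadline" are illustrative values):
#   self.SetIOSchedulers("sdb", "deadline")
# The first echo switches the scheduler for the current boot; the second
# appends the same command to the OS init file so the setting survives a
# reboot. Reading the sysfs file back shows the active scheduler in
# brackets:
#   with open("/sys/block/sdb/queue/scheduler") as f:
#       print(f.read())   # e.g. "noop [deadline] cfq"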
def doCheck(self):
    global g_gucDist
    # get the ignore list
    dirName = os.path.dirname(os.path.realpath(__file__))
    configFile = "%s/../../config/check_list_%s.conf" % (dirName,
                                                         self.version)
    self.getIgnoreParameters(configFile, 'guc_ignore', 'guc_logic')
    DNidList = []
    result = []
    logicCluster = False
    nodeInfo = self.cluster.getDbNodeByName(self.host)
    masterDnList = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
    for DnInstance in nodeInfo.datanodes:
        if (DnInstance.instanceType != DUMMY_STANDBY_INSTANCE):
            DNidList.append(DnInstance)
    if len(DNidList) < 1:
        raise Exception(ErrorCode.GAUSS_512["GAUSS_51249"])
    # get information of the logic cluster on the current node
    (lcName, dbnode) = self.checkLogicCluster()
    if (dbnode):
        logicCluster = True
        for DnInstance in dbnode.datanodes:
            if (DnInstance.instanceType != DUMMY_STANDBY_INSTANCE):
                # query in standby mode ("-m") unless this is a primary DN
                needm = DnInstance.instanceId not in masterDnList
                result.append(
                    self.checkInstanceGucValue(DnInstance, needm, lcName,
                                               logicCluster))
        g_gucDist[lcName] = result
    # test the database connection
    for Instance in DNidList:
        if not Instance:
            continue
        sqlcmd = "select pg_sleep(1);"
        needm = Instance.instanceId not in masterDnList
        output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "",
                                       Instance.port, self.tmpPath,
                                       'postgres', self.mpprcFile, needm)
        self.checkInstanceGucValue(Instance, needm, "", logicCluster)
    self.result.val = json.dumps(g_gucDist)
    self.result.raw = str(g_gucDist)
    self.result.rst = ResultStatus.OK
def checkSingleSysTable(self, Instance):
    tablelist = ["pg_attribute", "pg_class", "pg_constraint",
                 "pg_partition", "pgxc_class", "pg_index", "pg_stats"]
    localPath = os.path.dirname(os.path.realpath(__file__))
    resultMap = {}
    try:
        for i in tablelist:
            sqlFile = "%s/sqlFile_%s_%s.sql" % (self.tmpPath, i,
                                                Instance.instanceId)
            resFile = "%s/resFile_%s_%s.out" % (self.tmpPath, i,
                                                Instance.instanceId)
            g_file.createFile(sqlFile, True, DefaultValue.SQL_FILE_MODE)
            g_file.createFile(resFile, True, DefaultValue.SQL_FILE_MODE)
            g_file.changeOwner(self.user, sqlFile)
            g_file.changeOwner(self.user, resFile)
            # three queries -> three result lines: size, row count, width
            sql = "select * from pg_table_size('%s');" % i
            sql += "select count(*) from %s;" % i
            sql += "select * from pg_column_size('%s');" % i
            g_file.writeFile(sqlFile, [sql])
            cmd = "gsql -d %s -p %s -f %s --output %s -t -A -X" % (
                self.database, Instance.port, sqlFile, resFile)
            if (self.mpprcFile != "" and self.mpprcFile is not None):
                cmd = "source '%s' && %s" % (self.mpprcFile, cmd)
            SharedFuncs.runShellCmd(cmd, self.user)
            result = g_file.readFile(resFile)
            g_file.removeFile(sqlFile)
            g_file.removeFile(resFile)
            size = result[0].strip()
            line = result[1].strip()
            width = result[2].strip()
            Role = ""
            if (Instance.instanceRole == INSTANCE_ROLE_COODINATOR):
                Role = "CN"
            elif (Instance.instanceRole == INSTANCE_ROLE_DATANODE):
                Role = "DN"
            instanceName = "%s_%s" % (Role, Instance.instanceId)
            resultMap[i] = [instanceName, size, line, width]
        return resultMap
    except Exception as e:
        if os.path.exists(sqlFile):
            g_file.removeFile(sqlFile)
        if os.path.exists(resFile):
            g_file.removeFile(resFile)
        raise Exception(str(e))
def doCheck(self):
    databaseListSql = "select datname from pg_database " \
                      "where datcompatibility = 'TD';"
    self.result.raw = databaseListSql
    output = SharedFuncs.runSqlCmd(databaseListSql, self.user, "",
                                   self.port, self.tmpPath, "postgres",
                                   self.mpprcFile)
    if (not output.strip()):
        self.result.val = "The database with TD mode does not exist."
        self.result.rst = ResultStatus.OK
        return
    dbList = output.strip().split("\n")
    self.result.raw = "The databases in TD mode are: %s\n" % ','.join(
        output.split('\n'))
    resultStr = ""
    sqlcmd = """
        select ns.nspname as namespace, c.relname as table_name,
               attr.attname as column_name
          from pg_attribute attr, pg_class c, pg_namespace ns
         where attr.attrelid = c.oid
           and ns.oid = c.relnamespace
           and array_to_string(c.reloptions, ', ') like '%orientation=orc%'
           and attr.atttypid = (select oid from pg_type
                                 where typname = 'date')
        union all
        select ns.nspname as namespace, c.relname as table_name,
               attr.attname as column_name
          from pg_attribute attr, pg_class c, pg_namespace ns,
               pg_foreign_table ft
         where attr.attrelid = c.oid
           and c.oid = ft.ftrelid
           and ns.oid = c.relnamespace
           and array_to_string(ft.ftoptions, ', ') like '%format=orc%'
           and attr.atttypid = (select oid from pg_type
                                 where typname = 'date');
        """
    for databaseName in dbList:
        output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port,
                                       self.tmpPath, databaseName,
                                       self.mpprcFile, True)
        if (output):
            self.result.raw += "%s: %s" % (databaseName, output)
            tableList = output.split('\n')
            resultStr += "database[%s]: %s\n" % (databaseName,
                                                 ",".join(tableList))
    if (resultStr):
        self.result.rst = ResultStatus.NG
        self.result.val = resultStr
    else:
        self.result.rst = ResultStatus.OK
        self.result.val = "The orc table with the date column " \
                          "in the TD mode database does not exist."
def getDisk(self):
    diskDic = {}
    # list physical disks, e.g. "Disk /dev/sda:" -> "sda"
    cmd = "fdisk -l 2>/dev/null " \
          "| grep 'Disk /dev/' | grep -v '/dev/mapper/' " \
          "| awk '{ print $2 }'| awk -F'/' '{ print $NF }'| sed s/:$//g"
    output = SharedFuncs.runShellCmd(cmd)
    for disk in output.splitlines():
        # list the partitions that belong to this disk
        cmd = "fdisk -l 2>/dev/null | grep '%s'" \
              "| grep -v '/dev/mapper/'| grep -v 'Disk /dev/'" \
              "| awk -F ' ' ' {print $1}'" % disk
        output = SharedFuncs.runShellCmd(cmd)
        if output:
            diskDic[disk] = output.splitlines()
        else:
            diskDic[disk] = "/dev/" + disk
    return diskDic
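# Note the mixed return shape of getDisk(): a list for partitioned disks
# but a plain string for unpartitioned ones. Both satisfy the "in"
# containment test used by collectIORequest() below (illustrative sketch):
diskDicExample = {'sda': ['/dev/sda1', '/dev/sda2'],  # partitioned
                  'sdb': '/dev/sdb'}                   # unpartitioned
assert '/dev/sda1' in diskDicExample['sda']  # list membership
assert '/dev/sdb' in diskDicExample['sdb']   # substring match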
def doCheck(self):
    global networkCards
    if (self.cluster):
        # Get node information
        LocalNodeInfo = self.cluster.getDbNodeByName(self.host)
        # Get the IP address
        serviceIP = LocalNodeInfo.backIps[0]
    elif (self.ipAddr):
        serviceIP = self.ipAddr
    else:
        serviceIP = SharedFuncs.getIpByHostName(self.host)
    # initialize so that an unmatched service IP cannot raise a NameError
    networkCardNum = None
    netBondMode = None
    networkCards = g_network.getAllNetworkInfo()
    for network in networkCards:
        if (network.ipAddress == serviceIP):
            networkCardNum = network.NICNum
            netBondMode = network.networkBondModeInfo
            break
    self.result.val = netBondMode
    self.result.rst = ResultStatus.OK
    self.result.raw = "%s\n%s\n" % (networkCardNum, netBondMode)
    bondFile = '/proc/net/bonding/%s' % networkCardNum
    if (os.path.exists(bondFile)):
        self.result.raw += bondFile
        flag1 = g_file.readFile(bondFile, 'BONDING_OPTS')
        flag2 = g_file.readFile(bondFile, 'BONDING_MODULE_OPTS')
        if (not flag1 and not flag2):
            self.result.rst = ResultStatus.NG
            self.result.val += "\nNo 'BONDING_OPTS' or" \
                               " 'BONDING_MODULE_OPTS' in bond" \
                               " config file [%s]." % bondFile
def wrapper():
    if (not hasattr(self, 'cluster')):
        raise Exception(ErrorCode.GAUSS_530["GAUSS_53030"]
                        % "cluster attribute")
    if (not hasattr(self, 'host')):
        raise Exception(ErrorCode.GAUSS_530["GAUSS_53030"]
                        % "host attribute")
    if (not self.cluster):
        raise Exception(ErrorCode.GAUSS_530["GAUSS_53031"])
    dbNode = self.cluster.getDbNodeByName(self.host)
    # The specified node does not exist or is empty
    if (dbNode is None or dbNode == ""):
        raise Exception(ErrorCode.GAUSS_530["GAUSS_53013"] % "The dbNode")
    if self.cluster.isSingleInstCluster():
        masterDn = SharedFuncs.getMasterDnNum(self.user, self.mpprcFile)
        if len(dbNode.datanodes) < 1 or \
                dbNode.datanodes[0].instanceId not in masterDn:
            raise CheckNAException(
                "The node does not contain a master DN instance")
        self.port = dbNode.datanodes[0].port
    else:
        # The specified CN node does not exist
        if (len(dbNode.coordinators) == 0):
            raise CheckNAException(
                "The node does not contain a CN instance")
        # get the CN port
        self.port = dbNode.coordinators[0].port
        self.cntype = dbNode.coordinators[0].instanceType
    return func()
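# wrapper() takes no arguments and calls func() directly, so both "self"
# and "func" must be captured from an enclosing scope -- the usual shape is
# a decorator like this sketch (names illustrative, not from the source):
def requiresLocalInstance(func):
    def inner(self, *args, **kwargs):
        # ... the validation shown in wrapper() above runs here and
        # resolves self.port before the real check executes ...
        return func(self, *args, **kwargs)
    return inner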
def doCheck(self):
    self.result.rst = ResultStatus.OK
    timelist = []
    gaussPro = "gaussdb"
    cmd = "ps -C %s -o lstart,args | grep -v grep | grep -v 'om_monitor'" \
          " 2>/dev/null" % gaussPro
    output = SharedFuncs.runShellCmd(cmd, self.user, self.mpprcFile)
    # skip the header line; lstart looks like "Mon Jun  3 09:15:02 2024"
    for line in output.splitlines()[1:]:
        resultList = line.split()
        year = resultList[4]
        month = monthdic[resultList[1]]
        day = resultList[2]
        time = resultList[3]
        timestring = "%s-%s-%s %s" % (year, month, day, time)
        dattime = datetime.strptime(timestring, '%Y-%m-%d %H:%M:%S')
        timelist.append(dattime)
    if (timelist):
        mintime = timelist[0]
        maxtime = timelist[0]
    else:
        mintime = None
        maxtime = None
    for tmpdatetime in timelist:
        if (tmpdatetime < mintime):
            mintime = tmpdatetime
        elif (tmpdatetime > maxtime):
            maxtime = tmpdatetime
    if (maxtime and mintime):
        # warn when gaussdb processes started more than 5 minutes apart
        if (int((maxtime - mintime).days) > 0 or int(
                (maxtime - mintime).seconds) > 300):
            self.result.rst = ResultStatus.WARNING
            self.result.val = output
        else:
            self.result.rst = ResultStatus.OK
            self.result.val = output
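# A self-contained illustration of the lstart parsing above ("ps -o lstart"
# prints e.g. "Mon Jun  3 09:15:02 2024"; monthMap stands in for the
# module-level monthdic):
from datetime import datetime

sampleFields = "Mon Jun 3 09:15:02 2024 /usr/bin/gaussdb".split()
monthMap = {'Jun': '06'}
stamp = "%s-%s-%s %s" % (sampleFields[4], monthMap[sampleFields[1]],
                         sampleFields[2], sampleFields[3])
assert datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S').year == 2024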
def doCheck(self):
    # collect 30 rounds of per-device svctm samples (about 60 seconds)
    dic = {}
    slowDiskList = []
    cmd = "for varible1 in {1..30}; do iostat -d -x -k 1 1 " \
          "| grep -E -v \"Linux|Device\"|awk 'NF'" \
          "|awk '{print $1,$(NF-1)}'; " \
          "sleep 1;done"
    output = SharedFuncs.runShellCmd(cmd)
    for line in output.splitlines():
        diskname = line.split()[0]
        svctmValue = line.split()[1]
        if (diskname in dic.keys()):
            dic[diskname].append(float(svctmValue))
        else:
            dic[diskname] = [float(svctmValue)]
    for diskname, svctmValues in dic.items():
        diskList = sorted(svctmValues)
        # slow when the worst sample exceeds self.max and the 10th-highest
        # sample still exceeds self.high
        if (diskList[-1] > self.max and diskList[-10] > self.high):
            slowDiskList.append(diskname)
    if (slowDiskList):
        self.result.rst = ResultStatus.NG
        self.result.val = "Slow Disk Found:\n%s" % "\n".join(slowDiskList)
    else:
        self.result.rst = ResultStatus.OK
        self.result.val = "No Slow Disk Found"
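# Spot-check of the slow-disk rule above (a sketch; the threshold values
# are assumptions for illustration). With 30 sorted samples, diskList[-10]
# is the 10th-highest value, so a disk is flagged only when its worst
# sample exceeds self.max AND at least 10 samples exceed self.high --
# sustained slowness rather than a single spike:
samples = sorted([1.0] * 20 + [6.0] * 10)  # svctm values in ms
maxThreshold, highThreshold = 5.0, 3.0
assert samples[-1] > maxThreshold and samples[-10] > highThreshold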
def collectIORequest(self):
    """
    function : Collect the nr_requests setting of each disk in use
    input : NA
    output : Dict
    """
    devices = []
    pathList = []
    if (self.cluster):
        pathList = self.obtainDataDir(
            self.cluster.getDbNodeByName(self.host))
    else:
        pathList = self.obtainDiskDir()
    diskDict = self.obtainDisk()
    for path in pathList:
        cmd = "df -h %s" % path
        output = SharedFuncs.runShellCmd(cmd)
        partitionInfo = output.split('\n')[-1]
        partitionName = partitionInfo.split()[0]
        if (partitionName in devices):
            continue
        devices.append(partitionName)
    result = {}
    for d in devices:
        for diskName, partitions in diskDict.items():
            if d in partitions:
                request = g_file.readFile(
                    "/sys/block/%s/queue/nr_requests" % diskName)[0]
                result[diskName] = request.strip()
    return result
def getDevices(self):
    pathList = []
    devices = []
    diskDic = self.getDisk()
    if (self.cluster):
        pathList = self.obtainDataDir(
            self.cluster.getDbNodeByName(self.host))
    else:
        pathList = self.obtainDiskDir()
    for path in pathList:
        if path.find('No such file or directory') >= 0 or path.find(
                'no file systems processed') >= 0:
            self.result.rst = ResultStatus.ERROR
            self.result.val += \
                "There is no cluster and no /data* directory."
            return
        cmd = "df -P -i %s" % path
        output = SharedFuncs.runShellCmd(cmd)
        # Filesystem  Inodes  IUsed   IFree IUse% Mounted on
        # /dev/xvda2 2363904 233962 2129942   10% /
        diskName = output.split('\n')[-1].split()[0]
        for disk in diskDic.keys():
            if diskName in diskDic[disk] and disk not in devices:
                devices.append(disk)
    return devices
def checkSar(self, ethName):
    global errorMsg
    global serviceIP
    global speedMsg
    # average RX throughput (kB/s) over 10 one-second samples
    cmd = "sar -n DEV 1 10|grep %s|grep Average|awk '{print $6}'" \
          % ethName
    output = SharedFuncs.runShellCmd(cmd)
    if (output.strip() != ""):
        try:
            average = float(output.strip())
        except Exception:
            errorMsg.append(output.strip())
            return errorMsg
    else:
        errorMsg.append("get %s RX average failed. commands: %s"
                        % (serviceIP, cmd))
        return errorMsg
    string = "%s RX average is %dkB/s" % (serviceIP, average)
    if (average < DEFINE_SPEED_WARNING):
        g_lock.acquire()
        errorMsg.append(string)
        g_lock.release()
    else:
        speedMsg = string
    return errorMsg
def doSet(self):
    sqlcmd = "drop schema pmk cascade;"
    output = SharedFuncs.runSqlCmd(sqlcmd, self.user, "", self.port,
                                   self.tmpPath, "postgres",
                                   self.mpprcFile)
    self.result.val = output
def doCheck(self):
    global g_value
    dataNum = int(self.nodeCount) * int(self.dnCount)
    sqlcmd = "SELECT setting FROM pg_settings WHERE " \
             "name='comm_max_datanode';"
    self.result.raw = sqlcmd
    comm_max_datanode = SharedFuncs.runSqlCmd(sqlcmd, self.user, "",
                                              self.port, self.tmpPath,
                                              "postgres", self.mpprcFile)
    if comm_max_datanode.isdigit() and dataNum > int(comm_max_datanode):
        if (dataNum < 256):
            g_value = 256
        elif (dataNum < 512):
            g_value = 512
        elif (dataNum < 1024):
            g_value = 1024
        else:
            # assign the global, not a local "value"
            g_value = 2048
        self.result.rst = ResultStatus.WARNING
        self.result.val = "Invalid value for GUC parameter " \
                          "comm_max_datanode: %s. Expected value: %s" % (
                              comm_max_datanode, g_value)
    else:
        self.result.rst = ResultStatus.OK
        self.result.val = "dataNum: %s, comm_max_datanode: %s" % (
            dataNum, comm_max_datanode)
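# The cascading if/elif above rounds dataNum up to the next bucket of a
# fixed ladder. A compact equivalent (a sketch, not from the source):
def suggestedCommMaxDatanode(dataNum, buckets=(256, 512, 1024, 2048)):
    """Return the smallest bucket that can hold dataNum datanodes."""
    for bucket in buckets:
        if dataNum < bucket:
            return bucket
    return buckets[-1]

assert suggestedCommMaxDatanode(300) == 512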