def _chk_date_status(node):
    """Print the current date and time reported by ``node``.

    ``node0`` is checked by running ``date`` directly; every other node is
    checked by running ``date`` through ssh.

    Args:
        node: Host name of the node to check, e.g. ``'node1'``.
    """
    print_with_color("check date: " + node)
    if node == 'node0':
        os.system('date +"%Y-%m-%d %H:%M:%S"')
        return
    # ssh hands the command to a shell on the remote side, so the space in
    # the format is backslash-escaped to keep it a single argument there.
    # The backslash is written as "\\" because "\ " is not a valid Python
    # escape sequence and triggers a SyntaxWarning on recent interpreters.
    os.system('ssh -o LogLevel=ERROR ' + node + ' date +"%Y-%m-%d\\ %H:%M:%S"')
def _chk_netstat_status(node):
    """Print the ``netstat -tuplan`` listing for ``node``.

    ``node0`` is inspected by running the command directly; every other
    node is inspected by running it through ssh.

    Args:
        node: Host name of the node to check.
    """
    netstat_cmd = "netstat -tuplan"
    if node != 'node0':
        # Any node other than node0 is reached over ssh.
        netstat_cmd = "ssh -o LogLevel=ERROR " + node + " " + netstat_cmd
    print_with_color("check netstat: " + node)
    os.system(netstat_cmd)
# Prefix of the unmount command; the node name is appended to it.
umt_cmd = 'sudo umount /home/lidong/vagrant/'
# Directory that holds one sub-directory (mount point) per node.
vag_dir = '/home/lidong/vagrant'

if __name__ == "__main__":
    # When the root directory is missing, create it and link the local
    # workspace into it as "node0".
    if not os.path.exists(vag_dir):
        os.mkdir(vag_dir)
        os.symlink("/home/lidong/workspace", vag_dir + "/node0")

    # Captures the resolved IP address and the "received" packet count from
    # the output of ``ping``.  A raw string keeps the regex escapes intact
    # and the dots of the address are escaped so they match literal dots.
    # NOTE(review): the first two octets must have exactly three digits
    # (e.g. 192.168.x.x) -- confirm this restriction is intended.
    pattern = (
        r"PING node\d+ \((\d{3}\.\d{3}\.\d{1,3}\.\d{1,3})\) "
        r".*? (\d{1}) received, .*?"
    )
    ping = re.compile(pattern, re.S)

    for i in range(1, 6):
        node = 'node' + str(i)
        path = os.path.join(vag_dir, node)
        if not os.path.exists(path):
            os.mkdir(path)
        # Unmount the node's directory first; error output is discarded.
        os.system(umt_cmd + node + " 2>/dev/null")

        # Close the pipe deterministically instead of leaving it to the GC.
        with os.popen("ping -c 1 -W 1 " + node) as pipe:
            text = pipe.read()
        res = ping.match(text)
        if res is None:
            # The ping output could not be parsed, so there is no address
            # to mount; never fall through with an undefined or stale ``ip``.
            continue
        ip = res.group(1)
        cc = res.group(2)
        if cc == '0':
            print_with_color("ping " + node + " fail")
            continue
        print_with_color("mount: " + node)
        # ``mnt_cmd`` is a format template defined elsewhere in the file.
        os.system(mnt_cmd.format(ip, node))
def _reports_active(state_cmd):
    """Tell whether ``state_cmd`` prints a line that is exactly ``active``.

    The command output is filtered through ``grep active`` before the
    lines are compared.
    """
    output = os.popen(state_cmd + " | grep active").read()
    return 'active' in output.split('\n')


if __name__ == "__main__":
    # Exactly one command-line argument (the target name) is expected.
    if len(sys.argv) != 2:
        print("Use: ", sys.argv[0], " [hadoop|yarn|hbase|ha-doc|hb-doc]")
        sys.exit(0)

    arg = sys.argv[1]
    if arg == 'hadoop':
        # Open the Hadoop admin web UI.
        # Port 50070; nn1 and nn2 are defined in hdfs-site.xml.
        if _reports_active("hdfs haadmin -getServiceState nn1"):
            print_with_color("Open Hadoop Admin Web(node0)")
            open_web("http://node0:50070")
            sys.exit(0)
        # nn1 did not report "active": open the page on node1 instead.
        print_with_color("Open Hadoop Admin Web(node1)")
        open_web("http://node1:50070")
    elif arg == 'yarn':
        # Open the YARN status web UI.
        # Port 8088; rm1 is defined in yarn-site.xml.
        # NOTE(review): unlike the hadoop branch, nothing is opened when rm1
        # is not active, and the other targets listed in the usage message
        # are not handled in this block -- confirm whether that is intended.
        if _reports_active("yarn rmadmin -getServiceState rm1"):
            print_with_color("Open Yarn Status Web(node0)")
            open_web("http://node0:8088")
            sys.exit(0)
def _chk_jps_status(node):
    """Show the jps processes of ``node``, one node of the cluster.

    ``jps`` is run on the node through ssh and lines mentioning ``jps``
    (case-insensitive) are filtered out of the listing.

    Args:
        node: Host name of the node to check.
    """
    banner = "check jps: " + node
    remote_cmd = "ssh -o LogLevel=ERROR {} jps | grep -vi jps".format(node)
    print_with_color(banner)
    os.system(remote_cmd)
def __chk_rm_status():
    """Print the service state reported by ``yarn rmadmin`` for rm1 and rm2.

    ``rm1`` is announced under the label ``node0`` and ``rm2`` under the
    label ``node5``.
    """
    for label, rm_id in (("node0", "rm1"), ("node5", "rm2")):
        print_with_color("check rm: " + label)
        os.system("yarn rmadmin -getServiceState " + rm_id)
def _chk_ulimit_status(node):
    """Show the output of ``ulimit -a`` for ``node``, one node of the cluster.

    Args:
        node: Host name of the node to check; the command is run over ssh.
    """
    print_with_color("check limit: " + node)
    ssh_words = ["ssh", "-o", "LogLevel=ERROR", node, "ulimit", "-a"]
    os.system(" ".join(ssh_words))
def _chk_df_status(node):
    """Show the ``df -h`` lines that mention ``sda1`` for ``node``.

    ``node0`` is skipped: nothing is printed or executed for it.

    Args:
        node: Host name of the node to check; ``df`` is run over ssh.
    """
    if node != 'node0':
        print_with_color("check df: " + node)
        df_cmd = "ssh -o LogLevel=ERROR " + node + " df -h | grep sda1"
        os.system(df_cmd)
def _chk_zk_status():
    """Run ``zkServer.sh status`` on node2, node3 and node4 over ssh.

    ``/data/opt/env.sh`` is sourced on the remote side before the status
    command is run.
    """
    # Quoted so that the whole "source; status" sequence is sent to the node.
    zk_cmd = '". /data/opt/env.sh; zkServer.sh status"'
    for node in ("node2", "node3", "node4"):
        print_with_color("-------> " + node)
        os.system("ssh -o LogLevel=ERROR " + node + " " + zk_cmd)