def job_executer(self, node_config):
    self.logger.info("{0} job begins!".format(self.jobname))

    # sftp your script to remote host with paramiko.
    script_package = "{0}.tar".format(self.jobname)
    src_local = "parcel-center/{0}".format(node_config["nodename"])
    dst_remote = "/home/{0}".format(node_config["username"])

    if not common.sftp_paramiko(src_local, dst_remote, script_package, node_config):
        return

    commandline = "tar -xvf {0}.tar".format(self.jobname)
    if not common.ssh_shell_paramiko(node_config, commandline):
        self.logger.error("Failed to uncompress {0}.tar".format(self.jobname))
        return

    commandline = "sudo ./{0}/kubernetes-cleanup.sh".format(self.jobname)
    if not common.ssh_shell_paramiko(node_config, commandline):
        self.logger.error(
            "Failed to cleanup the kubernetes deployment on {0}".format(
                node_config['hostip']))
        return

    self.logger.info("Successfully ran {0} job on node {1}".format(
        self.jobname, node_config["nodename"]))
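
# These jobs shell out through common.ssh_shell_paramiko, whose implementation
# lives elsewhere in the repo. Below is a minimal sketch of such a helper --
# an assumption, not the actual common module -- presuming node_config carries
# hostip, sshport, username, and password:
import paramiko

def ssh_shell_paramiko_sketch(node_config, commandline):
    """Run a command over SSH; return True iff it exits with status 0."""
    client = paramiko.SSHClient()
    client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        client.connect(hostname=node_config["hostip"],
                       port=int(node_config.get("sshport", 22)),
                       username=node_config["username"],
                       password=node_config["password"])
        _, stdout, _ = client.exec_command(commandline)
        return stdout.channel.recv_exit_status() == 0
    except paramiko.SSHException:
        return False
    finally:
        client.close()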
def update_etcd_cluster(self, good_node_config, bad_node_config):
    self.prepare_package(good_node_config, "etcd-reconfiguration-update")

    self.logger.info("Begin to execute the job: etcd-reconfiguration-update.")
    self.logger.info("Update etcd cluster on host [{0}].".format(
        good_node_config['nodename']))

    script_package = "etcd-reconfiguration-update.tar"
    src_local = "parcel-center/{0}".format(good_node_config["nodename"])
    dst_remote = "/home/{0}".format(good_node_config["username"])

    if not common.sftp_paramiko(src_local, dst_remote, script_package, good_node_config):
        return

    commandline = "tar -xvf {0}.tar && sudo ./{0}/{1}.sh {2} {3}".format(
        "etcd-reconfiguration-update", "update-etcd-cluster",
        bad_node_config['hostip'], bad_node_config['etcdid'])
    if not common.ssh_shell_paramiko(good_node_config, commandline):
        return

    self.logger.info(
        "Successfully updated etcd cluster configuration on node {0}".format(
            bad_node_config["nodename"]))

    if self.clean_flag:
        self.delete_packege(good_node_config)
def stop_bad_etcd_server(self, bad_node_config):
    self.prepare_package(bad_node_config, "etcd-reconfiguration-stop")

    self.logger.info("Begin to execute the job: etcd-reconfiguration-stop.")
    self.logger.info("Stop the bad etcd server on host [{0}].".format(
        bad_node_config['nodename']))

    script_package = "etcd-reconfiguration-stop.tar"
    src_local = "parcel-center/{0}".format(bad_node_config["nodename"])
    dst_remote = "/home/{0}".format(bad_node_config["username"])

    if not common.sftp_paramiko(src_local, dst_remote, script_package, bad_node_config):
        return

    commandline = "tar -xvf {0}.tar && sudo ./{0}/stop-etcd-server.sh".format(
        "etcd-reconfiguration-stop")
    if not common.ssh_shell_paramiko(bad_node_config, commandline):
        return

    self.logger.info(
        "Successfully stopped bad etcd server on node {0}".format(
            bad_node_config["nodename"]))

    if self.clean_flag:
        self.delete_packege(bad_node_config)
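
# The uploads rely on common.sftp_paramiko, also defined elsewhere in the
# repo. A hypothetical sketch of a compatible helper (assumed signature, not
# the repo's actual implementation):
import paramiko

def sftp_paramiko_sketch(src_local, dst_remote, filename, node_config):
    """Upload src_local/filename to dst_remote/filename; True on success."""
    transport = paramiko.Transport((node_config["hostip"],
                                    int(node_config.get("sshport", 22))))
    try:
        transport.connect(username=node_config["username"],
                          password=node_config["password"])
        sftp = paramiko.SFTPClient.from_transport(transport)
        sftp.put("{0}/{1}".format(src_local, filename),
                 "{0}/{1}".format(dst_remote, filename))
        return True
    except (paramiko.SSHException, IOError):
        return False
    finally:
        transport.close()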
def job_executer(self, node_config):
    self.logger.info("{0} job begins!".format(self.jobname))

    # sftp your script to remote host with paramiko.
    script_package = "{0}.tar".format(self.jobname)
    src_local = "parcel-center/{0}".format(node_config["nodename"])
    dst_remote = common.get_user_dir(node_config)

    if not common.sftp_paramiko(src_local, dst_remote, script_package, node_config):
        sys.exit(1)

    commandline = "tar -xvf {0}.tar".format(self.jobname)
    if not common.ssh_shell_paramiko(node_config, commandline):
        self.logger.error("Failed to uncompress {0}.tar".format(self.jobname))
        sys.exit(1)

    commandline = "sudo /bin/bash {0}/kubernetes-cleanup.sh".format(self.jobname)
    if self.force_flag:
        commandline += " -f"

    if not common.ssh_shell_with_password_input_paramiko(node_config, commandline):
        self.logger.error(
            "Failed to cleanup the kubernetes deployment on {0}".format(
                node_config['hostip']))
        sys.exit(1)

    self.logger.info("Successfully ran {0} job on node {1}".format(
        self.jobname, node_config["nodename"]))
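
# common.get_user_dir replaces the hard-coded "/home/<username>" used by the
# older jobs above. A plausible sketch, assuming it only special-cases root
# (the real helper may differ):
def get_user_dir_sketch(node_config):
    if node_config["username"] == "root":
        return "/root"
    return "/home/{0}".format(node_config["username"])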
def job_executer_add_work_node(self):
    self.logger.info("{0} job begins!".format(self.jobname))

    # sftp your script to remote host with paramiko.
    script_package = "{0}.tar".format(self.jobname)
    src_local = "parcel-center/{0}".format(self.node_config["nodename"])
    dst_remote = common.get_user_dir(self.node_config)

    if not common.sftp_paramiko(src_local, dst_remote, script_package, self.node_config):
        sys.exit(1)

    commandline = "tar -xvf {0}.tar".format(self.jobname)
    if not common.ssh_shell_paramiko(self.node_config, commandline):
        self.logger.error("Failed to uncompress {0}.tar".format(self.jobname))
        sys.exit(1)

    commandline = "sudo ./{0}/hosts-check.sh {1}".format(
        self.jobname, self.node_config['hostip'])
    if not common.ssh_shell_with_password_input_paramiko(self.node_config, commandline):
        self.logger.error("Failed to update the /etc/hosts on {0}".format(
            self.node_config['hostip']))
        sys.exit(1)

    commandline = "sudo ./{0}/docker-ce-install.sh".format(self.jobname)
    if not common.ssh_shell_with_password_input_paramiko(self.node_config, commandline):
        self.logger.error("Failed to install docker-ce on {0}".format(
            self.node_config['hostip']))
        sys.exit(1)

    commandline = "sudo ./{0}/kubelet-start.sh {0}".format(self.jobname)
    if not common.ssh_shell_with_password_input_paramiko(self.node_config, commandline):
        self.logger.error("Failed to bootstrap kubelet on {0}".format(
            self.node_config['hostip']))
        sys.exit(1)

    self.logger.info("Successfully ran {0} job on node {1}".format(
        self.jobname, self.node_config["nodename"]))
def job_executer(self, node_config, job_name):
    # sftp your script to remote host with paramiko.
    script_package = "{0}.tar".format(job_name)
    src_local = "parcel-center/{0}".format(node_config["nodename"])
    dst_remote = "/home/{0}".format(node_config["username"])

    if not common.sftp_paramiko(src_local, dst_remote, script_package, node_config):
        return

    commandline = "tar -xvf {0}.tar".format(job_name)
    if not common.ssh_shell_paramiko(node_config, commandline):
        self.logger.error("Failed to uncompress {0}.tar".format(job_name))
        return

    commandline = "sudo ./{0}/hosts-check.sh {1}".format(
        job_name, node_config['hostip'])
    if not common.ssh_shell_paramiko(node_config, commandline):
        self.logger.error("Failed to update the /etc/hosts on {0}".format(
            node_config['hostip']))
        return

    commandline = "sudo ./{0}/docker-ce-install.sh".format(job_name)
    if not common.ssh_shell_paramiko(node_config, commandline):
        self.logger.error("Failed to install docker-ce on {0}".format(
            node_config['hostip']))
        return

    commandline = "sudo ./{0}/kubelet-start.sh {0}".format(job_name)
    if not common.ssh_shell_paramiko(node_config, commandline):
        self.logger.error("Failed to bootstrap kubelet on {0}".format(
            node_config['hostip']))
        return

    self.logger.info("Successfully ran {0} job on node {1}!".format(
        job_name, node_config['hostip']))
def check_docker_daemon_status(outputFile, configFilePath):
    cluster_config = common.load_yaml_file(configFilePath)
    node_configs = cluster_config['machine-list']

    username = ""
    password = ""
    sshport = ""
    if "default-machine-properties" in cluster_config:
        if "username" in cluster_config["default-machine-properties"]:
            username = cluster_config["default-machine-properties"]["username"]
        if "password" in cluster_config["default-machine-properties"]:
            password = cluster_config["default-machine-properties"]["password"]
        if "sshport" in cluster_config["default-machine-properties"]:
            sshport = cluster_config["default-machine-properties"]["sshport"]

    # execute cmd to check health
    cmd = "sudo systemctl is-active docker | if [ $? -eq 0 ]; then echo \"active\"; else exit 1 ; fi"

    errorNodeCount = 0
    for node_config in node_configs:
        try:
            # fall back to the default machine properties for missing credentials
            if "username" not in node_config or "password" not in node_config or "sshport" not in node_config:
                node_config["username"] = username
                node_config["password"] = password
                node_config["sshport"] = sshport

            flag = common.ssh_shell_paramiko(node_config, cmd)
            if not flag:
                errorNodeCount += 1
                # single node docker health
                logger.error(
                    "node_current_docker_error{{instance=\"{}\"}} {}\n".format(
                        node_config["hostip"], 1))
        except Exception:
            logger.exception("watchdog error on %s", node_config["hostip"])
            errorNodeCount += 1
            # single node docker health
            logger.error(
                "node_current_docker_error{{instance=\"{}\"}} {}\n".format(
                    node_config["hostip"], 1))

    if errorNodeCount > 0:
        # aggregate all nodes docker health total count
        logger.error("docker_error_node_count {}\n".format(errorNodeCount))
        outputFile.write("docker_error_node_count {}\n".format(errorNodeCount))
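
# The strings logged and written above follow the Prometheus text exposition
# format, one sample per line, for example (values are illustrative):
#
#   node_current_docker_error{instance="10.0.0.1"} 1
#   docker_error_node_count 2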
def job_executer(self):
    print("repair job begins!")

    # sftp your script to remote host with paramiko.
    script_package = "repair.tar"
    src_local = "parcel-center/{0}".format(self.node_config["nodename"])
    dst_remote = "/home/{0}".format(self.node_config["username"])

    if not common.sftp_paramiko(src_local, dst_remote, script_package, self.node_config):
        return

    commandline = "tar -xvf repair.tar && sudo ./repair/repair-worker-node.sh"
    if not common.ssh_shell_paramiko(self.node_config, commandline):
        return

    print("Successfully ran repair job on node {0}".format(
        self.node_config["nodename"]))
def collect_docker_daemon_status(configFilePath):
    metrics = []

    cluster_config = common.load_yaml_file(configFilePath)
    node_configs = cluster_config['machine-list']

    username = ""
    password = ""
    sshport = ""
    if "default-machine-properties" in cluster_config:
        if "username" in cluster_config["default-machine-properties"]:
            username = cluster_config["default-machine-properties"]["username"]
        if "password" in cluster_config["default-machine-properties"]:
            password = cluster_config["default-machine-properties"]["password"]
        if "sshport" in cluster_config["default-machine-properties"]:
            sshport = cluster_config["default-machine-properties"]["sshport"]

    cmd = "sudo systemctl is-active docker | if [ $? -eq 0 ]; then echo \"active\"; else exit 1 ; fi"

    errorNodeCount = 0
    for node_config in node_configs:
        ip = node_config["hostip"]
        label = {"instance": ip}
        try:
            # fall back to the default machine properties for missing credentials
            if "username" not in node_config or "password" not in node_config or "sshport" not in node_config:
                node_config["username"] = username
                node_config["password"] = password
                node_config["sshport"] = sshport

            flag = common.ssh_shell_paramiko(node_config, cmd)
            if not flag:
                errorNodeCount += 1
                # single node docker health
                metrics.append(Metric("node_current_docker_error", label, 1))
        except Exception:
            logger.exception("ssh to %s failed", ip)
            errorNodeCount += 1
            metrics.append(Metric("node_current_docker_error", label, 1))

    if errorNodeCount > 0:
        metrics.append(Metric("docker_error_node_count", {}, errorNodeCount))

    return metrics
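
# Metric is imported from elsewhere in the watchdog. A minimal sketch of a
# compatible container -- an assumption about its shape, not the repo's
# definition -- holding a name, a label dict, and a value, and rendering
# itself as a Prometheus text-format sample:
class MetricSketch(object):
    def __init__(self, name, label, value):
        self.name = name
        self.label = label  # dict of label name -> label value
        self.value = value

    def __str__(self):
        if not self.label:
            return "{0} {1}".format(self.name, self.value)
        labels = ",".join('{0}="{1}"'.format(k, v)
                          for k, v in sorted(self.label.items()))
        return "{0}{{{1}}} {2}".format(self.name, labels, self.value)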
def collect_docker_daemon_status(hosts):
    metrics = []

    cmd = "sudo systemctl is-active docker | if [ $? -eq 0 ]; then echo \"active\"; else exit 1 ; fi"

    for host in hosts:
        label = {"ip": host["hostip"], "error": "ok"}
        try:
            flag = common.ssh_shell_paramiko(host, cmd)
            if not flag:
                label["error"] = "config"  # configuration is not correct
        except Exception as e:
            label["error"] = str(e)
            logger.exception("ssh to %s failed", host["hostip"])

        metrics.append(Metric("docker_daemon_count", label, 1))

    return metrics
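
# Hypothetical usage of the collectors above: render each Metric into a text
# file that a Prometheus scraper (e.g. node_exporter's textfile collector)
# can pick up. The path is illustrative, not one mandated by the repo.
def write_metrics_sketch(metrics, path="watchdog.prom"):
    with open(path, "w") as output:
        for metric in metrics:
            output.write("{0}\n".format(metric))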
def restart_etcd_server(self, bad_node_config):
    self.logger.info("Begin to execute the job: etcd-reconfiguration-restart.")
    self.logger.info("Restart etcd server on host [{0}].".format(
        bad_node_config['nodename']))

    new_etcd_cluster_ips_peer = self.get_etcd_peer_ip_list(bad_node_config)
    self.cluster_config['clusterinfo']['etcd_cluster_ips_peer'] = new_etcd_cluster_ips_peer
    self.cluster_config['clusterinfo']['etcd-initial-cluster-state'] = 'existing'

    self.prepare_package(bad_node_config, "etcd-reconfiguration-restart")

    script_package = "etcd-reconfiguration-restart.tar"
    src_local = "parcel-center/{0}".format(bad_node_config["nodename"])
    dst_remote = "/home/{0}".format(bad_node_config["username"])

    if not common.sftp_paramiko(src_local, dst_remote, script_package, bad_node_config):
        return

    commandline = "tar -xvf {0}.tar && sudo ./{0}/{1}.sh".format(
        "etcd-reconfiguration-restart", "restart-etcd-server")
    if not common.ssh_shell_paramiko(bad_node_config, commandline):
        return

    self.logger.info(
        "Successfully restarted bad etcd server on node {0}".format(
            bad_node_config["nodename"]))

    if self.clean_flag:
        self.delete_packege(bad_node_config)
def job_executer_clean_up_node(self):
    self.logger.info("{0} job begins!".format(self.jobname))

    commandline = "kubectl delete node {0}".format(self.node_config['nodename'])
    common.execute_shell(
        commandline,
        "Failed to delete node {0}".format(self.node_config['nodename']))

    # sftp your script to remote host with paramiko.
    script_package = "{0}.tar".format(self.jobname)
    src_local = "parcel-center/{0}".format(self.node_config["nodename"])
    dst_remote = common.get_user_dir(self.node_config)

    if not common.sftp_paramiko(src_local, dst_remote, script_package, self.node_config):
        return

    commandline = "tar -xvf {0}.tar".format(self.jobname)
    if not common.ssh_shell_paramiko(self.node_config, commandline):
        self.logger.error("Failed to uncompress {0}.tar".format(self.jobname))
        return

    commandline = "sudo ./{0}/kubernetes-cleanup.sh".format(self.jobname)
    if not common.ssh_shell_with_password_input_paramiko(self.node_config, commandline):
        self.logger.error(
            "Failed to cleanup the kubernetes deployment on {0}".format(
                self.node_config['hostip']))
        return

    self.logger.info("Successfully ran {0} job on node {1}".format(
        self.jobname, self.node_config["nodename"]))
def remote_host_cleaner(self, node_config):
    # remove the uploaded tarball and its extracted directory from the node
    commandline = "sudo rm -rf {0}*".format(self.jobname)
    if not common.ssh_shell_paramiko(node_config, commandline):
        return
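
# For reference, the node_config dict consumed throughout these jobs carries
# at least the keys below; the sample values are hypothetical.
SAMPLE_NODE_CONFIG = {
    "nodename": "worker-01",  # keys parcel-center/<nodename> on the local side
    "hostip": "10.0.0.1",     # ssh/sftp target, also used in log messages
    "username": "paiadmin",   # remote account owning the upload directory
    "password": "********",   # password-based ssh/sftp authentication
    "sshport": 22,            # may fall back to default-machine-properties
    "etcdid": "etcd2",        # consumed only by the etcd reconfiguration jobs
}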