def __init__(self, cluster_config, node_config, clean):
    self.cluster_config = cluster_config
    self.node_config = node_config
    self.maintain_config = common.load_yaml_file("k8sPaiLibrary/maintainconf/repair.yaml")
    self.jobname = "repair"
    self.clean_flag = clean
def __init__(self, cluster_config, **kwargs):
    self.logger = logging.getLogger(__name__)
    self.cluster_config = cluster_config
    self.maintain_config = common.load_yaml_file(
        "k8sPaiLibrary/maintainconf/deploy.yaml")
    self.clean_flag = kwargs["clean"]
def __init__(self, cluster_config, **kwargs):
    self.logger = logging.getLogger(__name__)
    self.cluster_config = cluster_config
    maintain_configuration_path = os.path.join(
        package_directory_deploy, "../maintainconf/deploy.yaml")
    self.maintain_config = common.load_yaml_file(maintain_configuration_path)
    self.clean_flag = kwargs["clean"]
def __init__(self, cluster_config, node_config, clean):
    self.cluster_config = cluster_config
    self.node_config = node_config
    maintain_configuration_path = os.path.join(
        package_directory_repair, "../maintainconf/repair.yaml")
    self.maintain_config = common.load_yaml_file(maintain_configuration_path)
    self.jobname = "repair"
    self.clean_flag = clean
def __init__(self, cluster_config, node_config, clean):
    self.logger = logging.getLogger(__name__)
    self.cluster_config = cluster_config
    self.node_config = node_config
    self.maintain_config = common.load_yaml_file(
        "k8sPaiLibrary/maintainconf/remove.yaml")
    self.clean_flag = clean
    self.jobname = "remove-node"
def __init__(self, cluster_object_model, **kwargs):
    self.logger = logging.getLogger(__name__)
    self.cluster_object_model = cluster_object_model
    maintain_configuration_path = os.path.join(
        package_directory_clean, "../maintainconf/clean.yaml")
    self.maintain_config = common.load_yaml_file(maintain_configuration_path)
    self.clean_flag = kwargs["clean"]
    self.force_flag = kwargs["force"]
    self.jobname = "clean"
def __init__(self, cluster_config, node_config, clean):
    self.logger = logging.getLogger(__name__)
    self.cluster_config = cluster_config
    self.node_config = node_config
    maintain_configuration_path = os.path.join(
        package_directory_remove, "../maintainconf/remove.yaml")
    self.maintain_config = common.load_yaml_file(maintain_configuration_path)
    self.clean_flag = clean
    self.jobname = "remove-node"
def __init__(self, cluster_config, node_config, clean):
    self.logger = logging.getLogger(__name__)
    self.logger.info(
        "Initialize class etcdfix to fix the broken etcd member on {0}".format(
            node_config["nodename"]))
    self.logger.debug("Node-configuration: {0}".format(str(node_config)))
    self.cluster_config = cluster_config
    self.bad_node_config = node_config
    maintain_configuration_path = os.path.join(
        package_directory_etcdfix, "../maintainconf/etcdfix.yaml")
    self.maintain_config = common.load_yaml_file(maintain_configuration_path)
    self.clean_flag = clean
def __init__(self, cluster_config, node_config, clean):
    self.logger = logging.getLogger(__name__)
    self.logger.info(
        "Initialize class etcdfix to fix the broken etcd member on {0}".format(
            node_config["nodename"]))
    self.logger.debug("Node-configuration: {0}".format(str(node_config)))
    self.cluster_config = cluster_config
    self.bad_node_config = node_config
    self.maintain_config = common.load_yaml_file(
        "k8sPaiLibrary/maintainconf/etcdfix.yaml")
    self.clean_flag = clean
def __init__(self, cluster_config, node_config, clean):
    self.logger = logging.getLogger(__name__)
    self.cluster_config = cluster_config
    self.node_config = node_config
    self.maintain_config = common.load_yaml_file("k8sPaiLibrary/maintainconf/add.yaml")
    self.clean_flag = clean

    # map the node's k8s-role to the corresponding add job; only workers are supported here
    if node_config['k8s-role'] == 'worker':
        self.jobname = "add-worker-node"
    else:
        self.jobname = "error"
        self.logger.error(
            "[{0}] Error: {1} is an undefined role, quit add job in host [{2}]".format(
                time.asctime(), node_config['k8s-role'], node_config['nodename']))
def check(self):
    self.logger.info("Checking kubectl's configuration for paictl.")

    if not os.path.exists(self.kube_conf_path):
        self.logger.warning(
            "CHECKING FAILED: The path {0} doesn't exist.".format(
                self.kube_conf_path))
        return False
    self.logger.info(
        "CHECKING PASS: The path {0} exists.".format(self.kube_conf_path))

    if not os.path.isfile("{0}/config".format(self.kube_conf_path)):
        self.logger.warning(
            "CHECKING FAILED: The configuration file {0}/config doesn't exist.".format(
                self.kube_conf_path))
        return False
    self.logger.info(
        "CHECKING PASS: The configuration file {0}/config exists.".format(
            self.kube_conf_path))

    try:
        local_kubectl_conf = common.load_yaml_file(
            "{0}/config".format(self.kube_conf_path))
        api_server_address = local_kubectl_conf['clusters'][0]['cluster']['server']
        api_server_address_pai_conf = "http://{0}:8080".format(
            self.cluster_config['clusterinfo']['api-servers-ip'])
        if api_server_address != api_server_address_pai_conf:
            self.logger.warning(
                "CHECKING FAILED: The api_server_address in the local configuration "
                "is different from the one in pai's configuration.")
            return False
    except Exception as e:
        self.logger.error(
            "CHECKING FAILED: Unable to compare api_server_address in the "
            "configuration: {0}".format(e))
        return False

    self.logger.info("Kubectl environment checking task passed.")
    return True
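# A minimal sketch of the local kubectl config structure that check() above
# relies on (clusters[0].cluster.server). All values here are illustrative
# assumptions; the real data is the YAML file at "{kube_conf_path}/config".
example_kubectl_conf = {
    'clusters': [
        {
            'name': 'kubernetes',
            'cluster': {
                # expected to equal "http://{api-servers-ip}:8080" built from
                # cluster_config['clusterinfo']
                'server': 'http://10.0.0.1:8080',
            },
        },
    ],
}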
def __init__(self, cluster_config, node_config, clean):
    self.logger = logging.getLogger(__name__)
    self.cluster_config = cluster_config
    self.node_config = node_config
    maintain_configuration_path = os.path.join(
        package_directory_add, "../maintainconf/add.yaml")
    self.maintain_config = common.load_yaml_file(maintain_configuration_path)
    self.clean_flag = clean

    # map the node's k8s-role to the corresponding add job
    if node_config['k8s-role'] == 'worker':
        self.jobname = "add-worker-node"
    elif node_config['k8s-role'] == 'master':
        self.jobname = "add-master-node"
    else:
        self.jobname = "error"
        self.logger.error(
            "[{0}] Error: {1} is an undefined role, quit add job in host [{2}]".format(
                time.asctime(), node_config['k8s-role'], node_config['nodename']))
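# An illustrative node_config for the constructors above, inferred from the
# keys they read ('nodename', 'k8s-role', 'hostip', 'username', 'password',
# 'sshport'). The values are made-up assumptions, not from any real cluster.
example_node_config = {
    'nodename': 'node-001',
    'hostip': '10.0.0.10',
    'k8s-role': 'worker',  # the variant above also accepts 'master'
    'username': 'admin',
    'password': 'admin-password',
    'sshport': 22,
}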
def check_docker_daemon_status(outputFile, configFilePath):
    cluster_config = common.load_yaml_file(configFilePath)
    node_configs = cluster_config['machine-list']

    username = ""
    password = ""
    sshport = ""
    if "default-machine-properties" in cluster_config:
        if "username" in cluster_config["default-machine-properties"]:
            username = cluster_config["default-machine-properties"]["username"]
        if "password" in cluster_config["default-machine-properties"]:
            password = cluster_config["default-machine-properties"]["password"]
        if "sshport" in cluster_config["default-machine-properties"]:
            sshport = cluster_config["default-machine-properties"]["sshport"]

    # execute cmd to check docker daemon health on each node
    cmd = "sudo systemctl is-active docker | if [ $? -eq 0 ]; then echo \"active\"; else exit 1 ; fi"

    errorNodeCount = 0
    for node_config in node_configs:
        try:
            # fall back to the cluster-wide defaults when a node omits its own credentials
            if "username" not in node_config or "password" not in node_config or "sshport" not in node_config:
                node_config["username"] = username
                node_config["password"] = password
                node_config["sshport"] = sshport

            flag = common.ssh_shell_paramiko(node_config, cmd)
            if not flag:
                errorNodeCount += 1
                # single node docker health
                logger.error(
                    "node_current_docker_error{{instance=\"{}\"}} {}\n".format(
                        node_config["hostip"], 1))
        except Exception:
            logger.exception(
                "watchdog error when checking docker daemon on {}".format(
                    node_config["hostip"]))
            errorNodeCount += 1
            # single node docker health
            logger.error(
                "node_current_docker_error{{instance=\"{}\"}} {}\n".format(
                    node_config["hostip"], 1))

    if errorNodeCount > 0:
        # aggregate all nodes' docker health into a total count
        logger.error("docker_error_node_count {}\n".format(errorNodeCount))
        outputFile.write("docker_error_node_count {}\n".format(errorNodeCount))
def collect_docker_daemon_status(configFilePath):
    metrics = []

    cluster_config = common.load_yaml_file(configFilePath)
    node_configs = cluster_config['machine-list']

    username = ""
    password = ""
    sshport = ""
    if "default-machine-properties" in cluster_config:
        if "username" in cluster_config["default-machine-properties"]:
            username = cluster_config["default-machine-properties"]["username"]
        if "password" in cluster_config["default-machine-properties"]:
            password = cluster_config["default-machine-properties"]["password"]
        if "sshport" in cluster_config["default-machine-properties"]:
            sshport = cluster_config["default-machine-properties"]["sshport"]

    cmd = "sudo systemctl is-active docker | if [ $? -eq 0 ]; then echo \"active\"; else exit 1 ; fi"

    errorNodeCount = 0
    for node_config in node_configs:
        ip = node_config["hostip"]
        label = {"instance": ip}

        try:
            # fall back to the cluster-wide defaults when a node omits its own credentials
            if "username" not in node_config or "password" not in node_config or "sshport" not in node_config:
                node_config["username"] = username
                node_config["password"] = password
                node_config["sshport"] = sshport

            flag = common.ssh_shell_paramiko(node_config, cmd)
            if not flag:
                errorNodeCount += 1
                # single node docker health
                metrics.append(Metric("node_current_docker_error", label, 1))
        except Exception:
            logger.exception("ssh to %s failed", ip)
            errorNodeCount += 1
            metrics.append(Metric("node_current_docker_error", label, 1))

    if errorNodeCount > 0:
        metrics.append(Metric("docker_error_node_count", {}, errorNodeCount))

    return metrics
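# collect_docker_daemon_status above assumes a Metric(name, labels, value)
# record defined elsewhere in the codebase. The definition and serializer
# below are minimal stand-ins for illustration, not the project's actual API.
import collections

Metric = collections.namedtuple("Metric", ["name", "labels", "value"])

def serialize_metric(metric):
    # Render a Metric as a Prometheus-style exposition line, e.g.
    # node_current_docker_error{instance="10.0.0.10"} 1
    if metric.labels:
        label_str = ",".join(
            '{}="{}"'.format(k, v) for k, v in sorted(metric.labels.items()))
        return "{}{{{}}} {}\n".format(metric.name, label_str, metric.value)
    return "{} {}\n".format(metric.name, metric.value)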
def load_machine_list(configFilePath):
    cluster_config = common.load_yaml_file(configFilePath)
    return cluster_config['hosts']
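# A hedged usage sketch for the helpers above. The file names are
# placeholders; note that load_machine_list reads the 'hosts' key while
# collect_docker_daemon_status reads 'machine-list', so the two calls may
# point at different config files. serialize_metric is the stand-in sketch
# defined earlier, not a project function.
import sys

if __name__ == "__main__":
    hosts = load_machine_list("hosts.yaml")
    sys.stdout.write("machines: {}\n".format(len(hosts)))
    for metric in collect_docker_daemon_status("cluster.yaml"):
        sys.stdout.write(serialize_metric(metric))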