def update_cert_auth_kubelet(token, ca, master_ip, master_port):
    """
    Configure the kubelet to authenticate with an x509 client certificate.

    :param token: the token to be in the kubeconfig
    :param ca: the ca
    :param master_ip: the master node IP
    :param master_port: the master node port where the cluster agent listens
    """
    traefik_port = get_traefik_port()
    kubelet_token = "{}-kubelet".format(token)
    # BUGFIX: the format string had no placeholder, so the user was the literal
    # placeholder text. Kubelets must authenticate as system:node:<hostname> in
    # the system:nodes group for the Kubernetes Node authorizer to accept them.
    kubelet_user = "system:node:{}".format(socket.gethostname())
    cert = get_client_cert(
        master_ip, master_port, "kubelet", kubelet_token, kubelet_user, "system:nodes"
    )
    create_x509_kubeconfig(
        ca,
        "127.0.0.1",
        traefik_port,
        "kubelet.config",
        "kubelet",
        cert["certificate_location"],
        cert["certificate_key_location"],
    )
    # Trust the remote CA for client requests hitting the kubelet API.
    set_arg("--client-ca-file", "${SNAP_DATA}/certs/ca.remote.crt", "kubelet")
    set_arg(
        "--node-labels",
        "microk8s.io/cluster=true,node.kubernetes.io/microk8s-worker=microk8s-worker",
        "kubelet",
    )
    service("restart", "kubelet")
def update_traefik(master_ip, api_port, nodes_ips):
    """
    Regenerate the traefik provider configuration and (re)enable the service.

    :param master_ip: IP of the node we joined through
    :param api_port: the API server port
    :param nodes_ips: IPs of all known API server nodes
    """
    # Clear the lock that keeps traefik disabled, if present.
    lock_path = os.path.expandvars("${SNAP_DATA}/var/lock")
    lock = "{}/no-traefik".format(lock_path)
    if os.path.exists(lock):
        os.remove(lock)

    # Endpoints where we expect to find API servers: the node we contacted
    # first, then every other node (assumed to listen on the same port).
    addresses = [{"address": "{}:{}".format(master_ip, api_port)}]
    addresses += [
        {"address": "{}:{}".format(n, api_port)} for n in nodes_ips if n != master_ip
    ]

    template_path = os.path.expandvars("${SNAP_DATA}/args/traefik/provider-template.yaml")
    rendered_path = os.path.expandvars("${SNAP_DATA}/args/traefik/provider.yaml")

    with open(template_path) as template_file:
        provider_cfg = yaml.safe_load(template_file)
    provider_cfg["tcp"]["services"]["kube-apiserver"]["loadBalancer"]["servers"] = addresses
    with open(rendered_path, "w") as out_file:
        yaml.dump(provider_cfg, out_file)

    try_set_file_permissions(rendered_path)
    service("restart", "traefik")
def reset_current_dqlite_worker_installation():
    """Take this worker node out of the cluster it belongs to."""
    print("Configuring services.", flush=True)
    disable_traefik()
    os.remove(ca_cert_file)

    for svc in ("apiserver", "k8s-dqlite"):
        service("stop", svc)
    time.sleep(10)
    rebuild_client_config()

    print("Generating new cluster certificates.", flush=True)
    reinit_cluster()

    # Restore the pristine argument files and the credentials we backed up
    # when the node joined the cluster.
    for config_file in ("kubelet", "kube-proxy"):
        shutil.copyfile(
            "{}/default-args/{}".format(snap_path, config_file),
            "{}/args/{}".format(snapdata_path, config_file),
        )
    for user in ("proxy", "kubelet"):
        config = "{}/credentials/{}.config".format(snapdata_path, user)
        shutil.copyfile("{}.backup".format(config), config)

    unmark_no_cert_reissue()
    unmark_worker_node()
    restart_all_services()
    apply_cni()
def update_cert_auth_kubeproxy(token, ca, master_ip, master_port, hostname_override):
    """
    Configure kube-proxy for x509 client certificate authentication.

    :param token: the token to be in the kubeconfig
    :param ca: the ca
    :param master_ip: the master node IP
    :param master_port: the master node port where the cluster agent listens
    :param hostname_override: the hostname override in case the hostname is not resolvable
    """
    traefik_port = get_traefik_port()
    proxy_token = "{}-proxy".format(token)
    cert = get_client_cert(master_ip, master_port, "kube-proxy", proxy_token, "system:kube-proxy")
    create_x509_kubeconfig(
        ca,
        "127.0.0.1",
        traefik_port,
        "proxy.config",
        "kubeproxy",
        cert["certificate_location"],
        cert["certificate_key_location"],
    )
    # The kubeconfig now points at the local traefik endpoint; drop --master.
    set_arg("--master", None, "kube-proxy")
    if hostname_override:
        set_arg("--hostname-override", hostname_override, "kube-proxy")
    service("restart", "proxy")
def update_apiserver(api_authz_mode):
    """
    Reconfigure the local API server's authorization mode and restart it.

    :param api_authz_mode: the authorization mode to be used
    """
    set_arg("--authorization-mode", api_authz_mode, "kube-apiserver")
    service("restart", "apiserver")
def update_dqlite(cluster_cert, cluster_key, voters, host):
    """
    Configure the dqlite cluster

    :param cluster_cert: the dqlite cluster cert
    :param cluster_key: the dqlite cluster key
    :param voters: the dqlite voters
    :param host: the hostname others see of this node
    """
    service("stop", "apiserver")
    time.sleep(10)
    # Keep one backup of the previous data directory and start fresh.
    shutil.rmtree(cluster_backup_dir, ignore_errors=True)
    shutil.move(cluster_dir, cluster_backup_dir)
    os.mkdir(cluster_dir)
    store_cluster_certs(cluster_cert, cluster_key)

    # We get the dqlite port from the already existing deployment
    port = 19001
    with open("{}/info.yaml".format(cluster_backup_dir)) as f:
        # safe_load for consistency with the rest of the file; this config
        # needs no arbitrary-object deserialization.
        data = yaml.safe_load(f)
        if "Address" in data:
            port = data["Address"].split(":")[1]

    init_data = {"Cluster": voters, "Address": "{}:{}".format(host, port)}
    with open("{}/init.yaml".format(cluster_dir), "w") as f:
        yaml.dump(init_data, f)

    service("start", "apiserver")

    waits = 10
    print("Waiting for this node to finish joining the cluster.", end=" ", flush=True)
    while waits > 0:
        try:
            # Poll the dqlite cluster membership until this host shows up.
            out = subprocess.check_output(
                "{snappath}/bin/dqlite -s file://{dbdir}/cluster.yaml -c {dbdir}/cluster.crt "
                "-k {dbdir}/cluster.key -f json k8s .cluster".format(
                    snappath=snap_path, dbdir=cluster_dir
                ).split(),
                timeout=4,
            )
            if host in out.decode():
                break
            else:
                print(".", end=" ", flush=True)
                time.sleep(5)
                waits -= 1
        except (subprocess.CalledProcessError, subprocess.TimeoutExpired):
            print("..", end=" ", flush=True)
            time.sleep(2)
            waits -= 1
    print(" ")

    # NOTE(review): overwriting csr.conf with a dummy value presumably triggers
    # certificate regeneration elsewhere — confirm against the kicker service.
    # (Also fixed the accidental double slash in the path.)
    with open("{}/certs/csr.conf".format(snapdata_path), "w") as f:
        f.write("changeme")

    restart_all_services()
def disable_traefik():
    """Stop traefik and drop a lock file so it stays disabled."""
    lock_dir = os.path.expandvars("${SNAP_DATA}/var/lock")
    lock = "{}/no-traefik".format(lock_dir)
    if not os.path.exists(lock):
        # Touch the lock file; its presence is the "disabled" flag.
        open(lock, "a").close()
    service("stop", "traefik")
def mark_cluster_node():
    """
    Mark a node as being part of a cluster by creating a
    var/lock/clustered.lock
    """
    lock_file = "{}/var/lock/clustered.lock".format(snapdata_path)
    # Create the (empty) lock and restrict it to the owner.
    open(lock_file, "a").close()
    os.chmod(lock_file, 0o700)
    for svc in ("etcd", "apiserver-kicker", "kubelite"):
        service("restart", svc)
def update_kubelet(token, ca, master_ip, api_port):
    """
    Point the kubelet at the given API server and restart it.

    NOTE(review): a second, label-setting variant of this function also
    appears later in this file — confirm which definition is intended.

    :param token: the token to be in the kubeconfig
    :param ca: the ca
    :param master_ip: the master node IP
    :param api_port: the API server port
    """
    create_kubeconfig(token, ca, master_ip, api_port, "kubelet.config", "kubelet")
    set_arg("--client-ca-file", "${SNAP_DATA}/certs/ca.remote.crt", "kubelet")
    service("restart", "kubelet")
def mark_worker_node():
    """
    Flag this node as a clustered worker (no local control plane) by creating
    the relevant lock files, then restart the affected services.
    """
    for lock in ("clustered.lock", "no-k8s-dqlite"):
        lock_file = "{}/var/lock/{}".format(snapdata_path, lock)
        open(lock_file, "a").close()
        os.chmod(lock_file, 0o700)
    for svc in ("kubelite", "etcd", "apiserver-kicker", "traefik", "k8s-dqlite"):
        service("restart", svc)
def reset_current_dqlite_installation():
    """Take a node out of a dqlite cluster"""
    if is_leader_without_successor():
        # Refuse to leave: this node holds the only database copy.
        print(
            "This node currently holds the only copy of the Kubernetes "
            "database so it cannot leave the cluster."
        )
        print(
            "To remove this node you can either first remove all other "
            "nodes with 'microk8s remove-node' or"
        )
        print("form a highly available cluster by adding at least three nodes.")
        exit(3)

    # We need to:
    # 1. Stop the apiserver
    # 2. Send a DELETE request to any member of the dqlite cluster
    # 3. wipe out the existing installation
    my_ep, other_ep = get_dqlite_endpoints()
    service("stop", "apiserver")
    service("stop", "k8s-dqlite")
    time.sleep(10)
    delete_dqlite_node(my_ep, other_ep)

    print("Generating new cluster certificates.", flush=True)
    reinit_cluster()

    service("start", "k8s-dqlite")
    service("start", "apiserver")
    apply_cni()
    unmark_no_cert_reissue()
    restart_all_services()
def update_kubeproxy(token, ca, master_ip, api_port, hostname_override):
    """
    Point kube-proxy at the given API server and restart it.

    :param token: the token to be in the kubeconfig
    :param ca: the ca
    :param master_ip: the master node IP
    :param api_port: the API server port
    :param hostname_override: the hostname override in case the hostname is not resolvable
    """
    create_kubeconfig(token, ca, master_ip, api_port, "proxy.config", "kubeproxy")
    # The kubeconfig carries the server address; --master is redundant.
    set_arg("--master", None, "kube-proxy")
    if hostname_override:
        set_arg("--hostname-override", hostname_override, "kube-proxy")
    service("restart", "proxy")
def update_flannel(etcd, master_ip, master_port, token):
    """
    Configure flannel

    :param etcd: etcd endpoint
    :param master_ip: master ip
    :param master_port: master port
    :param token: token to contact the master with
    """
    get_etcd_client_cert(master_ip, master_port, token)
    # The advertised endpoint may be the wildcard address; substitute the
    # actual master IP so flanneld can reach it.
    etcd = etcd.replace("0.0.0.0", master_ip)
    flannel_args = (
        ("--etcd-endpoints", etcd),
        ("--etcd-cafile", ca_cert_file_via_env),
        ("--etcd-certfile", server_cert_file_via_env),
        ("--etcd-keyfile", "${SNAP_DATA}/certs/server.key"),
    )
    for flag, value in flannel_args:
        set_arg(flag, value, "flanneld")
    service("restart", "flanneld")
def update_kubelet(token, ca, master_ip, api_port):
    """
    Configure the kubelet of a worker node and restart it.

    :param token: the token to be in the kubeconfig
    :param ca: the ca
    :param master_ip: the master node IP
    :param api_port: the API server port
    """
    create_kubeconfig(token, ca, master_ip, api_port, "kubelet.config", "kubelet")
    set_arg("--client-ca-file", "${SNAP_DATA}/certs/ca.remote.crt", "kubelet")
    # Label the node so it is identifiable as a MicroK8s worker.
    worker_labels = "microk8s.io/cluster=true,node.kubernetes.io/microk8s-worker=microk8s-worker"
    set_arg("--node-labels", worker_labels, "kubelet")
    service("restart", "kubelet")
def reset_current_dqlite_installation():
    """
    Take a node out of a dqlite cluster.

    Removes this node from the dqlite membership, wipes the local database
    directory, restores (or regenerates) the dqlite certificates, resets the
    bind address to the default, and waits for the node to come back up.
    """
    if is_leader_without_successor():
        # Refuse to leave: this node holds the only copy of the database.
        print(
            "This node currently holds the only copy of the Kubernetes "
            "database so it cannot leave the cluster."
        )
        print(
            "To remove this node you can either first remove all other "
            "nodes with 'microk8s remove-node' or"
        )
        print("form a highly available cluster by adding at least three nodes.")
        exit(3)

    # We need to:
    # 1. Stop the apiserver
    # 2. Send a DELETE request to any member of the dqlite cluster
    # 3. wipe out the existing installation
    my_ep, other_ep = get_dqlite_endpoints()
    service("stop", "apiserver")
    time.sleep(10)
    delete_dqlite_node(my_ep, other_ep)
    print("Generating new cluster certificates.", flush=True)
    shutil.rmtree(cluster_dir, ignore_errors=True)
    os.mkdir(cluster_dir)
    if os.path.isfile("{}/cluster.crt".format(cluster_backup_dir)):
        # reuse the certificates we had before the cluster formation
        shutil.copy(
            "{}/cluster.crt".format(cluster_backup_dir), "{}/cluster.crt".format(cluster_dir)
        )
        shutil.copy(
            "{}/cluster.key".format(cluster_backup_dir), "{}/cluster.key".format(cluster_dir)
        )
    else:
        # This node never joined a cluster. A cluster was formed around it.
        hostname = socket.gethostname()  # type: str
        ip = "127.0.0.1"  # type: str
        # Render the CSR template with this host's name and IP via sed, then
        # issue a fresh self-signed certificate for the local dqlite instance.
        shutil.copy(
            "{}/microk8s-resources/certs/csr-dqlite.conf.template".format(snap_path),
            "{}/var/tmp/csr-dqlite.conf".format(snapdata_path),
        )
        subprocess.check_call(
            "{}/bin/sed -i s/HOSTNAME/{}/g {}/var/tmp/csr-dqlite.conf".format(
                snap_path, hostname, snapdata_path
            ).split(),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        subprocess.check_call(
            "{}/bin/sed -i s/HOSTIP/{}/g {}/var/tmp/csr-dqlite.conf".format(
                snap_path, ip, snapdata_path
            ).split(),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
        # 10-year self-signed cert with CN=k8s, using the rendered CSR config.
        subprocess.check_call(
            "{0}/usr/bin/openssl req -x509 -newkey rsa:4096 -sha256 -days 3650 -nodes "
            "-keyout {1}/var/kubernetes/backend/cluster.key "
            "-out {1}/var/kubernetes/backend/cluster.crt "
            "-subj /CN=k8s -config {1}/var/tmp/csr-dqlite.conf -extensions v3_ext".format(
                snap_path, snapdata_path
            ).split(),
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    # We reset to the default port and address
    init_data = {"Address": "127.0.0.1:19001"}
    with open("{}/init.yaml".format(cluster_dir), "w") as f:
        yaml.dump(init_data, f)

    service("start", "apiserver")

    waits = 10  # type: int
    print("Waiting for node to start.", end=" ", flush=True)
    time.sleep(10)
    while waits > 0:
        try:
            # The node is considered up once kubectl can query the API server;
            # the CNI manifest is re-applied in the same step.
            subprocess.check_call(
                "{}/microk8s-kubectl.wrapper get service/kubernetes".format(snap_path).split(),
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            subprocess.check_call(
                "{}/microk8s-kubectl.wrapper apply -f {}/args/cni-network/cni.yaml".format(
                    snap_path, snapdata_path
                ).split(),
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
            break
        except subprocess.CalledProcessError:
            print(".", end=" ", flush=True)
            time.sleep(5)
            waits -= 1
    print(" ")
    unmark_no_cert_reissue()
    restart_all_services()