def __init__(self, org_name: str, course_id: str):
    """Helper class to launch grader notebooks within the kubernetes cluster.

    Args:
        org_name: the organization name
        course_id: the course id

    Raises:
        ConfigException: if the kubectl python client does not have a valid
            configuration set.
    """
    # Prefer in-cluster service-account credentials; outside the cluster
    # fall back to load_kube_config, which reads the KUBECONFIG env var
    # by default.
    try:
        config.load_incluster_config()
    except ConfigException:
        config.load_kube_config()

    self.org_name = org_name
    self.course_id = course_id
    self.grader_name = f"grader-{self.course_id}"
    self.grader_token = token_hex(32)
    self.apps_v1 = client.AppsV1Api()
    self.coreV1Api = client.CoreV1Api()
    # Course home directory; its parent directory is named after the grader.
    self.course_dir = Path(
        f"{MNT_ROOT}/{self.org_name}/home/grader-{self.course_id}/{self.course_id}"
    )
    # Shared exchange directory for the organization.
    self.exchange_dir = Path(EXCHANGE_MNT_ROOT, self.org_name, "exchange")
def get_configmap(PAI_KUBE_CONFIG_DEFAULT_LOCATION, name, namespace="default"):
    """Read a ConfigMap and return its metadata and data.

    Args:
        PAI_KUBE_CONFIG_DEFAULT_LOCATION: path to the kubeconfig file.
        name: ConfigMap name.
        namespace: target namespace (defaults to "default").

    Returns:
        {"metadata": ..., "data": ...} on success, or None when the
        ConfigMap does not exist. Exits the process on any other API error.
    """
    config.load_kube_config(config_file=PAI_KUBE_CONFIG_DEFAULT_LOCATION)
    api_instance = kubernetes.client.CoreV1Api()
    try:
        # NOTE(review): exact/export are deprecated in newer kubernetes
        # client releases — confirm the pinned client version still
        # accepts them.
        api_response = api_instance.read_namespaced_config_map(
            name, namespace, exact=True, export=True)
    except ApiException as e:
        if e.status == 404:
            logger.info("Couldn't find configmap named {0}".format(name))
            return None
        logger.error(
            "Exception when calling CoreV1Api->read_namespaced_config_map: {0}"
            .format(str(e)))
        sys.exit(1)
    return {
        "metadata": api_response.metadata,
        "data": api_response.data,
    }
def create_secret_in_namespace_if_not_exist(self, payload, namespace):
    """Create the secret described by ``payload`` unless it already exists.

    Args:
        payload: dict with 'metadata' (containing 'name') and 'data' keys.
        namespace: namespace to create the secret in.

    Exits the process on any API error other than a 404 on the read.
    """
    if self.in_cluster:
        config.load_incluster_config()
    else:
        config.load_kube_config(config_file="~/.kube/config")
    api_instance = client.CoreV1Api()
    secret_name = payload['metadata']['name']
    try:
        api_instance.read_namespaced_secret(secret_name, namespace)
        return  # already present, nothing to do
    except ApiException as e:
        if e.status != 404:
            logger.error(
                "Exception when calling CoreV1Api->read_namespaced_secret: %s\n"
                % e)
            sys.exit(1)
    # The secret is missing: create it.
    meta_data = client.V1ObjectMeta()
    meta_data.name = secret_name
    body = client.V1Secret(metadata=meta_data, data=payload['data'])
    try:
        api_instance.create_namespaced_secret(namespace, body)
    except ApiException as create_e:
        logger.error(
            "Exception when calling CoreV1Api->create_namespaced_secret: %s\n"
            % create_e)
        sys.exit(1)
def check_python_kubernetes(self):
    """Verify the target kubernetes cluster is reachable via the python client.

    Retries up to 3 times with a 5-second pause between attempts; exits the
    process when all attempts fail.
    """
    try_count = 0
    while True:
        try:
            self.logger.info(
                "Try to access to the target kubernetes cluster")
            # BUG FIX: load credentials BEFORE constructing the API client.
            # The client captures the default configuration at construction
            # time, so the original create-then-load order could issue the
            # request with stale or empty credentials.
            config.load_kube_config(
                config_file=self.KUBE_CONFIG_DEFAULT_LOCATION)
            core_api_instance = client.CoreApi()
            api_response = core_api_instance.get_api_versions()
            self.logger.info(str(api_response))
            break
        except ApiException as e:
            # Include the exception detail (previously discarded).
            self.logger.error(
                "Failed connect to k8s with python client. %s" % e)
            try_count = try_count + 1
            if try_count == 3:
                self.logger.error(
                    "All 3 tries of connecting k8s with python client fails.")
                sys.exit(1)
            time.sleep(5)
    self.logger.info(
        "CHECKING SUCCESSFULLY: Successfully access kubernetes through python client. "
    )
def update_configmap(name, data_dict, namespace):
    """Patch the named ConfigMap with ``data_dict``, creating it when absent.

    Args:
        name: ConfigMap name.
        data_dict: data mapping for the ConfigMap.
        namespace: target namespace.

    Exits the process on any API error other than a 404 on the initial patch.
    """
    config.load_kube_config()
    api_instance = client.CoreV1Api()
    meta_data = client.V1ObjectMeta()
    meta_data.namespace = namespace
    meta_data.name = name
    body = client.V1ConfigMap(metadata=meta_data, data=data_dict)
    try:
        api_response = api_instance.patch_namespaced_config_map(
            name, namespace, body)
        logger.info("configmap named {0} is updated.".format(name))
    except ApiException as e:
        if e.status == 404:
            try:
                logger.info(
                    "Couldn't find configmap named {0}. Create a new configmap"
                    .format(name))
                api_response = api_instance.create_namespaced_config_map(
                    namespace, body)
                logger.info("Configmap named {0} is created".format(name))
            except ApiException as ie:
                # BUG FIX: log the create exception (ie), not the earlier
                # patch exception (e).
                logger.error(
                    "Exception when calling CoreV1Api->create_namespaced_config_map: {0}"
                    .format(str(ie)))
                sys.exit(1)
        else:
            logger.error(
                "Exception when calling CoreV1Api->patch_namespaced_config_map: {0}"
                .format(str(e)))
            sys.exit(1)
def main():
    """Entry point: configure logging, load kube credentials, run the watch loop."""
    level_str = os.getenv('LOG_LEVEL', 'WARNING').upper()
    format_str = os.getenv('LOG_FORMAT',
                           '%(asctime)s [%(levelname)s] %(message)s')
    console = logging.StreamHandler()
    console.setFormatter(RFC3339Formatter(format_str))
    LOGGER.addHandler(console)

    sleep_time = int(os.environ.get("SECONDS_BETWEEN_STREAMS", '30'))

    try:
        logging.basicConfig(level=logging.getLevelName(level_str))
    except ValueError as err:
        LOGGER.error(err)
        sys.exit(1)

    # Prefer a local kubeconfig; fall back to in-cluster credentials.
    try:
        config.load_kube_config()
    except (FileNotFoundError, ConfigException) as err:
        LOGGER.debug("Not able to use Kubeconfig: %s", err)
        try:
            config.load_incluster_config()
        except (FileNotFoundError, ConfigException) as err:
            LOGGER.error("Not able to use in-cluster config: %s", err)
            sys.exit(1)

    # Re-open the watch whenever the API server closes the stream.
    try:
        while True:
            hostess.Watcher(env=os.environ, config=configuration).execute()
            LOGGER.info("API closed connection, sleeping for %i seconds",
                        sleep_time)
            time.sleep(sleep_time)
    except RuntimeError as err:
        LOGGER.exception(err)
        sys.exit(1)
def get_kubernetes_node_info_from_API():
    """Return allocatable resources per node: {name: {cpu/mem/gpu}}.

    Memory is reported in MiB. Exceptions from list_node are logged and an
    empty/partial dict is returned.
    """
    config.load_kube_config()
    api_instance = client.CoreV1Api()
    # https://github.com/kubernetes-client/python/blob/master/kubernetes/docs/CoreV1Api.md#list_node
    pretty = 'true'
    timeout_seconds = 56
    ret = dict()
    try:
        api_response = api_instance.list_node(pretty=pretty,
                                              timeout_seconds=timeout_seconds)
        for node in api_response.items:
            # BUG FIX: not every node exposes the 'nvidia.com/gpu'
            # allocatable; default to 0 instead of raising KeyError on
            # CPU-only nodes.
            gpu_resource = 0
            if 'nvidia.com/gpu' in node.status.allocatable:
                gpu_resource = int(
                    parse_quantity(node.status.allocatable['nvidia.com/gpu']))
            ret[node.metadata.name] = {
                "cpu-resource":
                int(parse_quantity(node.status.allocatable['cpu'])),
                "mem-resource":
                int(
                    parse_quantity(node.status.allocatable['memory']) / 1024 /
                    1024),
                "gpu-resource": gpu_resource,
            }
    except ApiException as e:
        logger.error("Exception when calling CoreV1Api->list_node: %s\n" % e)
    return ret
def update_configmap(PAI_KUBE_CONFIG_DEFAULT_LOCATION, name, data_dict, namespace = "default"):
    """Replace the named ConfigMap with ``data_dict``, creating it when absent.

    Args:
        PAI_KUBE_CONFIG_DEFAULT_LOCATION: path to the kubeconfig file.
        name: ConfigMap name.
        data_dict: data mapping for the ConfigMap.
        namespace: target namespace (defaults to "default").

    Exits the process on any API error other than a 404 on the initial
    replace.
    """
    config.load_kube_config(config_file=PAI_KUBE_CONFIG_DEFAULT_LOCATION)
    api_instance = kubernetes.client.CoreV1Api()
    meta_data = kubernetes.client.V1ObjectMeta()
    meta_data.namespace = namespace
    meta_data.name = name
    body = kubernetes.client.V1ConfigMap(metadata = meta_data, data = data_dict)
    try:
        api_response = api_instance.replace_namespaced_config_map(name, namespace, body)
        logger.info("configmap named {0} is updated.".format(name))
    except ApiException as e:
        if e.status == 404:
            try:
                logger.info("Couldn't find configmap named {0}. Create a new configmap".format(name))
                api_response = api_instance.create_namespaced_config_map(namespace, body)
                logger.info("Configmap named {0} is created".format(name))
            except ApiException as ie:
                # BUG FIX: report the create failure (ie), not the earlier
                # replace exception (e).
                logger.error("Exception when calling CoreV1Api->create_namespaced_config_map: {0}".format(str(ie)))
                sys.exit(1)
        else:
            logger.error("Exception when calling CoreV1Api->replace_namespaced_config_map: {0}".format(str(e)))
            sys.exit(1)
def __init__(self):
    """Build a kubernetes client wrapper from env-configured credentials.

    Reads KUBECONFIG (config file path) and KUBECONTEXT (context name,
    falling back to DEFAULT_CONTEXT), then attempts in-cluster config first
    and a kubeconfig load second, and finally constructs the API clients.
    """
    logger.debug('Creating KubernetesClient')
    config_file = os.getenv('KUBECONFIG', None)
    context = os.getenv('KUBECONTEXT', DEFAULT_CONTEXT)
    logger.debug('Configuration file is: ' + os.getenv('KUBECONFIG', 'None'))
    logger.debug('Configuration context is: ' + context)
    # NOTE(review): this raises if no kubeconfig file exists, even when the
    # in-cluster path below would have succeeded — confirm intended.
    list_kube_config_contexts = config.list_kube_config_contexts(
        config_file)
    # NOTE(review): debug leftover? Consider logger.debug instead of print.
    print(list_kube_config_contexts)
    try:
        logger.debug('Trying to load config.load_incluster_config()')
        config.load_incluster_config()
    except Exception as e:
        s = str(e)
        # NOTE(review): stdlib logging does not accept an `exception=`
        # keyword — this assumes a structlog-style logger; verify.
        logger.critical('Exception when config.load_incluster_config()',
                        exception=s)
    # NOTE(review): load_kube_config runs even when the in-cluster load
    # succeeded above, overwriting it — confirm this is intended.
    try:
        logger.debug('Trying to load config.load_kube_config()')
        config.load_kube_config(config_file, context)
    except Exception as e:
        s = str(e)
        logger.critical('Exception when config.load_kube_config()',
                        exception=s)
    # Clients
    self.ClientV1 = client.CoreV1Api()
    self.ExtensionsV1beta1Api = client.ExtensionsV1beta1Api()
def get_namespaced_secret(namespace):
    """Return the list of secrets in ``namespace``.

    Returns an empty list when the namespace is not found; exits the
    process (without logging) on any other API error.
    """
    config.load_kube_config()
    api_instance = client.CoreV1Api()
    try:
        return api_instance.list_namespaced_secret(namespace).items
    except ApiException as e:
        if e.status == 404:
            return []
        sys.exit(1)
def list_all_secrets_from_namespace(self, namespace):
    """Return every secret in ``namespace``.

    Returns an empty list when the namespace is not found; logs and exits
    the process on any other API error.
    """
    if self.in_cluster:
        config.load_incluster_config()
    else:
        config.load_kube_config(config_file="~/.kube/config")
    api_instance = client.CoreV1Api()
    try:
        return api_instance.list_namespaced_secret(namespace).items
    except ApiException as e:
        if e.status == 404:
            return []
        logger.error('Exception when calling CoreV1Api->list_namespaced_secret: %s\n' % e)
        sys.exit(1)
def list_running_job():
    """Count "appname*" pods (excluding exec pods) in the "dev" namespace.

    Returns:
        The number of matching pods.
    """
    config.load_kube_config('/root/.kube/config')
    # BUG FIX: the default Configuration was constructed twice; build and
    # register it once.
    configuration = client.Configuration()
    configuration.verify_ssl = False
    configuration.debug = False
    client.Configuration.set_default(configuration)
    v1 = client.CoreV1Api()
    pods = v1.list_namespaced_pod(namespace="dev").items
    count = 0
    for pod in pods:
        # Convert once instead of twice per pod.
        name = pod.metadata.to_dict()["name"]
        if name.startswith("appname") and "exec" not in name:
            count += 1
    logger.info("running job {}".format(count))
    return count
def get_secret(name, namespace):
    """Return the data dict of the named secret, or None when it is absent.

    Ensures the namespace exists first; exits the process on any API error
    other than 404.
    """
    confirm_namespace(namespace)
    config.load_kube_config()
    api_instance = client.CoreV1Api()
    try:
        api_response = api_instance.read_namespaced_secret(name, namespace)
    except ApiException as e:
        if e.status == 404:
            logger.info("Couldn't find secret named {0}.".format(name))
            return None
        else:
            # BUG FIX: the error message previously named the wrong API
            # (read_namespaced_config_map) although read_namespaced_secret
            # is called.
            logger.error("Exception when calling CoreV1Api->read_namespaced_secret: {0}".format(str(e)))
            sys.exit(1)
    return api_response.data
def replace_secret_in_namespace(self, payload, namespace):
    """Replace the named secret's content with ``payload['data']``.

    Args:
        payload: dict with 'metadata' (containing 'name') and 'data' keys.
        namespace: namespace holding the secret.

    Exits the process on any API error.
    """
    if self.in_cluster:
        config.load_incluster_config()
    else:
        config.load_kube_config(config_file="~/.kube/config")
    try:
        api_instance = client.CoreV1Api()
        meta_data = client.V1ObjectMeta()
        meta_data.name = payload['metadata']['name']
        body = client.V1Secret(metadata=meta_data, data=payload['data'])
        # don't use patch, which can't handle empty string: https://github.com/kubernetes/kubernetes/issues/37216
        api_instance.replace_namespaced_secret(payload['metadata']['name'], namespace, body)
    except ApiException as e:
        # BUG FIX: the error message previously named patch_namespaced_secret
        # although replace_namespaced_secret is the call made above.
        logger.error(
            "Exception when calling CoreV1Api->replace_namespaced_secret: %s\n"
            % e)
        sys.exit(1)
def create_namespace_if_not_exist(namespace):
    """Ensure ``namespace`` exists.

    Returns:
        True when the namespace had to be created, False when it already
        existed. Exits the process on any other API error.
    """
    config.load_kube_config()
    api_instance = client.CoreV1Api()
    try:
        api_instance.read_namespace(namespace)
        return False  # already there
    except ApiException as e:
        if e.status == 404:
            meta_data = client.V1ObjectMeta()
            meta_data.name = namespace
            api_instance.create_namespace(client.V1Namespace(metadata=meta_data))
            return True
        logger.error("Failed to create namespace [{0}]".format(namespace))
        sys.exit(1)
def create_group_if_not_exist(self, name):
    """Ensure the namespace ``name`` exists.

    Returns:
        True when the namespace had to be created, False when it already
        existed. Exits the process on any other API error.
    """
    if self.in_cluster:
        config.load_incluster_config()
    else:
        config.load_kube_config(config_file="~/.kube/config")
    api_instance = client.CoreV1Api()
    try:
        api_instance.read_namespace(name)
        return False  # already there
    except ApiException as e:
        if e.status == 404:
            meta_data = client.V1ObjectMeta()
            meta_data.name = name
            api_instance.create_namespace(client.V1Namespace(metadata=meta_data))
            return True
        logger.error("Failed to create namespace [{0}]".format(name))
        sys.exit(1)
def pod_is_ready_or_not(label_key, label_value, service_name, kubeconfig):
    """Return True when every pod matching ``label_key=label_value`` is ready.

    Args:
        label_key / label_value: label selector parts.
        service_name: name used in the "not ready" log message.
        kubeconfig: path of the kubeconfig file to load.

    Returns:
        False when the listing fails, no pods match, or any container is
        not ready; True otherwise.
    """
    label_selector_str = "{0}={1}".format(label_key, label_value)
    config.load_kube_config(config_file=kubeconfig)
    v1 = client.CoreV1Api()
    try:
        pod_list = v1.list_pod_for_all_namespaces(
            label_selector=label_selector_str, watch=False)
    except ApiException as e:
        logger.error(
            "Exception when calling CoreV1Api->list_pod_for_all_namespaces: %s\n"
            % e)
        return False
    # BUG FIX: the empty check was duplicated further down; keep a single
    # check (and fix the "dectected" typo in the message).
    if len(pod_list.items) == 0:
        logger.warning("No pod can be detected.")
        return False
    ready = 0
    unready = 0
    for pod in pod_list.items:
        # Pods with no container statuses yet count as unready.
        if pod.status.container_statuses is None:
            unready = unready + 1
            continue
        flag = True
        for container in pod.status.container_statuses:
            if not container.ready:
                unready = unready + 1
                flag = False
                break
        if flag:
            ready = ready + 1
    if unready != 0:
        logger.info("{0} is not ready.".format(service_name))
        logger.info("Total: {0}".format(ready + unready))
        logger.info("Ready: {0}".format(ready))
        return False
    return True
def get_pai_users():
    """Return the decoded usernames stored in the "pai-user" namespace secrets.

    Exits the process when the namespace is missing or the API call fails.
    """
    config.load_kube_config()
    api_instance = client.CoreV1Api()
    try:
        api_response = api_instance.list_namespaced_secret("pai-user")
    except ApiException as e:
        if e.status == 404:
            logger.info("Couldn't find secret in namespace pai-user, exit")
        else:
            logger.error("Exception when calling CoreV1Api->list_namespaced_secret: {0}".format(str(e)))
        sys.exit(1)
    # Usernames are stored base64-encoded under the "username" data key.
    return [base64.b64decode(item.data["username"])
            for item in api_response.items]
def get_kubernetes_pod_info_from_API():
    """Group per-pod resource requests by node name.

    Returns:
        {node_name: [get_pod_requests(pod), ...]} for every pod in the
        cluster. Logs and re-raises on API errors.
    """
    config.load_kube_config()
    api_instance = client.CoreV1Api()
    timeout_seconds = 56
    ret = dict()
    try:
        api_response = api_instance.list_pod_for_all_namespaces(
            timeout_seconds=timeout_seconds)
        for pod in api_response.items:
            ret.setdefault(pod.spec.node_name, []).append(get_pod_requests(pod))
    except ApiException:
        logger.error("Exception when calling CoreV1Api->list_pod",
                     exc_info=True)
        raise
    return ret
def save_and_clear_k8s(job_folder_name_map):
    """Archive logs/yaml for tracked "appname*" pods and delete finished ones.

    Pods whose phase is failed/succeeded get their log and pod yaml saved
    under "<folder>-<status>" and are deleted (exec pods are kept); running
    pods get their log appended under "<folder>-running".

    Args:
        job_folder_name_map: {job_id: output_folder_prefix}.
    """
    config.load_kube_config('/root/.kube/config')
    # BUG FIX: the default Configuration was constructed twice; build and
    # register it once.
    configuration = client.Configuration()
    configuration.verify_ssl = False
    configuration.debug = False
    client.Configuration.set_default(configuration)
    v1 = client.CoreV1Api()
    pods = v1.list_namespaced_pod(namespace="dev").items
    for pod in pods:
        time.sleep(1)
        # BUG FIX: use the public `status` attribute instead of the private
        # `_status` one.
        status = pod.status.to_dict()["phase"]
        pod_name = pod.metadata.to_dict()["name"]
        if not pod_name.startswith("appname"):
            continue
        # BUG FIX: guard against names without a job id — the original
        # called .groups() on a possibly-None match object.
        match = re.search(r"appname(\d+)-", pod_name)
        if not match:
            continue
        job_id = match.groups()[0]
        if job_id not in job_folder_name_map.keys():
            continue
        if status.lower() in ("failed", "succeeded"):
            os.makedirs("{}-{}".format(job_folder_name_map[job_id],
                                       status.lower()),
                        exist_ok=True,
                        mode=0o777)
            save_log(pod_name, job_folder_name_map[job_id], status.lower())
            save_yaml(pod.to_dict(), job_folder_name_map[job_id],
                      status.lower())
            # Keep exec pods around even when finished.
            if "exec" in pod_name:
                continue
            v1.delete_namespaced_pod(pod_name, "dev")
            logger.info("deleted pod {}".format(pod_name))
        else:
            os.makedirs("{}-{}".format(job_folder_name_map[job_id],
                                       "running"),
                        exist_ok=True,
                        mode=0o777)
            append_log(pod_name, job_folder_name_map[job_id], "running")
            save_yaml(pod.to_dict(), job_folder_name_map[job_id], "running")
def delete_secret_content(name, key, namespace):
    """Remove ``key`` from the named secret's data, when both exist.

    A missing secret is only logged; any other API error logs and exits
    the process.
    """
    confirm_namespace(namespace)
    config.load_kube_config()
    api_instance = client.CoreV1Api()
    try:
        api_response = api_instance.read_namespaced_secret(name, namespace)
        if api_response is not None and type(api_response.data) is dict:
            # pop() returns None when the key is absent — only rewrite the
            # secret when something was actually removed.
            if api_response.data.pop(key, None) is not None:
                meta_data = client.V1ObjectMeta()
                meta_data.namespace = namespace
                meta_data.name = name
                new_body = client.V1Secret(metadata=meta_data,
                                           data=api_response.data)
                api_instance.replace_namespaced_secret(name, namespace,
                                                       new_body)
    except ApiException as e:
        if e.status == 404:
            logger.info("Couldn't find secret named {0}.".format(name))
        else:
            logger.error("Exception when try to delete {0} from {1}: reason: {2}".format(key, name, str(e)))
            sys.exit(1)
def confirm_namespace(namespace):
    """Ensure ``namespace`` exists, creating it when absent.

    Exits the process on any unexpected API error.
    """
    config.load_kube_config()
    api_instance = client.CoreV1Api()
    try:
        api_response = api_instance.read_namespace(namespace)
    except ApiException as e:
        if e.status == 404:
            logger.info("Couldn't find namespace {0}. Create new namespace".format(namespace))
            try:
                meta_data = client.V1ObjectMeta(name=namespace)
                # BUG FIX: a namespace must be created from a V1Namespace
                # body; the original passed a V1ConfigMap.
                body = client.V1Namespace(metadata=meta_data)
                api_response = api_instance.create_namespace(body)
                # (also fixes the "Namesapce" typo in the log message)
                logger.info("Namespace {0} is created".format(namespace))
            except ApiException as ie:
                logger.error("Exception when calling CoreV1Api->create_namespace: {0}".format(str(ie)))
                sys.exit(1)
        else:
            logger.error("Exception when calling CoreV1Api->read_namespace: {0}".format(str(e)))
            sys.exit(1)
def load_kubernetes_config():
    """Load kubernetes credentials: in-cluster first, then from kubeconfig.

    Raises:
        SystemExit: with code 1 when neither source yields a configuration.
    """
    try:
        config.load_incluster_config()
        return  # in-cluster credentials found
    except config.config_exception.ConfigException:
        logger.debug(
            "Unable to load in-cluster configuration; trying to load from Kube config file"
        )
    try:
        config.load_kube_config()
    except (IOError, config.config_exception.ConfigException) as exc:
        logger.debug("Unable to load Kube config; reason={}".format(exc))
        logger.error("Unable to load in-cluster or Kube config")
        raise SystemExit(1)
def submit_job(job_list, index_start):
    """Submit jobs from ``job_list`` starting at ``index_start``.

    Keeps at most 5 jobs in flight (counting already-running ones).

    Returns:
        The index one past the last submitted job.
    """
    config.load_kube_config('/root/.kube/config')
    # BUG FIX: the default Configuration was constructed twice; build and
    # register it once.
    configuration = client.Configuration()
    configuration.verify_ssl = False
    configuration.debug = False
    client.Configuration.set_default(configuration)
    v1 = client.CoreV1Api()
    running_num = list_running_job()
    # Cap the batch so running + submitted never exceeds 5.
    end = min(index_start + (5 - running_num), len(job_list))
    logger.info("submitting index: from {} to {}".format(index_start, end))
    for job_info in job_list[index_start:end]:
        time.sleep(2)
        time_stamp = int(time.time())
        create_pod_yaml(job_info, time_stamp)
        submit_pod(v1)
        job_id = str(job_info[1])
        uid = get_pod_uid(job_id, time_stamp)
        create_confmap_service(uid, job_info, time_stamp)
        submit_confmap_service(v1)
    return end
def __init__(self):
    """Load kubernetes credentials (in-cluster, then kubeconfig) and build
    a CoreV1Api client stored on ``self.client``.

    Exits the process when neither configuration source works.
    """
    config_loaded = False
    try:
        config.load_incluster_config()
        config_loaded = True
    except config.config_exception.ConfigException:
        # BUG FIX: logger.warn is a deprecated alias — use warning().
        logger.warning(
            "Unable to load in-cluster configuration; trying to load from Kube config file"
        )
        try:
            config.load_kube_config()
            config_loaded = True
        except (IOError, config.config_exception.ConfigException) as exc:
            logger.warning(
                "Unable to load Kube config; reason={}".format(exc))
    if not config_loaded:
        logger.error("Unable to load in-cluster or Kube config")
        sys.exit(1)
    cli = client.CoreV1Api()
    # Skip TLS hostname verification on the API client.
    cli.api_client.configuration.assert_hostname = False
    self.client = cli
labelnames=("fn_name",)) def record(fn): @functools.wraps(fn) def wrapped(*args, **kwargs): start = timeit.default_timer() try: return fn(*args, **kwargs) finally: elapsed = timeit.default_timer() - start job_deployer_fn_histogram.labels(fn.__name__).observe(elapsed) return wrapped # The config will be loaded from default location. config.load_kube_config() k8s_client = client.CoreV1Api() class JobDeployer: def __init__(self): self.v1 = k8s_client self.namespace = "default" self.pretty = "pretty_example" @record def create_pod(self, body): api_response = self.v1.create_namespaced_pod( namespace=self.namespace, body=body,
def get_k8s_cluster_info(working_dir, dns_prefix, location):
    """Collect node resources and addresses for an aks-engine style cluster.

    Reads the generated kubeconfig under ``working_dir/_output/...``, lists
    nodes, and splits them into masters ("opmaster" in the name) and
    workers ("opworker" in the name). Master/worker resources are reduced
    by 2 CPUs and 8 GiB of memory (reserved overhead). A single ``sku``
    dict is derived from the first worker seen.

    Args:
        working_dir: aks-engine output root directory.
        dns_prefix: deployment dns prefix (part of the kubeconfig path).
        location: azure location (part of the kubeconfig path).

    Returns:
        dict with master/worker maps, sku, gpu flags, master IPs, working
        dir and kubeconfig path.
    """
    kube_config_path = "{0}/_output/{1}/kubeconfig/kubeconfig.{2}.json".format(
        working_dir, dns_prefix, location)
    master_string = "opmaster"
    worker_string = "opworker"
    config.load_kube_config(config_file=kube_config_path)
    api_instance = client.CoreV1Api()
    pretty = 'true'
    timeout_seconds = 56
    master = dict()
    worker = dict()
    sku = None
    gpu_enable = False
    master_ip = None
    master_ip_internal = None
    worker_count = 0
    worker_with_gpu = 0
    try:
        api_response = api_instance.list_node(pretty=pretty,
                                              timeout_seconds=timeout_seconds)
        for node in api_response.items:
            # Nodes without the nvidia.com/gpu allocatable count as 0 GPUs.
            gpu_resource = 0
            if 'nvidia.com/gpu' in node.status.allocatable:
                gpu_resource = int(
                    parse_quantity(node.status.allocatable['nvidia.com/gpu']))
            if master_string in node.metadata.name:
                # Reserve 2 CPUs and 8 GiB for system overhead; memory in MiB.
                master[node.metadata.name] = {
                    "cpu-resource":
                    int(parse_quantity(node.status.allocatable['cpu'])) - 2,
                    "mem-resource":
                    int(
                        parse_quantity(node.status.allocatable['memory']) /
                        1024 / 1024) - 8 * 1024,
                    "gpu-resource":
                    gpu_resource,
                }
                master[node.metadata.name]["hostname"] = node.metadata.name
                for address in node.status.addresses:
                    if address.type == "Hostname":
                        continue
                    # First non-hostname address is the fallback master IP;
                    # an ExternalIP overrides it.
                    if master_ip == None:
                        master_ip = address.address
                    if address.type == "ExternalIP":
                        master_ip = address.address
                    if address.type == "InternalIP":
                        master[node.metadata.name]["ip"] = address.address
                        master_ip_internal = address.address
            elif worker_string in node.metadata.name:
                worker[node.metadata.name] = {
                    "cpu-resource":
                    int(parse_quantity(node.status.allocatable['cpu'])) - 2,
                    "mem-resource":
                    int(
                        parse_quantity(node.status.allocatable['memory']) /
                        1024 / 1024) - 8 * 1024,
                    "gpu-resource":
                    gpu_resource,
                }
                # Derive the sku from the first worker only; per-GPU units
                # when the worker has GPUs, per-CPU units otherwise.
                if sku is None:
                    sku = dict()
                    if gpu_resource != 0:
                        sku["gpu_resource"] = worker[
                            node.metadata.name]["gpu-resource"]
                        sku["mem-unit"] = int(
                            worker[node.metadata.name]["mem-resource"] /
                            worker[node.metadata.name]["gpu-resource"])
                        sku["cpu-unit"] = int(
                            worker[node.metadata.name]["cpu-resource"] /
                            worker[node.metadata.name]["gpu-resource"])
                    else:
                        sku["cpu_resource"] = worker[
                            node.metadata.name]["cpu-resource"]
                        sku["mem-unit"] = int(
                            worker[node.metadata.name]["mem-resource"] /
                            worker[node.metadata.name]["cpu-resource"])
                worker_count = worker_count + 1
                if worker[node.metadata.name]["gpu-resource"] != 0:
                    worker_with_gpu = worker_with_gpu + 1
                    gpu_enable = True
                worker[node.metadata.name]["hostname"] = node.metadata.name
                for address in node.status.addresses:
                    if address.type == "Hostname":
                        continue
                    if address.type == "InternalIP":
                        worker[node.metadata.name]["ip"] = address.address
    except ApiException as e:
        logger.error("Exception when calling CoreV1Api->list_node: %s\n" % e)
    return {
        "master": master,
        "worker": worker,
        "sku": sku,
        "gpu": gpu_enable,
        # gpu-ready means every worker has at least one GPU.
        "gpu-ready": worker_count == worker_with_gpu,
        "master_ip": master_ip,
        "master_internal": master_ip_internal,
        "working_dir": "{0}/{1}".format(working_dir, TEMPORARY_DIR_NAME),
        "kube_config":
        "{0}/_output/{1}/kubeconfig/kubeconfig.{2}.json".format(
            working_dir, dns_prefix, location)
    }
def get_kubernetes_corev1api(PAI_KUBE_CONFIG_PATH, **kwargs):
    """Load the kubeconfig at ``PAI_KUBE_CONFIG_PATH`` and return a CoreV1Api.

    Extra keyword arguments are accepted for call-site compatibility but
    are not used.
    """
    config.load_kube_config(config_file=PAI_KUBE_CONFIG_PATH)
    return kubernetes.client.CoreV1Api()
def get_stuck_pods():
    """Report webapp pods that are stuck, starting, or very old across the
    jcx-prod-us-east and jcx-prod-eu contexts.

    Pods with a non-ready first container are bucketed by age: < 10 minutes
    (starting), > 8 hours (old), otherwise stuck; the three lists are logged.
    """
    # set up logging configuration
    config_file = os.path.join(sys.path[0], 'logging.conf')
    logging.config.fileConfig(config_file)
    logger = logging.getLogger('output')
    # filters for get pods
    label_selector = "jcx.inst.component=webapp"
    limit = 100
    # initializing stuck pod variables
    pod_list_stuck = []
    pod_list_starting = []
    pod_list_old = []
    starting_cutoff = timedelta(minutes=10)
    old_cutoff = timedelta(hours=8)
    # collecting list of webapp pods from jcx-prod-us-east
    config.load_kube_config(context="jcx-prod-us-east")
    api = client.CoreV1Api()
    pod_list_us = api.list_pod_for_all_namespaces(label_selector=label_selector,
                                                  limit=limit)
    # collecting list of webapp pods from jcx-prod-eu
    config.load_kube_config(context="jcx-prod-eu")
    api = client.CoreV1Api()
    pod_list_eu = api.list_pod_for_all_namespaces(label_selector=label_selector,
                                                  limit=limit)
    # combine eu and us pod lists
    pod_list = pod_list_us.items + pod_list_eu.items
    # filter pods into category lists based on age of pod
    if len(pod_list) != 0:
        for pod in pod_list:
            try:
                if not pod.status.container_statuses[0].ready:
                    age = datetime.now(pytz.utc) - pod.metadata.creation_timestamp
                    if age < starting_cutoff:
                        pod_list_starting.append(pod)
                    elif age > old_cutoff:
                        pod_list_old.append(pod)
                    else:
                        pod_list_stuck.append(pod)
            except Exception as e:
                # BUG FIX: stdlib logging requires %-style placeholders —
                # the original passed positional args with none, which makes
                # the logging module raise a formatting error at emit time.
                logger.debug("failed on pod: %s %s %s",
                             pod.metadata.namespace, pod.metadata.name, e)
    # display results
    os.system('cls||clear')
    logger.debug("{:^33} {:^16} {:^19} {:^6} {:^20}".format("namespace", "Pod", "Age (H:M:S)", "Loc", "installation name"))
    logger.debug("{:^94}".format("--------------- STUCK PODS (>10 mins) ---------------"))
    print_pod_list(pod_list_stuck)
    logger.debug("{:^94}".format("------------ IN PROGRESS PODS (<10 mins) ------------"))
    print_pod_list(pod_list_starting)
    logger.debug("{:^94}".format("----------- VERY OLD DEAD PODS (>8 hours) -----------"))
    print_pod_list(pod_list_old)
    if len(pod_list) == 0:
        logger.debug("No stuck pods. Continue to monitor")
def generate_layout(output_file):
    """Probe the cluster via the local kubeconfig and write a layout YAML.

    Queries the API server URL, the kubernetes-dashboard service address,
    and all nodes; the first machine (sorted by hostname) becomes
    pai-master, the rest pai-workers (the same machine serves both roles
    when there is only one node).

    Args:
        output_file: path of the YAML layout file to write.
    """
    # init client
    config.load_kube_config()
    v1 = client.CoreV1Api()
    # api server url
    api_servers_url = v1.api_client.configuration.host
    # generate dashboard-url from the kubernetes-dashboard service ClusterIP
    services = v1.list_service_for_all_namespaces(field_selector="metadata.name=kubernetes-dashboard", pretty=False, timeout_seconds=56, watch=False)
    dashboard_service = services.items[0]
    dashboard_url = "http://{0}:80".format(dashboard_service.spec.cluster_ip)
    # query k8s nodes
    nodes = v1.list_node(pretty=False, timeout_seconds=56, watch=False)
    addressesList = map(lambda node: node.status.addresses, nodes.items)
    machineList = []
    for addresses in addressesList:
        machine = dict()
        machine['machine-type'] = 'GENERIC'
        for address in addresses:
            if address.type == 'InternalIP':
                machine['hostip'] = address.address
            if address.type == 'Hostname':
                machine['hostname'] = address.address
                # TODO nodename == hostname on aks
                machine['nodename'] = address.address
        machineList.append(machine)
    machineList.sort(key=lambda k: k['hostname'])
    # assgin pai-master: first machine by hostname order
    master = machineList[0]
    master['pai-master'] = 'true'
    master['zkid'] = 1
    # assign pai-workers (single-node cluster: the master also works)
    workers = machineList[1:] if len(machineList) > 1 else machineList
    for worker in workers:
        worker['pai-worker'] = 'true'
    # the default sku
    machineSku = yaml.load("""
GENERIC:
    mem: 1
    gpu:
        type: generic
        count: 1
    cpu:
        vcore: 1
    os: ubuntu16.04
""", yaml.SafeLoader)
    layout = {
        "kubernetes": {
            "api-servers-url": api_servers_url,
            "dashboard-url": dashboard_url
        },
        "machine-sku": machineSku,
        "machine-list": machineList
    }
    # print(yaml.dump(layout, default_flow_style=False))
    with open(output_file, 'w') as outfile:
        yaml.dump(layout, outfile, default_flow_style=False)