def create_node(self, node_config, tags, count):
    """Create ``count`` pods, plus an optional service and ingress per pod.

    Args:
        node_config: Node configuration mapping. Either the pod spec
            itself, or a mapping holding the pod spec under "pod" with
            optional "service" and "ingress" specs next to it. It is
            deep-copied first, so the caller's mapping is never modified.
        tags: Labels applied to every created pod. Updated in place with
            the cluster-name tag and a fresh "ray-node-uuid" tag. Must
            contain ``TAG_RAY_NODE_KIND``.
        count: Number of pods to create.

    Raises:
        KeyError: If the pod spec has no "metadata" entry, ``tags`` has no
            ``TAG_RAY_NODE_KIND`` entry, or a service spec has no "spec".
    """
    conf = copy.deepcopy(node_config)
    pod_spec = conf.get("pod", conf)
    service_spec = conf.get("service")
    ingress_spec = conf.get("ingress")

    node_uuid = str(uuid4())
    tags[TAG_RAY_CLUSTER_NAME] = self.cluster_name
    tags["ray-node-uuid"] = node_uuid

    pod_metadata = pod_spec["metadata"]
    pod_metadata["namespace"] = self.namespace
    # Merge the tags into a dict owned by the pod spec rather than aliasing
    # the caller's ``tags`` object, so the head-selector labels added below
    # never leak back into the caller's dict.
    labels = pod_metadata.setdefault("labels", {})
    labels.update(tags)

    # Allow Operator-configured service to access the head node.
    if tags[TAG_RAY_NODE_KIND] == NODE_KIND_HEAD:
        labels.update(head_service_selector(self.cluster_name))

    logger.info(
        log_prefix + f"calling create_namespaced_pod (count={count}).")
    new_nodes = []
    for _ in range(count):
        pod = core_api().create_namespaced_pod(self.namespace, pod_spec)
        new_nodes.append(pod)

    new_svcs = []
    if service_spec is not None:
        logger.info(
            log_prefix
            + f"calling create_namespaced_service (count={count}).")
        for new_node in new_nodes:
            # Each service is named after the pod it fronts.
            metadata = service_spec.get("metadata", {})
            metadata["name"] = new_node.metadata.name
            service_spec["metadata"] = metadata
            # NOTE(review): every pod created by this call carries the same
            # "ray-node-uuid" label, so with count > 1 each service selects
            # all pods of the batch, not just its namesake -- confirm this
            # is intended.
            service_spec["spec"]["selector"] = {"ray-node-uuid": node_uuid}
            svc = core_api().create_namespaced_service(
                self.namespace, service_spec)
            new_svcs.append(svc)

    if ingress_spec is not None:
        logger.info(
            log_prefix
            + f"calling create_namespaced_ingress (count={count}).")
        for new_svc in new_svcs:
            svc_name = new_svc.metadata.name
            # Render every ingress from a pristine copy of the template so
            # the service-name substitution done for one service is never
            # carried over into the ingress built for the next one.
            rendered = copy.deepcopy(ingress_spec)
            metadata = rendered.get("metadata", {})
            metadata["name"] = svc_name
            rendered["metadata"] = metadata
            rendered = _add_service_name_to_service_port(rendered, svc_name)
            extensions_beta_api().create_namespaced_ingress(
                self.namespace, rendered)
def terminate_node(self, node_id):
    """Delete the pod named ``node_id`` and its same-named service/ingress.

    A pod that is already gone (HTTP 404) only produces a warning. Removal
    of the service and the ingress is best-effort: they may never have been
    created, so a 404 is ignored, and any other API failure is logged
    without being raised.

    Args:
        node_id: Name of the pod; the service and ingress share this name.

    Raises:
        ApiException: If deleting the pod fails with a status other
            than 404.
    """
    logger.info(log_prefix + "calling delete_namespaced_pod")
    try:
        core_api().delete_namespaced_pod(node_id, self.namespace)
    except ApiException as e:
        if e.status == 404:
            logger.warning(log_prefix + f"Tried to delete pod {node_id},"
                           " but the pod was not found (404).")
        else:
            raise

    # The API accessors are invoked lazily inside the try block below, in
    # the same order as the pod's companions were created.
    cleanup_steps = (
        ("service",
         lambda: core_api().delete_namespaced_service(
             node_id, self.namespace)),
        ("ingress",
         lambda: extensions_beta_api().delete_namespaced_ingress(
             node_id, self.namespace)),
    )
    for kind, delete in cleanup_steps:
        try:
            delete()
        except ApiException as e:
            # A missing resource is expected; anything else is surfaced in
            # the log instead of being silently dropped, but never raised.
            if e.status != 404:
                logger.warning(
                    log_prefix + f"Failed to delete {kind} {node_id} "
                    f"(status {e.status}); continuing.")