def container_ip(self):
    """Attach the selected node to the network and prompt for the solution IP.

    A copy of ``self.network_view`` is stored in ``self.network_view_copy``
    and handed to ``deployer.add_network_node``. Every workload id reported
    by that call is waited on. The user then picks one of the free IPs the
    view reports for the selected node; the choice is stored in
    ``self.ip_address``.

    Raises:
        DeploymentFailed: when waiting on a network workload reports failure.
    """
    self.network_view_copy = self.network_view.copy()
    network_update = deployer.add_network_node(
        self.network_view.name,
        self.selected_node,
        self.pool_id,
        self.network_view_copy,
        bot=self,
        owner=self.solution_metadata.get("owner"),
    )
    if network_update:
        for workload_id in network_update["ids"]:
            deployed = deployer.wait_workload(
                workload_id, self, breaking_node_id=self.selected_node.node_id
            )
            if deployed:
                continue
            raise DeploymentFailed(
                f"Failed to add node {self.selected_node.node_id} to network {workload_id}",
                wid=workload_id,
            )
        # Take a fresh copy once the network workloads were waited on
        # (presumably so the view reflects the update -- TODO confirm).
        self.network_view_copy = self.network_view_copy.copy()

    available_ips = self.network_view_copy.get_node_free_ips(self.selected_node)
    preselected = available_ips[0]
    self.ip_address = self.drop_down_choice(
        "Please choose IP Address for your solution",
        available_ips,
        default=preselected,
        required=True,
    )
def etcd_ip(self):
    """Attach the selected node to the network and collect one IP per ETCD node.

    After the network workloads (if any) are waited on, the user is asked
    for an IP address ``self.no_nodes.value`` times. Each choice is appended
    to ``self.network_view.used_ips`` and to ``self.ip_addresses``, and a
    ``etcd_<n>=http://<ip>:2380,`` entry is appended to
    ``self.etcd_clutser`` (the string keeps its trailing comma).

    Raises:
        DeploymentFailed: when waiting on a network workload reports failure.
    """
    network_update = deployer.add_network_node(
        self.network_view.name,
        self.selected_node,
        self.pool_id,
        self.network_view,
        bot=self,
        owner=self.solution_metadata.get("owner"),
    )
    if network_update:
        self.md_show_update("Deploying Network on Nodes....")
        for workload_id in network_update["ids"]:
            if not deployer.wait_workload(
                workload_id, self, breaking_node_id=self.selected_node.node_id
            ):
                raise DeploymentFailed(
                    f"Failed to add node {self.selected_node.node_id} to network {workload_id}",
                    wid=workload_id,
                )
        self.network_view = self.network_view.copy()

    self.ip_addresses = []
    self.etcd_clutser = ""
    for index in range(self.no_nodes.value):
        member_number = index + 1
        candidates = self.network_view.get_node_free_ips(self.selected_node)
        chosen_ip = self.drop_down_choice(
            f"Please choose IP Address for ETCD Node {member_number}",
            candidates,
            default=candidates[0],
            required=True,
        )
        # Record the pick on the view so the next prompt's free-IP query
        # can take it into account.
        self.network_view.used_ips.append(chosen_ip)
        self.ip_addresses.append(chosen_ip)
        self.etcd_clutser += f"etcd_{member_number}=http://{chosen_ip}:2380,"
def ip_selection(self):
    """Attach every minio node to the network and prompt for container IPs.

    Each node in ``self.minio_nodes`` is added to the network using the pool
    id at the same index in ``self.minio_pool_ids``. The user then chooses
    an IP for the primary container and, when ``self.mode`` is
    ``"Master/Slave"``, one for the secondary container. Choices are stored
    in ``self.ip_addresses`` and appended to ``self.network_view.used_ips``.

    Raises:
        DeploymentFailed: when waiting on a network workload reports failure.
    """

    def prompt_for_ip(message, target_node):
        # Ask for one of the node's free IPs and record the pick both in the
        # result list and on the network view.
        candidates = self.network_view.get_node_free_ips(target_node)
        picked = self.drop_down_choice(
            message, candidates, required=True, default=candidates[0]
        )
        self.ip_addresses.append(picked)
        self.network_view.used_ips.append(picked)

    self.md_show_update("Deploying Network on Nodes....")
    for index, node in enumerate(self.minio_nodes):
        pool_id = self.minio_pool_ids[index]
        network_update = deployer.add_network_node(
            self.network_view.name,
            node,
            pool_id,
            self.network_view,
            bot=self,
            owner=self.solution_metadata.get("owner"),
        )
        if network_update:
            for workload_id in network_update["ids"]:
                if not deployer.wait_workload(
                    workload_id, bot=self, breaking_node_id=node.node_id
                ):
                    raise DeploymentFailed(
                        f"Failed to add node {node.node_id} to network {workload_id}",
                        wid=workload_id,
                    )
            self.network_view = self.network_view.copy()

    self.ip_addresses = []
    prompt_for_ip(
        "Please choose IP Address for Primary container", self.minio_nodes[0]
    )
    if self.mode == "Master/Slave":
        prompt_for_ip(
            "Please choose IP Address for Secondary container", self.minio_nodes[1]
        )
def container_ip(self):
    """Attach the selected node to the network and prompt for the solution IP.

    This variant forwards the whole ``self.solution_metadata`` mapping as
    keyword arguments to ``deployer.add_network_node`` and offers the free
    IPs without a preselected default. The choice is stored in
    ``self.ip_address``.

    Raises:
        StopChatFlow: when waiting on a network workload reports failure.
    """
    self.network_view_copy = self.network_view.copy()
    network_update = deployer.add_network_node(
        self.network_view.name,
        self.selected_node,
        self.pool_id,
        self.network_view_copy,
        bot=self,
        **self.solution_metadata,
    )
    if network_update:
        for workload_id in network_update["ids"]:
            deployed = deployer.wait_workload(
                workload_id, self, breaking_node_id=self.selected_node.node_id
            )
            if deployed:
                continue
            raise StopChatFlow(
                f"Failed to add node {self.selected_node.node_id} to network {workload_id}"
            )
        self.network_view_copy = self.network_view_copy.copy()

    available_ips = self.network_view_copy.get_node_free_ips(self.selected_node)
    self.ip_address = self.drop_down_choice(
        "Please choose IP Address for your solution", available_ips
    )
def reservation(self):
    """Reserve the workloads that expose a solution on a domain.

    Steps, in order:

    1. Update ``self.solution_metadata`` with the name and form info.
    2. Schedule a node in ``self.pool_id`` and add it to the solution's network.
    3. Pick a free IP on that node for the TCP router.
    4. Unless a custom domain is used, create the sub-domain workload.
    5. Create the reverse proxy workload.
    6. Expose the solution address through the TCP router container.

    Raises:
        DeploymentFailed: when any awaited workload reports failure.
        StopChatFlow: when the network has no free IP on the selected node.
    """
    self.solution_metadata.update(
        {
            "name": self.domain,
            "form_info": {"Solution name": self.domain, "chatflow": "exposed"},
        }
    )
    self.selected_node = deployer.schedule_container(self.pool_id, mru=1, cru=1, sru=1)
    self.network_name = self.solution["Network"]

    network_update = deployer.add_network_node(
        self.network_name,
        self.selected_node,
        self.pool_id,
        bot=self,
        owner=self.solution_metadata.get("owner"),
    )
    if network_update:
        for workload_id in network_update["ids"]:
            if not deployer.wait_workload(
                workload_id, self, breaking_node_id=self.selected_node.node_id
            ):
                raise DeploymentFailed(
                    f"Failed to add node to network {workload_id}", wid=workload_id
                )

    self.network_view = deployer.get_network_view(self.network_name)
    self.tcprouter_ip = self.network_view.get_free_ip(self.selected_node)
    if not self.tcprouter_ip:
        raise StopChatFlow(
            f"No available ips one for network {self.network_view.name} node {self.selected_node.node_id}"
        )

    # Custom domains are not given a sub-domain workload here.
    if self.domain_type != "Custom Domain":
        self.dom_id = deployer.create_subdomain(
            pool_id=self.domain_pool.pool_id,
            gateway_id=self.domain_gateway.node_id,
            subdomain=self.domain,
            **self.solution_metadata,
            solution_uuid=self.solution_id,
        )
        if not deployer.wait_workload(self.dom_id, self):
            raise DeploymentFailed(
                f"Failed to reserve sub-domain workload {self.dom_id}",
                solution_uuid=self.solution_id,
            )

    self.proxy_id = deployer.create_proxy(
        pool_id=self.domain_pool.pool_id,
        gateway_id=self.domain_gateway.node_id,
        domain_name=self.domain,
        trc_secret=self.secret,
        **self.solution_metadata,
        solution_uuid=self.solution_id,
    )
    if not deployer.wait_workload(self.proxy_id, self):
        raise DeploymentFailed(
            f"Failed to reserve reverse proxy workload {self.proxy_id}",
            solution_uuid=self.solution_id,
        )

    self.tcprouter_id = deployer.expose_address(
        pool_id=self.pool_id,
        gateway_id=self.domain_gateway.node_id,
        network_name=self.network_name,
        local_ip=self.solution_ip,
        port=self.port,
        tls_port=self.tls_port,
        trc_secret=self.secret,
        bot=self,
        **self.solution_metadata,
        solution_uuid=self.solution_id,
    )
    if not deployer.wait_workload(self.tcprouter_id, self):
        raise DeploymentFailed(
            f"Failed to reserve TCP Router container workload {self.tcprouter_id}",
            solution_uuid=self.solution_id,
            wid=self.tcprouter_id,
        )
def reservation(self):
    """Reserve the workloads that expose a solution, via TRC or NGINX.

    Steps, in order:

    1. Update ``self.solution_metadata`` with the name and form info.
    2. Schedule a node in ``self.pool_id`` and add it to the solution's network.
    3. Pick a free IP on that node for the TCP router.
    4. Unless a custom domain is used, create the sub-domain workload.
    5. For ``proxy_type == "TRC"``, create the reverse proxy workload.
    6. Read the ``LOGGING_SINK`` config and, when non-empty, set its
       ``channel_name``.
    7. For ``proxy_type == "NGINX"`` call
       ``deployer.expose_and_create_certificate``; otherwise call
       ``deployer.expose_address`` and keep the first element of its result.

    Raises:
        DeploymentFailed: when any awaited workload reports failure.
        StopChatFlow: when the network has no free IP on the selected node.
    """
    self.solution_metadata.update(
        {
            "name": self.domain,
            "form_info": {"Solution name": self.domain, "chatflow": "exposed"},
        }
    )
    self.selected_node = deployer.schedule_container(self.pool_id, mru=1, cru=1, sru=1)
    self.network_name = self.solution["Network"]

    network_update = deployer.add_network_node(
        self.network_name,
        self.selected_node,
        self.pool_id,
        bot=self,
        owner=self.solution_metadata.get("owner"),
    )
    if network_update:
        for workload_id in network_update["ids"]:
            if not deployer.wait_workload(
                workload_id, self, breaking_node_id=self.selected_node.node_id
            ):
                raise DeploymentFailed(
                    f"Failed to add node to network {workload_id}", wid=workload_id
                )

    self.network_view = deployer.get_network_view(self.network_name)
    self.tcprouter_ip = self.network_view.get_free_ip(self.selected_node)
    if not self.tcprouter_ip:
        raise StopChatFlow(
            f"No available ips one for network {self.network_view.name} node {self.selected_node.node_id}"
        )

    # Custom domains are not given a sub-domain workload here.
    if self.domain_type != "Custom Domain":
        self.dom_id = deployer.create_subdomain(
            pool_id=self.domain_pool.pool_id,
            gateway_id=self.domain_gateway.node_id,
            subdomain=self.domain,
            **self.solution_metadata,
            solution_uuid=self.solution_id,
        )
        if not deployer.wait_workload(self.dom_id, self):
            raise DeploymentFailed(
                f"Failed to reserve sub-domain workload {self.dom_id}",
                solution_uuid=self.solution_id,
            )

    if self.proxy_type == "TRC":
        self.proxy_id = deployer.create_proxy(
            pool_id=self.domain_pool.pool_id,
            gateway_id=self.domain_gateway.node_id,
            domain_name=self.domain,
            trc_secret=self.secret,
            **self.solution_metadata,
            solution_uuid=self.solution_id,
        )
        if not deployer.wait_workload(self.proxy_id, self):
            raise DeploymentFailed(
                f"Failed to reserve reverse proxy workload {self.proxy_id}",
                solution_uuid=self.solution_id,
            )

    # Both expose paths below receive this logging configuration.
    trc_log_config = j.core.config.get("LOGGING_SINK", {})
    if trc_log_config:
        channel = f"{self.threebot_name}-{self.solution_name}-trc".lower()
        trc_log_config["channel_name"] = channel

    if self.proxy_type != "NGINX":
        self.tcprouter_id, _ = deployer.expose_address(
            pool_id=self.pool_id,
            gateway_id=self.domain_gateway.node_id,
            network_name=self.network_name,
            local_ip=self.solution_ip,
            port=self.port,
            tls_port=self.tls_port,
            trc_secret=self.secret,
            bot=self,
            log_config=trc_log_config,
            **self.solution_metadata,
            solution_uuid=self.solution_id,
        )
    else:
        self.tcprouter_id = deployer.expose_and_create_certificate(
            domain=self.domain,
            email=self.email,
            pool_id=self.pool_id,
            gateway_id=self.domain_gateway.node_id,
            network_name=self.network_name,
            solution_ip=self.solution_ip,
            solution_port=self.port,
            trc_secret=self.secret,
            bot=self,
            enforce_https=self.force_https,
            log_config=trc_log_config,
            **self.solution_metadata,
            solution_uuid=self.solution_id,
        )

    if not deployer.wait_workload(self.tcprouter_id, self):
        raise DeploymentFailed(
            f"Failed to reserve TCP Router container workload {self.tcprouter_id}",
            solution_uuid=self.solution_id,
            wid=self.tcprouter_id,
        )
def deploy_vmachine(
    self,
    solution_name,
    vm_size,
    pool_id,
    nodes_generator,
    ssh_keys,
    solution_uuid,
    network_view,
    enable_public_ip,
    vmachine_type,
    description="",
):
    """Deploy a virtual machine on the first candidate node that succeeds.

    Nodes are pulled one at a time from ``nodes_generator``. For each node
    the network is extended to it, a free private IP is taken from a fresh
    copy of the network view, and the virtual machine workload is created
    and waited on. A node whose network or machine deployment fails is
    skipped and the next candidate is tried.

    Args:
        solution_name: name recorded in the workload's form info.
        vm_size: size passed through to ``deployer.deploy_vmachine``.
        pool_id: pool used for the network and machine workloads.
        nodes_generator: iterator yielding candidate nodes.
        ssh_keys: keys passed through to ``deployer.deploy_vmachine``.
        solution_uuid: uuid recorded in the workload's form info.
        network_view: network view the nodes are added to.
        enable_public_ip: forwarded to the deployer; when true, a failed
            machine's public IP workload is decommissioned.
        vmachine_type: passed to the deployer as the workload ``name``.
        description: free-text description for the workload.

    Returns:
        dict with keys ``public_ip``, ``ip_address`` and ``vm_wid`` on
        success, or ``None`` once every candidate node has been tried.

    Raises:
        j.exceptions.Runtime: when an ``IndexError`` escapes node selection
            or the network update.
    """
    # Loop until a deployment succeeds (return) or the candidates run out
    # (break). Breaking lets the final error log below actually run;
    # previously exhaustion returned silently and that log was unreachable.
    while True:
        try:
            try:
                vmachine_node = next(nodes_generator)
            except StopIteration:
                break
            self.vdc_deployer.info(f"Deploying virtual machine on node {vmachine_node.node_id}")

            # add node to network
            try:
                result = deployer.add_network_node(
                    self.vdc_name, vmachine_node, pool_id, network_view, self.bot, self.identity.instance_name
                )
                if result:
                    for wid in result["ids"]:
                        success = deployer.wait_workload(
                            wid, self.bot, 3, identity_name=self.identity.instance_name, cancel_by_uuid=False
                        )
                        if not success:
                            self.vdc_deployer.error("Failed to deploy network for virtual machine")
                            raise DeploymentFailed()
            except DeploymentFailed:
                self.vdc_deployer.error(
                    f"Failed to deploy network for virtual machine on node {vmachine_node.node_id}"
                )
                continue
        except IndexError:
            self.vdc_deployer.error("All attempts to deploy virtual machine on nodes node have been failed")
            raise j.exceptions.Runtime("All attempts to deploy virtual machine on nodes node have been failed")

        network_view = network_view.copy()
        private_ip_address = network_view.get_free_ip(vmachine_node)
        self.vdc_deployer.info(f"Virtual machine ip: {private_ip_address}")

        metadata = {"form_info": {"chatflow": "vmachine", "name": solution_name, "solution_uuid": solution_uuid}}
        wid, public_ip = deployer.deploy_vmachine(
            node_id=vmachine_node.node_id,
            network_name=network_view.name,
            name=vmachine_type,
            ip_address=private_ip_address,
            ssh_keys=ssh_keys,
            pool_id=pool_id,
            size=vm_size,
            enable_public_ip=enable_public_ip,
            description=description,
            **metadata,
        )
        self.vdc_deployer.info(f"virtual machine machine wid: {wid}")
        try:
            success = deployer.wait_workload(
                wid, self.bot, identity_name=self.identity.instance_name, cancel_by_uuid=False
            )
            if not success:
                raise DeploymentFailed()
            return {"public_ip": public_ip, "ip_address": private_ip_address, "vm_wid": wid}
        except DeploymentFailed:
            # Release the public IP workload attached to the failed machine.
            if enable_public_ip:
                self.zos.workloads.decomission(self.zos.workloads.get(wid).public_ip)
            self.vdc_deployer.error(f"Failed to deploy virtual machine wid: {wid}")
            continue

    self.vdc_deployer.error("All attempts to deploy virtual machine have failed")
    return None
def deploy_s3_minio_container(self, pool_id, ak, sk, ssh_key, scheduler, zdb_wids, solution_uuid, password):
    """Deploy the minio container on the first suitable node that succeeds.

    Builds the zdb URLs for ``zdb_wids``, then walks the nodes offered by
    ``scheduler.nodes_by_capacity``. For each node the network is extended
    to it, a free IP is taken, and the minio containers are deployed and
    waited on. A node failing at any stage is skipped.

    Returns:
        The workload id of the minio container (first element of the
        deployer result) on success, or ``None`` after logging an error
        when no node succeeds.
    """
    log = self.vdc_deployer
    log.info(f"deploying minio for zdbs: {zdb_wids}")
    zdb_configs = [
        deployer.get_zdb_url(zdb_wid, password, identity_name=self.identity.instance_name)
        for zdb_wid in zdb_wids
    ]
    log.info(f"zdb_configs: {zdb_configs}")

    network_view = deployer.get_network_view(self.vdc_name, identity_name=self.identity.instance_name)
    candidates = scheduler.nodes_by_capacity(
        cru=MINIO_CPU, mru=MINIO_MEMORY / 1024, sru=MINIO_DISK / 1024, ip_version="IPv6"
    )
    for node in candidates:
        log.info(f"node {node.node_id} selected for minio")

        # Stage 1: make sure the node is part of the VDC network.
        try:
            network_update = deployer.add_network_node(
                self.vdc_name, node, pool_id, network_view, self.bot, self.identity.instance_name
            )
            for network_wid in network_update["ids"] if network_update else ():
                network_ok = deployer.wait_workload(
                    network_wid,
                    self.bot,
                    5,
                    identity_name=self.identity.instance_name,
                    cancel_by_uuid=False,
                )
                if not network_ok:
                    log.error(f"workload {network_wid} failed when adding node to network")
                    raise DeploymentFailed()
        except DeploymentFailed:
            log.error(f"failed to deploy minio network on node {node.node_id}.")
            continue

        network_view = network_view.copy()
        ip_address = network_view.get_free_ip(node)
        log.info(f"minio ip address {ip_address}")

        # Stage 2: create the minio workloads.
        try:
            minio_wids = deployer.deploy_minio_containers(
                pool_id,
                self.vdc_name,
                [node.node_id],
                [ip_address],
                zdb_configs,
                ak,
                sk,
                ssh_key,
                MINIO_CPU,
                MINIO_MEMORY,
                S3_NO_DATA_NODES,
                S3_NO_PARITY_NODES,
                public_ipv6=True,
                disk_size=int(MINIO_DISK / 1024),
                bot=self.bot,
                identity_name=self.identity.instance_name,
                solution_uuid=solution_uuid,
                description=self.vdc_deployer.description,
            )
        except DeploymentFailed as e:
            if not e.wid:
                log.error(f"failed to deploy minio volume due to error {str(e)}")
            else:
                workload = self.zos.workloads.get(e.wid)
                log.error(f"failed to deploy minio volume wid: {e.wid} on node {workload.info.node_id}")
            continue

        # Stage 3: wait for the container itself.
        wid = minio_wids[0]
        try:
            container_ok = deployer.wait_workload(
                wid, self.bot, identity_name=self.identity.instance_name, cancel_by_uuid=False
            )
            if not container_ok:
                raise DeploymentFailed()
            log.info(f"minio container deployed successfully wid: {wid}")
            return wid
        except DeploymentFailed:
            log.error(f"failed to deploy minio container wid: {wid}")

    log.error("no nodes available to deploy minio container")
def deploy_master(
    self,
    pool_id,
    scheduler,
    k8s_flavor,
    cluster_secret,
    ssh_keys,
    solution_uuid,
    network_view,
    datastore_endpoint="",
    network_subnet="",
    private_ip="",
    public_ip=None,
):
    """Deploy the kubernetes master on the first candidate node that succeeds.

    Candidate nodes come from ``scheduler.nodes_by_capacity`` using the
    resources of ``k8s_flavor`` and requiring a public IP. For each node
    the network is extended to it, a public IP workload is reserved, and
    the master workload is created and waited on. A node failing at any
    stage is skipped and the next candidate is tried.

    Args:
        pool_id: pool used for all workloads.
        scheduler: object providing ``nodes_by_capacity``.
        k8s_flavor: key into ``VDC_SIZE.K8S_SIZES``; its ``value`` is the
            master size.
        cluster_secret: cluster secret handed to the deployer.
        ssh_keys: keys handed to the deployer.
        solution_uuid: uuid attached to the created workloads.
        network_view: network view the node is added to.
        datastore_endpoint: forwarded to the master deployment.
        network_subnet: forwarded as ``subnet`` when adding the node.
        private_ip: fixed private IP; when empty a free IP is taken from a
            fresh copy of the network view.
        public_ip: specific public IP to reserve; when falsy any public IP
            on the node is requested.

    Returns:
        The master's private IP address on success, or ``None`` once every
        candidate node has been tried.

    Raises:
        j.exceptions.Runtime: when an ``IndexError`` escapes node selection
            or the network update.
    """
    k8s_resources_dict = VDC_SIZE.K8S_SIZES[k8s_flavor]
    nodes_generator = scheduler.nodes_by_capacity(**k8s_resources_dict, pool_id=pool_id, public_ip=True)

    # Loop until a deployment succeeds (return) or the candidates run out
    # (break). Breaking lets the final error log below actually run;
    # previously exhaustion returned silently and that log was unreachable.
    while True:
        try:
            try:
                master_node = next(nodes_generator)
            except StopIteration:
                break
            self.vdc_deployer.info(
                f"Deploying kubernetes master on node {master_node.node_id} with datastore: {datastore_endpoint}"
            )

            # add node to network
            try:
                result = deployer.add_network_node(
                    self.vdc_name,
                    master_node,
                    pool_id,
                    network_view,
                    self.bot,
                    self.identity.instance_name,
                    subnet=network_subnet,
                )
                if result:
                    for wid in result["ids"]:
                        success = deployer.wait_workload(
                            wid, self.bot, 3, identity_name=self.identity.instance_name, cancel_by_uuid=False
                        )
                        if not success:
                            self.vdc_deployer.error(f"Failed to deploy network for kubernetes master wid: {wid}")
                            raise DeploymentFailed()
            except DeploymentFailed:
                self.vdc_deployer.error(
                    f"Failed to deploy network for kubernetes master on node {master_node.node_id}"
                )
                continue
        except IndexError:
            self.vdc_deployer.error("All attempts to deploy kubernetes master node have failed")
            raise j.exceptions.Runtime("All attempts to deploy kubernetes master node have failed")

        # reserve public_ip
        if public_ip:
            public_ip_wid = self.vdc_deployer.public_ip.get_specific_public_ip(
                pool_id, master_node.node_id, public_ip, solution_uuid=solution_uuid
            )
        else:
            public_ip_wid = self.vdc_deployer.public_ip.get_public_ip(
                pool_id, master_node.node_id, solution_uuid=solution_uuid
            )
        if not public_ip_wid:
            self.vdc_deployer.error(f"Failed to reserve public ip on node {master_node.node_id}")
            continue

        # deploy master
        if private_ip:
            private_ip_address = private_ip
        else:
            network_view = network_view.copy()
            private_ip_address = network_view.get_free_ip(master_node)
        self.vdc_deployer.info(f"Kubernetes master ip: {private_ip_address}")

        wid = deployer.deploy_kubernetes_master(
            pool_id,
            master_node.node_id,
            network_view.name,
            cluster_secret,
            ssh_keys,
            private_ip_address,
            size=k8s_flavor.value,
            identity_name=self.identity.instance_name,
            secret=cluster_secret,
            solution_uuid=solution_uuid,
            description=self.vdc_deployer.description,
            public_ip_wid=public_ip_wid,
            datastore_endpoint=datastore_endpoint,
            disable_default_ingress=False,
        )
        self.vdc_deployer.info(f"Kubernetes master wid: {wid}")
        try:
            success = deployer.wait_workload(
                wid, self.bot, identity_name=self.identity.instance_name, cancel_by_uuid=False
            )
            if not success:
                raise DeploymentFailed()
            return private_ip_address
        except DeploymentFailed:
            # Release the public IP reserved for this failed attempt.
            self.zos.workloads.decomission(public_ip_wid)
            self.vdc_deployer.error(f"Failed to deploy kubernetes master wid: {wid}")
            continue

    self.vdc_deployer.error("All attempts to deploy kubernetes master have failed")
    return None
def deploy_threebot(self, minio_wid, pool_id, kube_config, embed_trc=True, backup_config=None, zdb_farms=None, cert=None):
    """Deploy the VDC threebot container on the first node that succeeds.

    Assembles the container's secret and plain environment from the VDC
    instance, the deployer and the configuration, optionally adds the TRC
    remote settings, then walks the nodes offered by a ``Scheduler`` for
    ``pool_id``. For each node the network is extended to it, a free IP is
    taken and the container is deployed and waited on.

    Args:
        minio_wid: not read by this method.
        pool_id: pool used for scheduling and for the workloads.
        kube_config: value exported as ``KUBE_CONFIG``.
        embed_trc: selects ``THREEBOT_VDC_FLIST`` and adds the TRC remote
            settings when true, ``THREEBOT_FLIST`` otherwise.
        backup_config: mapping serialized into ``BACKUP_CONFIG``; defaults
            to an empty dict.
        zdb_farms: farm names for ``S3_AUTO_TOPUP_FARMS``; when falsy the
            values of ``S3_AUTO_TOPUP_FARMS.get()`` are used.
        cert: optional object providing ``cert``, ``private_key`` and
            ``fullchain``.

    Returns:
        The container workload id on success. ``None`` when
        ``_prepare_proxy`` yields no remote or when no node succeeds.
    """
    backup_config = backup_config or {}
    etcd_backup_config = j.core.config.get("VDC_S3_CONFIG", {})
    flist = THREEBOT_VDC_FLIST if embed_trc else THREEBOT_FLIST

    # The instance is serialized without its s3/kubernetes/threebot sections.
    vdc_dict = self.vdc_instance.to_dict()
    for section in ("s3", "kubernetes", "threebot"):
        vdc_dict.pop(section, None)

    secret_env = {
        "BACKUP_CONFIG": j.data.serializers.json.dumps(backup_config),
        "VDC_OWNER_TNAME": self.vdc_deployer.tname,
        "VDC_EMAIL": self.vdc_deployer.email,
        "VDC_PASSWORD_HASH": self.vdc_deployer.vdc_instance.get_password(),
        "KUBE_CONFIG": kube_config,
        "PROVISIONING_WALLET_SECRET": self.vdc_deployer.vdc_instance.provision_wallet.secret,
        "PREPAID_WALLET_SECRET": self.vdc_deployer.vdc_instance.prepaid_wallet.secret,
        "VDC_INSTANCE": j.data.serializers.json.dumps(vdc_dict),
        "THREEBOT_PRIVATE_KEY": self.vdc_deployer.ssh_key.private_key.strip(),
        "S3_URL": etcd_backup_config.get("S3_URL", ""),
        "S3_BUCKET": etcd_backup_config.get("S3_BUCKET", ""),
        "S3_AK": etcd_backup_config.get("S3_AK", ""),
        "S3_SK": etcd_backup_config.get("S3_SK", ""),
    }
    if cert:
        secret_env.update(
            {
                "CERT": cert.cert,
                "CERT_PRIVATE_KEY": cert.private_key,
                "CERT_FULLCHAIN": cert.fullchain,
            }
        )

    # Flavor's zdb sru scaled by (1 + parity / (data + parity)).
    s3_size_key = VDC_SIZE.VDC_FLAVORS[self.vdc_deployer.flavor]["s3"]["size"]
    parity_share = S3_NO_PARITY_NODES / (S3_NO_DATA_NODES + S3_NO_PARITY_NODES)
    s3_max_storage = int(VDC_SIZE.S3_ZDB_SIZES[s3_size_key]["sru"] * (1 + parity_share))

    env = {
        "VDC_NAME": self.vdc_name,
        "MONITORING_SERVER_URL": j.config.get("MONITORING_SERVER_URL", ""),
        "VDC_UUID": self.vdc_uuid,
        "EXPLORER_URL": j.core.identity.me.explorer_url,
        "VDC_S3_MAX_STORAGE": str(s3_max_storage),
        "S3_AUTO_TOPUP_FARMS": ",".join(zdb_farms) if zdb_farms else ",".join(S3_AUTO_TOPUP_FARMS.get()),
        "NETWORK_FARMS": ",".join(NETWORK_FARMS.get()),
        "COMPUTE_FARMS": ",".join(COMPUTE_FARMS.get()),
        "SDK_VERSION": self.branch,
        "SSHKEY": self.vdc_deployer.ssh_key.public_key.strip(),
        "MINIMAL": "true",
        "TEST_CERT": "true" if j.core.config.get("TEST_CERT") else "false",
        "ACME_SERVER_URL": self.acme_server_url,
    }

    if embed_trc:
        _, secret, remote = self._prepare_proxy()
        if not remote:
            return
        remote_ip, remote_port = remote.split(":")
        env["REMOTE_IP"] = remote_ip
        env["REMOTE_PORT"] = remote_port
        secret_env["TRC_SECRET"] = secret

    if not self.vdc_instance.kubernetes:
        self.vdc_instance.load_info()

    scheduler = Scheduler(pool_id=pool_id)
    for node in scheduler.nodes_by_capacity(THREEBOT_CPU, THREEBOT_DISK / 1024, THREEBOT_MEMORY / 1024):
        network_view = deployer.get_network_view(self.vdc_name, identity_name=self.identity.instance_name)
        self.vdc_deployer.info(f"VDC threebot: node {node.node_id} selected")
        network_update = deployer.add_network_node(
            network_view.name, node, pool_id, network_view, self.bot, self.identity.instance_name
        )
        self.vdc_deployer.info(
            f"VDC threebot network update result for node {node.node_id} is {network_update}"
        )

        if network_update:
            # Every network workload is waited on before the combined
            # outcome is judged.
            network_updated = True
            try:
                for network_wid in network_update["ids"]:
                    workload_ok = deployer.wait_workload(
                        network_wid,
                        self.bot,
                        expiry=5,
                        breaking_node_id=node.node_id,
                        identity_name=self.identity.instance_name,
                        cancel_by_uuid=False,
                    )
                    network_updated = network_updated and workload_ok
                if not network_updated:
                    raise DeploymentFailed()
            except DeploymentFailed:
                self.vdc_deployer.error(f"Failed to deploy network on node {node.node_id}")
                continue

        network_view = network_view.copy()
        ip_address = network_view.get_free_ip(node)
        self.vdc_deployer.info(f"VDC threebot container ip address {ip_address}")
        if not ip_address:
            continue

        # Name the log channel after the explorer the identity points to.
        if "test" in j.core.identity.me.explorer_url:
            explorer = "test"
        elif "dev" in j.core.identity.me.explorer_url:
            explorer = "dev"
        else:
            explorer = "main"
        log_config = j.core.config.get("VDC_LOG_CONFIG", {})
        if log_config:
            log_config["channel_name"] = f"{self.vdc_instance.instance_name}_{explorer}"

        wid = deployer.deploy_container(
            pool_id=pool_id,
            node_id=node.node_id,
            network_name=network_view.name,
            ip_address=ip_address,
            flist=flist,
            env=env,
            cpu=THREEBOT_CPU,
            memory=THREEBOT_MEMORY,
            disk_size=THREEBOT_DISK,
            secret_env=secret_env,
            identity_name=self.identity.instance_name,
            description=self.vdc_deployer.description,
            form_info={"chatflow": "threebot", "Solution name": self.vdc_name},
            solution_uuid=self.vdc_uuid,
            log_config=log_config,
        )
        self.vdc_deployer.info(f"VDC threebot container wid: {wid}")
        try:
            container_ok = deployer.wait_workload(
                wid, self.bot, identity_name=self.identity.instance_name, cancel_by_uuid=False
            )
            if not container_ok:
                raise DeploymentFailed()
            return wid
        except DeploymentFailed:
            self.vdc_deployer.error(
                f"failed to deploy threebot container on node: {node.node_id} wid: {wid}"
            )
def add_nodes(self):
    """Extend an existing kubernetes cluster with ``self.nodes_count`` workers.

    The master workload (``self.master_wid``) supplies the network id,
    cluster secret, ssh keys and master IP. Nodes already hosting a
    successfully deployed workload of the same solution are excluded. Each
    new node is added to the network, given a free IP and (optionally) a
    public IP, and a worker workload is created on it. All worker
    workloads are then waited on; a failed one is decommissioned together
    with its public IP workload.

    Side effects:
        Sets ``self.reservations`` (worker workload ids) and
        ``self.success_workload_count``.

    Raises:
        StopChatFlow: when a node cannot be added to the network, when no
            free IP exists on a node, or when some or all workers fail.
    """
    zos = j.sals.zos.get()
    workload = zos.workloads.get(self.master_wid)
    metadata = j.sals.reservation_chatflow.reservation_chatflow.decrypt_reservation_metadata(
        workload.info.metadata
    )
    metadata = j.data.serializers.json.loads(metadata)
    solution_uuid = metadata.get("solution_uuid")

    old_wids = j.sals.marketplace.solutions.get_workloads_by_uuid(solution_uuid)
    old_nodes = [wid.info.node_id for wid in old_wids if wid.info.result.state == State.Ok]

    if self.enable_public_ip:
        self.node_query["ipv4u"] = self.nodes_count
    # Ask for enough nodes that nodes_count remain after dropping the ones
    # the solution already uses.
    nodes, pools = deployer.ask_multi_pool_distribution(
        self, self.nodes_count + len(old_nodes), self.node_query
    )
    selected_nodes = [
        (node, pool) for node, pool in zip(nodes, pools) if node.node_id not in old_nodes
    ]
    if len(selected_nodes) < self.nodes_count:
        self.stop(
            f"Failed to find resources to deploy {self.nodes_count}, available nodes are: {len(selected_nodes)}"
        )
    new_nodes = selected_nodes[: self.nodes_count]

    network_view = deployer.get_network_view(workload.network_id)
    master_ip = workload.ipaddress
    self.reservations = []
    for node, pool_id in new_nodes:
        res = deployer.add_network_node(workload.network_id, node, pool_id)
        if res:
            for wid in res["ids"]:
                success = deployer.wait_workload(wid, breaking_node_id=node.node_id)
                if not success:
                    raise StopChatFlow(f"Failed to add node {node.node_id} to network {wid}")
        network_view = network_view.copy()
        ip_address = network_view.get_free_ip(node)
        if not ip_address:
            # The message previously referenced undefined names and raised
            # NameError instead of stopping the chatflow.
            raise StopChatFlow(
                f"No free IPs for network {workload.network_id} on the specifed node"
                f" {node.node_id}"
            )

        self.md_show_update(f"Deploying worker on node {node.node_id}")
        # Add public ip
        public_id_wid = 0
        if self.enable_public_ip:
            public_id_wid, _ = deployer.create_public_ip(
                pool_id, node.node_id, solution_uuid=solution_uuid
            )

        self.reservations.append(
            deployer.deploy_kubernetes_worker(
                pool_id,
                node.node_id,
                workload.network_id,
                workload.cluster_secret,
                workload.ssh_keys,
                ip_address,
                master_ip,
                size=self.cluster_size,
                identity_name=None,
                description="",
                public_ip_wid=public_id_wid,
                **metadata,
            )
        )

    self.success_workload_count = 0
    for resv in self.reservations:
        try:
            # A falsy result is a failure, as in the other wait_workload
            # callers; route it through the same cleanup path.
            if not deployer.wait_workload(resv, self, cancel_by_uuid=False):
                raise DeploymentFailed(f"Failed to deploy kubernetes worker workload {resv}", wid=resv)
            self.success_workload_count += 1
        except DeploymentFailed as ex:
            # Cleaning k8s workloads and public IP workloads in case of failure in deployment
            failed_workload = zos.workloads.get(resv)
            if failed_workload.public_ip:
                zos.workloads.decomission(failed_workload.public_ip)
            # Decommission the failed worker itself (previously a stale
            # network workload id was used here).
            zos.workloads.decomission(resv)
            j.logger.error(f"Failed to deploy workloads for {resv}, the error: {str(ex)}")

    if not self.success_workload_count:
        raise StopChatFlow(msg="Can't extend your cluster, please try again later")
    if self.success_workload_count < len(self.reservations):
        raise StopChatFlow(
            msg=f"Some nodes failed to extend, {self.success_workload_count} of {self.nodes_count}, please try again later"
        )