Пример #1
0
 def container_ip(self):
     """Add the selected node to the network, then ask for the container IP.

     A copy of ``self.network_view`` is stored in ``self.network_view_copy``
     and handed to ``deployer.add_network_node``. When that call reports
     workloads, each one is waited for and the copy is refreshed afterwards.
     The user's choice among the node's free IPs ends up in
     ``self.ip_address``.

     Raises:
         DeploymentFailed: when ``deployer.wait_workload`` reports failure
             for one of the network workloads.

     Note:
         The first free IP is offered as the default, so the free-IP list is
         indexed with ``[0]`` and is expected to be non-empty.
     """
     self.network_view_copy = self.network_view.copy()
     network_update = deployer.add_network_node(
         self.network_view.name,
         self.selected_node,
         self.pool_id,
         self.network_view_copy,
         bot=self,
         owner=self.solution_metadata.get("owner"),
     )
     # A falsy result means there is nothing to wait for.
     pending_workloads = network_update["ids"] if network_update else []
     for workload_id in pending_workloads:
         deployed = deployer.wait_workload(
             workload_id,
             self,
             breaking_node_id=self.selected_node.node_id,
         )
         if deployed:
             continue
         raise DeploymentFailed(
             f"Failed to add node {self.selected_node.node_id} to network {workload_id}",
             wid=workload_id,
         )
     if network_update:
         # Re-copy the view once the network workloads have been deployed.
         self.network_view_copy = self.network_view_copy.copy()

     candidate_ips = self.network_view_copy.get_node_free_ips(self.selected_node)
     self.ip_address = self.drop_down_choice(
         "Please choose IP Address for your solution",
         candidate_ips,
         default=candidate_ips[0],
         required=True,
     )
Пример #2
0
    def etcd_ip(self):
        """Add the selected node to the network and collect one IP per ETCD node.

        After the network workloads (if any) are deployed, the user is asked
        ``self.no_nodes.value`` times to pick an IP. Every pick is appended
        to ``self.network_view.used_ips`` and to ``self.ip_addresses``, and
        an ``etcd_<n>=http://<ip>:2380,`` entry is appended to
        ``self.etcd_clutser`` (each entry ends with a comma, including the
        last one).

        Raises:
            DeploymentFailed: when ``deployer.wait_workload`` reports failure
                for one of the network workloads.
        """
        network_update = deployer.add_network_node(
            self.network_view.name,
            self.selected_node,
            self.pool_id,
            self.network_view,
            bot=self,
            owner=self.solution_metadata.get("owner"),
        )
        if network_update:
            self.md_show_update("Deploying Network on Nodes....")
            for workload_id in network_update["ids"]:
                deployed = deployer.wait_workload(
                    workload_id,
                    self,
                    breaking_node_id=self.selected_node.node_id,
                )
                if not deployed:
                    raise DeploymentFailed(
                        f"Failed to add node {self.selected_node.node_id} to network {workload_id}",
                        wid=workload_id,
                    )
            # Re-copy the view once the network workloads have been deployed.
            self.network_view = self.network_view.copy()

        self.ip_addresses = []
        self.etcd_clutser = ""
        # Positions are 1-based because they are shown to the user and used
        # as the etcd member suffix.
        for position in range(1, self.no_nodes.value + 1):
            available_ips = self.network_view.get_node_free_ips(self.selected_node)
            chosen_ip = self.drop_down_choice(
                f"Please choose IP Address for ETCD Node {position}",
                available_ips,
                default=available_ips[0],
                required=True,
            )
            # Record the pick on the view before the next free-IP query.
            self.network_view.used_ips.append(chosen_ip)
            self.ip_addresses.append(chosen_ip)
            self.etcd_clutser += f"etcd_{position}=http://{chosen_ip}:2380,"
Пример #3
0
    def ip_selection(self):
        """Add every minio node to the network and ask for the container IPs.

        Each entry of ``self.minio_nodes`` is paired by position with the
        entry of ``self.minio_pool_ids`` and added to the network; the view
        is re-copied after each node whose workloads were deployed. The user
        then picks an IP for the primary container and, when ``self.mode`` is
        ``"Master/Slave"``, one for the secondary container. Picks are stored
        in ``self.ip_addresses`` and appended to ``self.network_view.used_ips``.

        Raises:
            DeploymentFailed: when ``deployer.wait_workload`` reports failure
                for one of the network workloads.
        """
        self.md_show_update("Deploying Network on Nodes....")
        for position, node in enumerate(self.minio_nodes):
            pool_id = self.minio_pool_ids[position]
            network_update = deployer.add_network_node(
                self.network_view.name,
                node,
                pool_id,
                self.network_view,
                bot=self,
                owner=self.solution_metadata.get("owner"),
            )
            if not network_update:
                # Nothing was reserved for this node; move on to the next one.
                continue
            for workload_id in network_update["ids"]:
                deployed = deployer.wait_workload(
                    workload_id,
                    bot=self,
                    breaking_node_id=node.node_id,
                )
                if not deployed:
                    raise DeploymentFailed(
                        f"Failed to add node {node.node_id} to network {workload_id}",
                        wid=workload_id,
                    )
            self.network_view = self.network_view.copy()

        self.ip_addresses = []

        primary_candidates = self.network_view.get_node_free_ips(self.minio_nodes[0])
        primary_ip = self.drop_down_choice(
            "Please choose IP Address for Primary container",
            primary_candidates,
            required=True,
            default=primary_candidates[0],
        )
        self.ip_addresses.append(primary_ip)
        self.network_view.used_ips.append(self.ip_addresses[0])

        if self.mode == "Master/Slave":
            secondary_candidates = self.network_view.get_node_free_ips(self.minio_nodes[1])
            secondary_ip = self.drop_down_choice(
                "Please choose IP Address for Secondary container",
                secondary_candidates,
                required=True,
                default=secondary_candidates[0],
            )
            self.ip_addresses.append(secondary_ip)
            self.network_view.used_ips.append(self.ip_addresses[1])
Пример #4
0
 def container_ip(self):
     """Add the selected node to the network, then ask for the container IP.

     Works on a copy of ``self.network_view`` kept in
     ``self.network_view_copy``. The whole ``self.solution_metadata`` dict is
     forwarded as keyword arguments to ``deployer.add_network_node``. The
     user's choice among the node's free IPs is stored in ``self.ip_address``.

     Raises:
         StopChatFlow: when ``deployer.wait_workload`` reports failure for
             one of the network workloads.
     """
     self.network_view_copy = self.network_view.copy()
     network_update = deployer.add_network_node(
         self.network_view.name,
         self.selected_node,
         self.pool_id,
         self.network_view_copy,
         bot=self,
         **self.solution_metadata,
     )
     # A falsy result means there is nothing to wait for.
     pending_workloads = network_update["ids"] if network_update else []
     for workload_id in pending_workloads:
         deployed = deployer.wait_workload(
             workload_id,
             self,
             breaking_node_id=self.selected_node.node_id,
         )
         if deployed:
             continue
         raise StopChatFlow(
             f"Failed to add node {self.selected_node.node_id} to network {workload_id}"
         )
     if network_update:
         # Re-copy the view once the network workloads have been deployed.
         self.network_view_copy = self.network_view_copy.copy()

     candidate_ips = self.network_view_copy.get_node_free_ips(self.selected_node)
     self.ip_address = self.drop_down_choice(
         "Please choose IP Address for your solution",
         candidate_ips,
     )
Пример #5
0
    def reservation(self):
        """Reserve the workloads needed to expose the solution on ``self.domain``.

        Steps, in order: merge the exposure metadata into
        ``self.solution_metadata``, schedule a node in ``self.pool_id``, add
        that node to the solution's network, pick a free IP on it, reserve a
        sub-domain (unless a custom domain is used), reserve the reverse
        proxy, and finally call ``deployer.expose_address``.

        Raises:
            DeploymentFailed: when ``deployer.wait_workload`` reports failure
                for any of the reserved workloads.
            StopChatFlow: when the network view returns no free IP for the
                selected node.
        """
        metadata = {
            "name": self.domain,
            "form_info": {
                "Solution name": self.domain,
                "chatflow": "exposed"
            }
        }
        self.solution_metadata.update(metadata)
        # Resource query used to schedule the node for the TCP router container.
        query = {"mru": 1, "cru": 1, "sru": 1}
        self.selected_node = deployer.schedule_container(self.pool_id, **query)
        self.network_name = self.solution["Network"]

        # Make sure the scheduled node is part of the solution's network.
        result = deployer.add_network_node(
            self.network_name,
            self.selected_node,
            self.pool_id,
            bot=self,
            owner=self.solution_metadata.get("owner"))
        if result:
            for wid in result["ids"]:
                success = deployer.wait_workload(
                    wid, self, breaking_node_id=self.selected_node.node_id)
                if not success:
                    raise DeploymentFailed(
                        f"Failed to add node to network {wid}", wid=wid)

        # Fetch the network view after the update and pick a free IP on the node.
        # NOTE(review): tcprouter_ip is stored on self but is not passed
        # explicitly to expose_address below - confirm it is consumed elsewhere.
        self.network_view = deployer.get_network_view(self.network_name)
        self.tcprouter_ip = self.network_view.get_free_ip(self.selected_node)
        if not self.tcprouter_ip:
            raise StopChatFlow(
                f"No available ips one for network {self.network_view.name} node {self.selected_node.node_id}"
            )

        # A sub-domain workload is only reserved when the user did not bring
        # a custom domain.
        if self.domain_type != "Custom Domain":
            self.dom_id = deployer.create_subdomain(
                pool_id=self.domain_pool.pool_id,
                gateway_id=self.domain_gateway.node_id,
                subdomain=self.domain,
                **self.solution_metadata,
                solution_uuid=self.solution_id,
            )
            success = deployer.wait_workload(self.dom_id, self)
            if not success:
                raise DeploymentFailed(
                    f"Failed to reserve sub-domain workload {self.dom_id}",
                    solution_uuid=self.solution_id)

        # Reverse proxy on the gateway for the domain.
        self.proxy_id = deployer.create_proxy(
            pool_id=self.domain_pool.pool_id,
            gateway_id=self.domain_gateway.node_id,
            domain_name=self.domain,
            trc_secret=self.secret,
            **self.solution_metadata,
            solution_uuid=self.solution_id,
        )
        success = deployer.wait_workload(self.proxy_id, self)
        if not success:
            raise DeploymentFailed(
                f"Failed to reserve reverse proxy workload {self.proxy_id}",
                solution_uuid=self.solution_id)

        # TCP router workload that forwards to the solution's IP and ports.
        self.tcprouter_id = deployer.expose_address(
            pool_id=self.pool_id,
            gateway_id=self.domain_gateway.node_id,
            network_name=self.network_name,
            local_ip=self.solution_ip,
            port=self.port,
            tls_port=self.tls_port,
            trc_secret=self.secret,
            bot=self,
            **self.solution_metadata,
            solution_uuid=self.solution_id,
        )
        success = deployer.wait_workload(self.tcprouter_id, self)
        if not success:
            raise DeploymentFailed(
                f"Failed to reserve TCP Router container workload {self.tcprouter_id}",
                solution_uuid=self.solution_id,
                wid=self.tcprouter_id,
            )
Пример #6
0
    def reservation(self):
        """Reserve the workloads needed to expose the solution on ``self.domain``.

        Steps, in order: merge the exposure metadata into
        ``self.solution_metadata``, schedule a node in ``self.pool_id``, add
        that node to the solution's network, pick a free IP on it, reserve a
        sub-domain (unless a custom domain is used), reserve the reverse
        proxy when ``self.proxy_type`` is ``"TRC"``, and finally deploy the
        exposing workload: ``deployer.expose_and_create_certificate`` for
        ``"NGINX"``, ``deployer.expose_address`` for anything else.

        Raises:
            DeploymentFailed: when ``deployer.wait_workload`` reports failure
                for any of the reserved workloads.
            StopChatFlow: when the network view returns no free IP for the
                selected node.
        """
        metadata = {
            "name": self.domain,
            "form_info": {
                "Solution name": self.domain,
                "chatflow": "exposed"
            }
        }
        self.solution_metadata.update(metadata)
        # Resource query used to schedule the node for the exposing container.
        query = {"mru": 1, "cru": 1, "sru": 1}
        self.selected_node = deployer.schedule_container(self.pool_id, **query)
        self.network_name = self.solution["Network"]

        # Make sure the scheduled node is part of the solution's network.
        result = deployer.add_network_node(
            self.network_name,
            self.selected_node,
            self.pool_id,
            bot=self,
            owner=self.solution_metadata.get("owner"))
        if result:
            for wid in result["ids"]:
                success = deployer.wait_workload(
                    wid, self, breaking_node_id=self.selected_node.node_id)
                if not success:
                    raise DeploymentFailed(
                        f"Failed to add node to network {wid}", wid=wid)

        # Fetch the network view after the update and pick a free IP on the node.
        self.network_view = deployer.get_network_view(self.network_name)
        self.tcprouter_ip = self.network_view.get_free_ip(self.selected_node)
        if not self.tcprouter_ip:
            raise StopChatFlow(
                f"No available ips one for network {self.network_view.name} node {self.selected_node.node_id}"
            )

        # A sub-domain workload is only reserved when the user did not bring
        # a custom domain.
        if self.domain_type != "Custom Domain":
            self.dom_id = deployer.create_subdomain(
                pool_id=self.domain_pool.pool_id,
                gateway_id=self.domain_gateway.node_id,
                subdomain=self.domain,
                **self.solution_metadata,
                solution_uuid=self.solution_id,
            )
            success = deployer.wait_workload(self.dom_id, self)
            if not success:
                raise DeploymentFailed(
                    f"Failed to reserve sub-domain workload {self.dom_id}",
                    solution_uuid=self.solution_id)

        # The gateway reverse proxy is only reserved for the TRC proxy type.
        if self.proxy_type == "TRC":
            self.proxy_id = deployer.create_proxy(
                pool_id=self.domain_pool.pool_id,
                gateway_id=self.domain_gateway.node_id,
                domain_name=self.domain,
                trc_secret=self.secret,
                **self.solution_metadata,
                solution_uuid=self.solution_id,
            )
            success = deployer.wait_workload(self.proxy_id, self)
            if not success:
                raise DeploymentFailed(
                    f"Failed to reserve reverse proxy workload {self.proxy_id}",
                    solution_uuid=self.solution_id)

        # Optional log sink settings; the channel name is only set when a
        # non-empty LOGGING_SINK configuration exists.
        # NOTE(review): the dict returned by j.core.config.get is mutated in
        # place - confirm it is a copy and not shared configuration state.
        trc_log_config = j.core.config.get("LOGGING_SINK", {})
        if trc_log_config:
            trc_log_config[
                "channel_name"] = f"{self.threebot_name}-{self.solution_name}-trc".lower(
                )

        if self.proxy_type == "NGINX":
            self.tcprouter_id = deployer.expose_and_create_certificate(
                domain=self.domain,
                email=self.email,
                pool_id=self.pool_id,
                gateway_id=self.domain_gateway.node_id,
                network_name=self.network_name,
                solution_ip=self.solution_ip,
                solution_port=self.port,
                trc_secret=self.secret,
                bot=self,
                enforce_https=self.force_https,
                log_config=trc_log_config,
                **self.solution_metadata,
                solution_uuid=self.solution_id,
            )
        else:
            # expose_address returns a pair here; only the first element
            # (the workload id) is kept.
            self.tcprouter_id, _ = deployer.expose_address(
                pool_id=self.pool_id,
                gateway_id=self.domain_gateway.node_id,
                network_name=self.network_name,
                local_ip=self.solution_ip,
                port=self.port,
                tls_port=self.tls_port,
                trc_secret=self.secret,
                bot=self,
                log_config=trc_log_config,
                **self.solution_metadata,
                solution_uuid=self.solution_id,
            )
        success = deployer.wait_workload(self.tcprouter_id, self)
        if not success:
            raise DeploymentFailed(
                f"Failed to reserve TCP Router container workload {self.tcprouter_id}",
                solution_uuid=self.solution_id,
                wid=self.tcprouter_id,
            )
Пример #7
0
    def deploy_vmachine(
        self,
        solution_name,
        vm_size,
        pool_id,
        nodes_generator,
        ssh_keys,
        solution_uuid,
        network_view,
        enable_public_ip,
        vmachine_type,
        description="",
    ):
        """Deploy a virtual machine on the first candidate node that accepts it.

        Nodes are taken one at a time from ``nodes_generator``. For each node
        the VDC network is extended to it, a free private IP is picked from a
        fresh copy of ``network_view`` and the machine workload is reserved.
        A failure at any of these steps moves on to the next node.

        Args:
            solution_name: stored as ``name`` in the workload's form info.
            vm_size: forwarded as ``size`` to ``deployer.deploy_vmachine``.
            pool_id: pool used for the network and machine workloads.
            nodes_generator: iterator yielding candidate nodes.
            ssh_keys: forwarded to ``deployer.deploy_vmachine``.
            solution_uuid: stored in the workload's form info.
            network_view: network view used to add nodes and pick the IP.
            enable_public_ip: forwarded to ``deployer.deploy_vmachine``; when
                truthy, the public IP workload of a failed machine is
                decommissioned.
            vmachine_type: forwarded as the workload ``name``.
            description: forwarded to ``deployer.deploy_vmachine``.

        Returns:
            A dict with ``public_ip``, ``ip_address`` and ``vm_wid`` on
            success, or ``None`` once ``nodes_generator`` is exhausted
            (the failure is logged first).

        Raises:
            j.exceptions.Runtime: when an ``IndexError`` escapes node
                selection or the network update.
        """
        # The loop only ends by returning, raising, or running out of nodes.
        while True:
            try:
                try:
                    vmachine_node = next(nodes_generator)
                except StopIteration:
                    # No candidates left: leave the loop so the failure is
                    # logged below instead of returning silently.
                    break
                self.vdc_deployer.info(f"Deploying virtual machine on node {vmachine_node.node_id}")
                # add node to network
                try:
                    result = deployer.add_network_node(
                        self.vdc_name, vmachine_node, pool_id, network_view, self.bot, self.identity.instance_name
                    )
                    if result:
                        for wid in result["ids"]:
                            success = deployer.wait_workload(
                                wid, self.bot, 3, identity_name=self.identity.instance_name, cancel_by_uuid=False
                            )
                            if not success:
                                self.vdc_deployer.error("Failed to deploy network for virtual machine")
                                raise DeploymentFailed
                except DeploymentFailed:
                    self.vdc_deployer.error(
                        f"Failed to deploy network for virtual machine on node {vmachine_node.node_id}"
                    )
                    continue
            except IndexError:
                self.vdc_deployer.error("All attempts to deploy virtual machine on nodes node have been failed")
                raise j.exceptions.Runtime("All attempts to deploy virtual machine on nodes node have been failed")

            # Work on a fresh copy of the view before picking the IP.
            network_view = network_view.copy()
            private_ip_address = network_view.get_free_ip(vmachine_node)

            self.vdc_deployer.info(f"Virtual machine ip: {private_ip_address}")

            metadata = {"form_info": {"chatflow": "vmachine", "name": solution_name, "solution_uuid": solution_uuid}}
            wid, public_ip = deployer.deploy_vmachine(
                node_id=vmachine_node.node_id,
                network_name=network_view.name,
                name=vmachine_type,
                ip_address=private_ip_address,
                ssh_keys=ssh_keys,
                pool_id=pool_id,
                size=vm_size,
                enable_public_ip=enable_public_ip,
                description=description,
                **metadata,
            )
            self.vdc_deployer.info(f"virtual machine machine wid: {wid}")
            try:
                success = deployer.wait_workload(
                    wid, self.bot, identity_name=self.identity.instance_name, cancel_by_uuid=False
                )
                if not success:
                    raise DeploymentFailed()
                return {"public_ip": public_ip, "ip_address": private_ip_address, "vm_wid": wid}
            except DeploymentFailed:
                if enable_public_ip:
                    # Release the public IP workload attached to the failed machine.
                    self.zos.workloads.decomission(self.zos.workloads.get(wid).public_ip)
                self.vdc_deployer.error(f"Failed to deploy virtual machine wid: {wid}")
                continue
        self.vdc_deployer.error("All attempts to deploy virtual machine have failed")
Пример #8
0
    def deploy_s3_minio_container(self, pool_id, ak, sk, ssh_key, scheduler,
                                  zdb_wids, solution_uuid, password):
        """Deploy the minio container on the first node where it succeeds.

        A ZDB URL is resolved for every workload id in ``zdb_wids``. Then,
        for each node offered by ``scheduler.nodes_by_capacity``, the VDC
        network is extended to the node, a free IP is taken from a fresh
        copy of the network view and ``deployer.deploy_minio_containers`` is
        called. Any ``DeploymentFailed`` moves on to the next node.

        Args:
            pool_id: pool used for the network and container workloads.
            ak: forwarded to ``deployer.deploy_minio_containers``.
            sk: forwarded to ``deployer.deploy_minio_containers``.
            ssh_key: forwarded to ``deployer.deploy_minio_containers``.
            scheduler: object providing ``nodes_by_capacity``.
            zdb_wids: ZDB workload ids to resolve into URLs.
            solution_uuid: forwarded to ``deployer.deploy_minio_containers``.
            password: passed to ``deployer.get_zdb_url`` for every ZDB.

        Returns:
            The first workload id returned by
            ``deployer.deploy_minio_containers`` once it is deployed, or
            ``None`` when no node worked (an error is logged in that case).
        """
        self.vdc_deployer.info(f"deploying minio for zdbs: {zdb_wids}")
        zdb_configs = [
            deployer.get_zdb_url(
                zdb_wid, password, identity_name=self.identity.instance_name)
            for zdb_wid in zdb_wids
        ]
        self.vdc_deployer.info(f"zdb_configs: {zdb_configs}")

        network_view = deployer.get_network_view(
            self.vdc_name, identity_name=self.identity.instance_name)
        candidates = scheduler.nodes_by_capacity(
            cru=MINIO_CPU,
            mru=MINIO_MEMORY / 1024,
            sru=MINIO_DISK / 1024,
            ip_version="IPv6",
        )
        for node in candidates:
            self.vdc_deployer.info(f"node {node.node_id} selected for minio")

            # Step 1: make sure the node is part of the VDC network.
            try:
                network_update = deployer.add_network_node(
                    self.vdc_name,
                    node,
                    pool_id,
                    network_view,
                    self.bot,
                    self.identity.instance_name,
                )
                pending_workloads = network_update["ids"] if network_update else []
                for network_wid in pending_workloads:
                    ready = deployer.wait_workload(
                        network_wid,
                        self.bot,
                        5,
                        identity_name=self.identity.instance_name,
                        cancel_by_uuid=False,
                    )
                    if ready:
                        continue
                    self.vdc_deployer.error(
                        f"workload {network_wid} failed when adding node to network"
                    )
                    raise DeploymentFailed()
            except DeploymentFailed:
                self.vdc_deployer.error(
                    f"failed to deploy minio network on node {node.node_id}.")
                continue

            # Step 2: pick the container IP from a fresh copy of the view.
            network_view = network_view.copy()
            ip_address = network_view.get_free_ip(node)
            self.vdc_deployer.info(f"minio ip address {ip_address}")

            # Step 3: reserve the minio workloads.
            try:
                minio_wids = deployer.deploy_minio_containers(
                    pool_id,
                    self.vdc_name,
                    [node.node_id],
                    [ip_address],
                    zdb_configs,
                    ak,
                    sk,
                    ssh_key,
                    MINIO_CPU,
                    MINIO_MEMORY,
                    S3_NO_DATA_NODES,
                    S3_NO_PARITY_NODES,
                    public_ipv6=True,
                    disk_size=int(MINIO_DISK / 1024),
                    bot=self.bot,
                    identity_name=self.identity.instance_name,
                    # form_info={"chatflow": "minio"},
                    # name=self.vdc_name,
                    solution_uuid=solution_uuid,
                    description=self.vdc_deployer.description,
                )
            except DeploymentFailed as failure:
                if failure.wid:
                    failed_workload = self.zos.workloads.get(failure.wid)
                    self.vdc_deployer.error(
                        f"failed to deploy minio volume wid: {failure.wid} on node {failed_workload.info.node_id}"
                    )
                else:
                    self.vdc_deployer.error(
                        f"failed to deploy minio volume due to error {str(failure)}")
                continue

            # Step 4: wait for the container itself (first id of the result).
            wid = minio_wids[0]
            try:
                ready = deployer.wait_workload(
                    wid,
                    self.bot,
                    identity_name=self.identity.instance_name,
                    cancel_by_uuid=False,
                )
                if not ready:
                    raise DeploymentFailed()
                self.vdc_deployer.info(
                    f"minio container deployed successfully wid: {wid}")
                return wid
            except DeploymentFailed:
                self.vdc_deployer.error(
                    f"failed to deploy minio container wid: {wid}")

        self.vdc_deployer.error("no nodes available to deploy minio container")
Пример #9
0
    def deploy_master(
        self,
        pool_id,
        scheduler,
        k8s_flavor,
        cluster_secret,
        ssh_keys,
        solution_uuid,
        network_view,
        datastore_endpoint="",
        network_subnet="",
        private_ip="",
        public_ip=None,
    ):
        """Deploy the kubernetes master on the first node where it succeeds.

        Candidate nodes come from ``scheduler.nodes_by_capacity`` using the
        resources of ``k8s_flavor`` and requiring a public IP. For each node
        the VDC network is extended to it, a public IP workload is reserved,
        a private IP is chosen and the master workload is deployed. A failure
        at any step moves on to the next node.

        Args:
            pool_id: pool used for every workload reserved here.
            scheduler: object providing ``nodes_by_capacity``.
            k8s_flavor: key into ``VDC_SIZE.K8S_SIZES``; its ``value`` is
                forwarded as the master ``size``.
            cluster_secret: forwarded to ``deployer.deploy_kubernetes_master``
                (positionally and as ``secret``).
            ssh_keys: forwarded to ``deployer.deploy_kubernetes_master``.
            solution_uuid: attached to the public IP and master workloads.
            network_view: network view used to add nodes and pick the IP.
            datastore_endpoint: forwarded to the master deployment.
            network_subnet: forwarded as ``subnet`` when adding the node to
                the network.
            private_ip: when truthy, used as the master IP instead of a free
                IP from the network view.
            public_ip: when truthy, that specific public IP is requested;
                otherwise any public IP on the node.

        Returns:
            The master's private IP on success, or ``None`` once the
            candidate nodes are exhausted (the failure is logged first).

        Raises:
            j.exceptions.Runtime: when an ``IndexError`` escapes node
                selection or the network update.
        """
        # deploy_master
        k8s_resources_dict = VDC_SIZE.K8S_SIZES[k8s_flavor]
        nodes_generator = scheduler.nodes_by_capacity(**k8s_resources_dict, pool_id=pool_id, public_ip=True)
        # The loop only ends by returning, raising, or running out of nodes.
        while True:
            try:
                try:
                    master_node = next(nodes_generator)
                except StopIteration:
                    # No candidates left: leave the loop so the failure is
                    # logged below instead of returning silently.
                    break
                self.vdc_deployer.info(
                    f"Deploying kubernetes master on node {master_node.node_id} with datastore: {datastore_endpoint}"
                )
                # add node to network
                try:
                    result = deployer.add_network_node(
                        self.vdc_name,
                        master_node,
                        pool_id,
                        network_view,
                        self.bot,
                        self.identity.instance_name,
                        subnet=network_subnet,
                    )
                    if result:
                        for wid in result["ids"]:
                            success = deployer.wait_workload(
                                wid, self.bot, 3, identity_name=self.identity.instance_name, cancel_by_uuid=False
                            )
                            if not success:
                                self.vdc_deployer.error(f"Failed to deploy network for kubernetes master wid: {wid}")
                                raise DeploymentFailed
                except DeploymentFailed:
                    self.vdc_deployer.error(
                        f"Failed to deploy network for kubernetes master on node {master_node.node_id}"
                    )
                    continue
            except IndexError:
                self.vdc_deployer.error("All attempts to deploy kubernetes master node have failed")
                raise j.exceptions.Runtime("All attempts to deploy kubernetes master node have failed")

            # reserve public_ip
            if public_ip:
                public_ip_wid = self.vdc_deployer.public_ip.get_specific_public_ip(
                    pool_id, master_node.node_id, public_ip, solution_uuid=solution_uuid
                )
            else:
                public_ip_wid = self.vdc_deployer.public_ip.get_public_ip(
                    pool_id, master_node.node_id, solution_uuid=solution_uuid
                )

            if not public_ip_wid:
                self.vdc_deployer.error(f"Failed to reserve public ip on node {master_node.node_id}")
                continue

            # deploy master
            if private_ip:
                private_ip_address = private_ip
            else:
                # Work on a fresh copy of the view before picking the IP.
                network_view = network_view.copy()
                private_ip_address = network_view.get_free_ip(master_node)
            self.vdc_deployer.info(f"Kubernetes master ip: {private_ip_address}")
            wid = deployer.deploy_kubernetes_master(
                pool_id,
                master_node.node_id,
                network_view.name,
                cluster_secret,
                ssh_keys,
                private_ip_address,
                size=k8s_flavor.value,
                identity_name=self.identity.instance_name,
                # form_info={"chatflow": "kubernetes"},
                # name=self.vdc_name,
                secret=cluster_secret,
                solution_uuid=solution_uuid,
                description=self.vdc_deployer.description,
                public_ip_wid=public_ip_wid,
                datastore_endpoint=datastore_endpoint,
                disable_default_ingress=False,
            )
            self.vdc_deployer.info(f"Kubernetes master wid: {wid}")
            try:
                success = deployer.wait_workload(
                    wid, self.bot, identity_name=self.identity.instance_name, cancel_by_uuid=False
                )
                if not success:
                    raise DeploymentFailed()
                return private_ip_address
            except DeploymentFailed:
                # Release the public IP reserved for the failed master.
                self.zos.workloads.decomission(public_ip_wid)
                self.vdc_deployer.error(f"Failed to deploy kubernetes master wid: {wid}")
                continue
        self.vdc_deployer.error("All attempts to deploy kubernetes master have failed")
Пример #10
0
    def deploy_threebot(
        self,
        minio_wid,
        pool_id,
        kube_config,
        embed_trc=True,
        backup_config=None,
        zdb_farms=None,
        cert=None,
    ):
        """Deploy the VDC threebot container on the first node where it succeeds.

        Builds the plain and secret environments of the container, then walks
        the nodes offered by a ``Scheduler`` for ``pool_id``: the VDC network
        is extended to the node, a free IP is picked and the container is
        deployed. A failed network update, a missing free IP or a failed
        container moves on to the next node.

        Args:
            minio_wid: not used by the active code of this method (only
                referenced by the disabled lookup kept below).
            pool_id: pool used for the scheduler and the workloads.
            kube_config: stored in the secret environment as ``KUBE_CONFIG``.
            embed_trc: when truthy, ``THREEBOT_VDC_FLIST`` is used and the
                proxy settings from ``self._prepare_proxy()`` are added to
                the environments; otherwise ``THREEBOT_FLIST`` is used.
            backup_config: JSON-serialised into ``BACKUP_CONFIG``; a falsy
                value becomes ``{}``.
            zdb_farms: when truthy, replaces ``S3_AUTO_TOPUP_FARMS.get()``
                as the source of the ``S3_AUTO_TOPUP_FARMS`` value.
            cert: optional object whose ``cert``, ``private_key`` and
                ``fullchain`` attributes are added to the secret environment.

        Returns:
            The container workload id on success. ``None`` when
            ``embed_trc`` is set and the proxy preparation yields no remote,
            or when no node worked.
        """
        backup_config = backup_config or {}
        s3_settings = j.core.config.get("VDC_S3_CONFIG", {})
        flist = THREEBOT_VDC_FLIST if embed_trc else THREEBOT_FLIST
        # workload = self.zos.workloads.get(minio_wid)
        # if workload.info.workload_type != WorkloadType.Container:
        #     raise j.exceptions.Validation(f"workload {minio_wid} is not container workload")
        # minio_ip_address = workload.network_connection[0].ipaddress

        # The serialised VDC instance is sent without these three sections.
        vdc_dict = self.vdc_instance.to_dict()
        for section in ("s3", "kubernetes", "threebot"):
            vdc_dict.pop(section, None)

        secret_env = {
            "BACKUP_CONFIG": j.data.serializers.json.dumps(backup_config),
            "VDC_OWNER_TNAME": self.vdc_deployer.tname,
            "VDC_EMAIL": self.vdc_deployer.email,
            "VDC_PASSWORD_HASH": self.vdc_deployer.vdc_instance.get_password(),
            "KUBE_CONFIG": kube_config,
            "PROVISIONING_WALLET_SECRET": self.vdc_deployer.vdc_instance.provision_wallet.secret,
            "PREPAID_WALLET_SECRET": self.vdc_deployer.vdc_instance.prepaid_wallet.secret,
            "VDC_INSTANCE": j.data.serializers.json.dumps(vdc_dict),
            "THREEBOT_PRIVATE_KEY": self.vdc_deployer.ssh_key.private_key.strip(),
            "S3_URL": s3_settings.get("S3_URL", ""),
            "S3_BUCKET": s3_settings.get("S3_BUCKET", ""),
            "S3_AK": s3_settings.get("S3_AK", ""),
            "S3_SK": s3_settings.get("S3_SK", ""),
        }

        if cert:
            secret_env.update(
                {
                    "CERT": cert.cert,
                    "CERT_PRIVATE_KEY": cert.private_key,
                    "CERT_FULLCHAIN": cert.fullchain,
                }
            )

        env = {
            "VDC_NAME": self.vdc_name,
            "MONITORING_SERVER_URL": j.config.get("MONITORING_SERVER_URL", ""),
            "VDC_UUID": self.vdc_uuid,
            "EXPLORER_URL": j.core.identity.me.explorer_url,
            # ZDB "sru" of the flavor's s3 size, scaled by
            # 1 + parity / (data + parity) and truncated to an integer.
            "VDC_S3_MAX_STORAGE": str(
                int(
                    VDC_SIZE.S3_ZDB_SIZES[VDC_SIZE.VDC_FLAVORS[self.vdc_deployer.flavor]["s3"]["size"]]["sru"]
                    * (1 + (S3_NO_PARITY_NODES / (S3_NO_DATA_NODES + S3_NO_PARITY_NODES)))
                )
            ),
            "S3_AUTO_TOPUP_FARMS": ",".join(zdb_farms) if zdb_farms else ",".join(S3_AUTO_TOPUP_FARMS.get()),
            "NETWORK_FARMS": ",".join(NETWORK_FARMS.get()),
            "COMPUTE_FARMS": ",".join(COMPUTE_FARMS.get()),
            # "VDC_MINIO_ADDRESS": minio_ip_address,
            "SDK_VERSION": self.branch,
            "SSHKEY": self.vdc_deployer.ssh_key.public_key.strip(),
            "MINIMAL": "true",
            "TEST_CERT": "true" if j.core.config.get("TEST_CERT") else "false",
            "ACME_SERVER_URL": self.acme_server_url,
        }

        if embed_trc:
            _, trc_secret, remote = self._prepare_proxy()
            if not remote:
                return
            # remote is expected in "<ip>:<port>" form.
            remote_ip, remote_port = remote.split(":")
            env["REMOTE_IP"] = remote_ip
            env["REMOTE_PORT"] = remote_port
            secret_env["TRC_SECRET"] = trc_secret

        if not self.vdc_instance.kubernetes:
            self.vdc_instance.load_info()

        candidates = Scheduler(pool_id=pool_id).nodes_by_capacity(
            THREEBOT_CPU,
            THREEBOT_DISK / 1024,
            THREEBOT_MEMORY / 1024,
        )
        for node in candidates:
            network_view = deployer.get_network_view(
                self.vdc_name, identity_name=self.identity.instance_name)
            self.vdc_deployer.info(f"VDC threebot: node {node.node_id} selected")
            network_update = deployer.add_network_node(
                network_view.name,
                node,
                pool_id,
                network_view,
                self.bot,
                self.identity.instance_name,
            )
            self.vdc_deployer.info(
                f"VDC threebot network update result for node {node.node_id} is {network_update}"
            )

            if network_update:
                try:
                    # Every workload is waited for, even after one has
                    # failed, hence the list instead of a lazy generator.
                    outcomes = [
                        deployer.wait_workload(
                            network_wid,
                            self.bot,
                            expiry=5,
                            breaking_node_id=node.node_id,
                            identity_name=self.identity.instance_name,
                            cancel_by_uuid=False,
                        )
                        for network_wid in network_update["ids"]
                    ]
                    if not all(outcomes):
                        raise DeploymentFailed()
                except DeploymentFailed:
                    self.vdc_deployer.error(f"Failed to deploy network on node {node.node_id}")
                    continue

            network_view = network_view.copy()
            ip_address = network_view.get_free_ip(node)
            self.vdc_deployer.info(f"VDC threebot container ip address {ip_address}")
            if not ip_address:
                continue

            # Name of the explorer, derived from the explorer URL, used in
            # the log channel name.
            explorer_url = j.core.identity.me.explorer_url
            if "test" in explorer_url:
                explorer = "test"
            elif "dev" in explorer_url:
                explorer = "dev"
            else:
                explorer = "main"

            log_config = j.core.config.get("VDC_LOG_CONFIG", {})
            if log_config:
                log_config["channel_name"] = f"{self.vdc_instance.instance_name}_{explorer}"

            wid = deployer.deploy_container(
                pool_id=pool_id,
                node_id=node.node_id,
                network_name=network_view.name,
                ip_address=ip_address,
                flist=flist,
                env=env,
                cpu=THREEBOT_CPU,
                memory=THREEBOT_MEMORY,
                disk_size=THREEBOT_DISK,
                secret_env=secret_env,
                identity_name=self.identity.instance_name,
                description=self.vdc_deployer.description,
                form_info={"chatflow": "threebot", "Solution name": self.vdc_name},
                solution_uuid=self.vdc_uuid,
                log_config=log_config,
            )
            self.vdc_deployer.info(f"VDC threebot container wid: {wid}")
            try:
                deployed = deployer.wait_workload(
                    wid,
                    self.bot,
                    identity_name=self.identity.instance_name,
                    cancel_by_uuid=False,
                )
                if not deployed:
                    raise DeploymentFailed()
                return wid
            except DeploymentFailed:
                self.vdc_deployer.error(
                    f"failed to deploy threebot container on node: {node.node_id} wid: {wid}"
                )
Пример #11
0
    def add_nodes(self):
        """Extend the existing Kubernetes cluster with ``self.nodes_count`` workers.

        Looks up the master workload (``self.master_wid``), selects nodes that
        do not already host a workload of this solution, makes sure each
        selected node is part of the cluster network, reserves an IP (and
        optionally a public IP) on it, and deploys a Kubernetes worker there.
        Afterwards every worker reservation is waited for; failed ones are
        decommissioned together with their public IP workload.

        Reads ``self.master_wid``, ``self.enable_public_ip``,
        ``self.nodes_count``, ``self.node_query`` and ``self.cluster_size``.
        Sets ``self.reservations`` (worker workload ids) and
        ``self.success_workload_count``.

        Raises:
            StopChatFlow: if a node cannot be added to the network, if no free
                IP is available on a selected node, or if some or all of the
                worker deployments fail.
        """
        zos = j.sals.zos.get()
        workload = zos.workloads.get(self.master_wid)
        metadata = j.sals.reservation_chatflow.reservation_chatflow.decrypt_reservation_metadata(
            workload.info.metadata)
        metadata = j.data.serializers.json.loads(metadata)
        solution_uuid = metadata.get("solution_uuid")

        # Nodes that already run a healthy workload of this solution must not
        # be selected again for the new workers.
        old_workloads = j.sals.marketplace.solutions.get_workloads_by_uuid(
            solution_uuid)
        old_nodes = [
            old_workload.info.node_id for old_workload in old_workloads
            if old_workload.info.result.state == State.Ok
        ]
        if self.enable_public_ip:
            self.node_query["ipv4u"] = self.nodes_count

        # Ask for enough candidates that, after dropping the already used
        # nodes, there can still be `nodes_count` left.
        nodes, pools = deployer.ask_multi_pool_distribution(
            self, self.nodes_count + len(old_nodes), self.node_query)
        selected_nodes = [
            (node, pool) for node, pool in zip(nodes, pools)
            if node.node_id not in old_nodes
        ]
        if len(selected_nodes) < self.nodes_count:
            self.stop(
                f"Failed to find resources to deploy {self.nodes_count}, available nodes are: {len(selected_nodes)}"
            )
        new_nodes = selected_nodes[:self.nodes_count]
        network_view = deployer.get_network_view(workload.network_id)
        master_ip = workload.ipaddress

        self.reservations = []
        for node, pool_id in new_nodes:
            res = deployer.add_network_node(workload.network_id, node, pool_id)
            if res:
                for wid in res["ids"]:
                    success = deployer.wait_workload(
                        wid, breaking_node_id=node.node_id)
                    if not success:
                        raise StopChatFlow(
                            f"Failed to add node {node.node_id} to network {wid}"
                        )
            # Refresh the view so it reflects the node that was just added.
            network_view = network_view.copy()
            ip_address = network_view.get_free_ip(node)
            if not ip_address:
                # The original message interpolated undefined names, which
                # raised NameError instead of StopChatFlow.
                raise StopChatFlow(
                    f"No free IPs for network {workload.network_id} on the specifed node"
                    f" {node.node_id}")

            self.md_show_update(f"Deploying worker on node {node.node_id}")
            # Add public ip
            public_id_wid = 0
            if self.enable_public_ip:
                public_id_wid, _ = deployer.create_public_ip(
                    pool_id,
                    node.node_id,
                    solution_uuid=solution_uuid)

            self.reservations.append(
                deployer.deploy_kubernetes_worker(
                    pool_id,
                    node.node_id,
                    workload.network_id,
                    workload.cluster_secret,
                    workload.ssh_keys,
                    ip_address,
                    master_ip,
                    size=self.cluster_size,
                    identity_name=None,
                    description="",
                    public_ip_wid=public_id_wid,
                    **metadata,
                ))

        self.success_workload_count = 0
        for resv in self.reservations:
            try:
                success = deployer.wait_workload(resv,
                                                 self,
                                                 cancel_by_uuid=False)
                if not success:
                    # A falsy result is a failed deployment as well; route it
                    # through the same cleanup path instead of counting it.
                    raise DeploymentFailed(
                        f"Workload {resv} was not deployed successfully",
                        wid=resv)
                self.success_workload_count += 1
            except DeploymentFailed as ex:
                # Cleaning k8s workloads and public IP workloads in case of failure in deployment
                failed_workload = zos.workloads.get(resv)
                if failed_workload.public_ip:
                    zos.workloads.decomission(failed_workload.public_ip)
                # Decommission the failed worker itself (not a stale network wid).
                zos.workloads.decomission(resv)
                j.logger.error(
                    f"Failed to deploy  workloads for {resv}, the error: {str(ex)}"
                )

        if not self.success_workload_count:
            raise StopChatFlow(
                msg="Can't extend your cluster, please try again later")

        if self.success_workload_count < len(self.reservations):
            raise StopChatFlow(
                msg=
                f"Some nodes failed to extend, {self.success_workload_count} of {self.nodes_count}, please try again later"
            )