def wait_pool_reservation(reservation_id, exp=5):
    """Poll a pool reservation until its payment is both paid and released.

    Args:
        reservation_id: id of the pool reservation to watch.
        exp: timeout in minutes before giving up.

    Returns:
        bool: True once the payment is paid and released, False on timeout.
    """
    zos = get_zos()
    deadline = j.data.time.now().timestamp + exp * 60
    while True:
        if j.data.time.get().timestamp >= deadline:
            return False
        info = zos.pools.get_payment_info(reservation_id)
        if info.paid and info.released:
            return True
        gevent.sleep(2)
def wait_pool_payment(pool_id, exp=5, trigger_cus=0, trigger_sus=1):
    """Poll a pool until its compute/storage units reach the given triggers.

    Args:
        pool_id: id of the pool to watch.
        exp: timeout in minutes before giving up.
        trigger_cus: minimum compute units required.
        trigger_sus: minimum storage units required.

    Returns:
        bool: True once both triggers are reached, False on timeout.
    """
    zos = get_zos()
    deadline = j.data.time.now().timestamp + exp * 60
    while True:
        if j.data.time.get().timestamp >= deadline:
            return False
        pool = zos.pools.get(pool_id)
        if pool.cus >= trigger_cus and pool.sus >= trigger_sus:
            return True
        gevent.sleep(2)
def get_farm_pool_id(farm_name):
    """Return the id of a pool on *farm_name*, creating an empty pool if none exists.

    Args:
        farm_name: name of the target farm.

    Returns:
        int: an existing pool's pool_id, or the reservation_id of a freshly
        created empty pool on that farm.
    """
    zos = get_zos()
    for candidate in zos.pools.list():
        candidate_farm_id = deployer.get_pool_farm_id(candidate.pool_id, candidate)
        if zos._explorer.farms.get(candidate_farm_id).name == farm_name:
            return candidate.pool_id
    # no pool on this farm yet: reserve an empty one (0 cus / 0 sus / 0 ipv4us)
    return zos.pools.create(0, 0, 0, farm_name).reservation_id
def __init__(self, farm_name=None, pool_id=None):
    """Initialize from either a farm name or a pool id; one of them is required.

    When only pool_id is given, the farm name (and the pool's node ids) are
    resolved from the explorer.

    Raises:
        j.exceptions.Validation: when neither farm_name nor pool_id is passed.
    """
    self.zos = get_zos()
    self._pool_node_ids = None
    if not farm_name:
        if not pool_id:
            raise j.exceptions.Validation("must pass farm_name or pool_id")
        # derive the farm from the pool when only a pool id was supplied
        pool = self.zos.pools.get(pool_id)
        self._pool_node_ids = pool.node_ids
        farm_name = self.zos._explorer.farms.get(
            deployer.get_pool_farm_id(pool_id, pool)
        ).name
    self.farm_name = farm_name
    self._nodes = []
    self._excluded_node_ids = set()
def get_target_s3_zdb_config(target_name):
    """Collect zdb connection configs for the minio solution named *target_name*.

    Returns:
        list[dict] | None: one dict per zdb workload with "address",
        "namespace" and "password" keys; None when no matching solution
        (or its solution uuid) is found.

    Raises:
        j.exceptions.Validation: a zdb workload has no password in its metadata.
        j.exceptions.Runtime: no usable zdb config could be built for the solution.
    """
    zos = get_zos()
    for sol_dict in solutions.list_minio_solutions():
        if sol_dict["Name"] != target_name:
            continue
        first_wid = sol_dict["wids"][0]
        sol_uuid = solutions.get_solution_uuid(zos.workloads.get(first_wid))
        if not sol_uuid:
            continue
        cluster_zdb_configs = []
        for workload in solutions.get_workloads_by_uuid(sol_uuid):
            if workload.info.workload_type != WorkloadType.Zdb:
                continue
            # metadata is json-serializable because the workload was identified
            # by solution_uuid; a missing password means an old, unsupported zdb
            metadata = serializers.json.loads(deployer.decrypt_metadata(workload.info.metadata))
            password = metadata.get("password")
            if not password:
                msg = f"AUTO_TOPUP: zdb workload {workload.id} doesn't include password in metadata in s3 solution {sol_dict['Name']}"
                j.logger.error(msg)
                raise j.exceptions.Validation(msg)
            # zdb url is split on "@": left side starts with the namespace,
            # right side is the address -- TODO confirm exact url layout
            parts = deployer.get_zdb_url(workload.id, password, workload=workload).split("@")
            cluster_zdb_configs.append(
                {
                    "address": parts[1],
                    "namespace": parts[0].split(":")[0],
                    "password": password,
                }
            )
        if not cluster_zdb_configs:
            msg = f"AUTO_TOPUP: can't retrive zdb config of s3 solution {sol_dict['Name']} because of invalid zdb metadata"
            j.logger.error(msg)
            raise j.exceptions.Runtime(msg)
        return cluster_zdb_configs
def _filter_vdc_workloads(self):
    """Return this vdc's deployed workloads, matched by vdc_uuid in their description.

    Lists the identity's workloads with next_action=DEPLOY and keeps only
    those whose type is in VDC_WORKLOAD_TYPES and whose JSON description
    carries this vdc's solution_uuid.

    Returns:
        list: the matching workload objects.
    """
    zos = get_zos()
    user_workloads = zos.workloads.list_workloads(self.identity_tid, next_action=NextAction.DEPLOY)
    result = []
    for workload in user_workloads:
        if workload.info.workload_type not in VDC_WORKLOAD_TYPES:
            continue
        if not workload.info.description:
            continue
        try:
            description = j.data.serializers.json.loads(workload.info.description)
        except Exception:
            # was a bare `except:`, which also swallowed KeyboardInterrupt /
            # SystemExit; only a malformed JSON description should be skipped
            continue
        if description.get("vdc_uuid") != self.solution_uuid:
            continue
        result.append(workload)
    return result
def from_workload(cls, workload):
    """Build a vmachine instance from an explorer vm workload.

    The machine name is recovered from the encrypted reservation metadata;
    when the workload references a public ip workload, its address is
    resolved from the explorer too.
    """
    instance = cls()
    instance.wid = workload.id
    raw_metadata = j.sals.reservation_chatflow.reservation_chatflow.decrypt_reservation_metadata(
        workload.info.metadata
    )
    decoded = j.data.serializers.json.loads(raw_metadata)
    instance.name = decoded["form_info"]["name"]
    instance.pool_id = workload.info.pool_id
    instance.node_id = workload.info.node_id
    instance.size = workload.size
    instance.resources = VMSIZES.get(workload.size)
    instance.ip_address = workload.ipaddress
    if workload.public_ip:
        instance.public_ip.wid = workload.public_ip
        public_ip_workload = get_zos().workloads.get(workload.public_ip)
        instance.public_ip.address = str(netaddr.IPNetwork(public_ip_workload.ipaddress).ip)
    return instance
def from_workload(cls, workload):
    """Build a kubernetes node object from an explorer k8s workload.

    A workload that lists master ips is a worker; one without is the master.
    Sizes outside the known K8S_SIZES fall back to the SMALL flavor.
    """
    node = cls()
    node.wid = workload.id
    node.ip_address = workload.ipaddress
    node.role = KubernetesRole.WORKER if workload.master_ips else KubernetesRole.MASTER
    node.node_id = workload.info.node_id
    node.pool_id = workload.info.pool_id
    if workload.public_ip:
        public_ip_workload = get_zos().workloads.get(workload.public_ip)
        node.public_ip = str(netaddr.IPNetwork(public_ip_workload.ipaddress).ip)
    known_sizes = {size.value for size in K8S_SIZES}
    if workload.size in known_sizes:
        node._size = VDC_SIZE.K8SNodeFlavor(workload.size).value
    else:
        node._size = VDC_SIZE.K8SNodeFlavor.SMALL.value
    return node
def job(self):
    """Health-check every vdc's threebot and redeploy its container when it is down.

    For each vdc: skips empty/expired ones, verifies the k8s master is
    reachable on port 6443, then probes the threebot domain over https.
    A first failure is only recorded in redis (failed_threebots hash); the
    redeploy is attempted only after a second failure at least 2 hours later.
    """
    zos = get_zos()
    threebot_workload_types = [WorkloadType.Container, WorkloadType.Subdomain]
    j.logger.info("Check VDC threebot service: Check if Threebot containers in all VDCs is UP")
    for vdc_name in j.sals.vdc.list_all():
        vdc_instance = j.sals.vdc.find(vdc_name)
        # Check if vdc is empty
        if vdc_instance.state == VDCSTATE.EMPTY:
            continue
        # double check state from explorer
        if vdc_instance.is_empty():
            j.logger.warning(f"Check VDC threebot service: {vdc_name} is empty")
            gevent.sleep(0.1)
            continue
        vdc_instance.load_info()
        # Check if vdc has not minmal Components
        if (not vdc_instance.kubernetes) or vdc_instance.expiration < j.data.time.now().timestamp:
            j.logger.warning(f"Check VDC threebot service: {vdc_name} is expired or not found")
            gevent.sleep(0.1)
            continue
        master_node = [n for n in vdc_instance.kubernetes if n.role == KubernetesRole.MASTER]
        if not master_node:
            j.logger.warning(f"Check VDC threebot service: {vdc_name} master not deployed")
            gevent.sleep(0.1)
            continue
        # last master in the list is used for all connectivity checks below
        master_ip = master_node[-1].public_ip
        # Check if vdc master is not reachable
        if not j.sals.nettools.tcp_connection_test(master_ip, 6443, 10):
            j.logger.warning(f"Check VDC threebot service: {vdc_name} master node is not reachable on public ip: {master_ip}")
            gevent.sleep(0.1)
            continue
        # rebind zos to the vdc's own identity so decomissions below are authorized
        vdc_indentity = "vdc_ident_" + vdc_instance.solution_uuid
        zos = get_zos(identity_name=vdc_indentity)
        # Check if threebot domain is not reachable
        if not j.sals.nettools.wait_http_test(f"https://{vdc_instance.threebot.domain}", timeout=10):
            # grace logic: record the first failure, act only if a previous
            # failure is older than 2 hours
            last_failure = j.core.db.hget(failed_threebots, vdc_instance.instance_name)
            if not last_failure:
                j.core.db.hset(failed_threebots, vdc_instance.instance_name, j.data.time.utcnow().timestamp)
                continue
            if last_failure and j.data.time.utcnow().timestamp - float(last_failure.decode()) < 2 * 60 * 60:
                continue
            j.logger.warning(f"Check VDC threebot service: {vdc_name} threebot is DOWN")
            # List All workloads related to threebot
            workloads = [workload for workload in _filter_vdc_workloads(vdc_instance) if workload.info.workload_type in threebot_workload_types]
            pool_id = 0
            # Decomission All the workloads related to threebot
            for workload in workloads:
                # Check that container is threebot not any other thing
                if workload.info.workload_type == WorkloadType.Container:
                    if "js-sdk" in workload.flist:
                        # remember the zdb farms and pool of the old threebot
                        # container so the redeploy reuses them
                        zdb_farms = workload.environment.get("S3_AUTO_TOPUP_FARMS")
                        pool_id = workload.info.pool_id
                    else:
                        continue
                zos.workloads.decomission(workload.id)
            # Deploy a new threebot container
            deployer = vdc_instance.get_deployer()
            try:
                kubeconfig = deployer.kubernetes.download_kube_config(master_ip)
            except Exception as e:
                j.logger.error(f"Check VDC threebot service: Failed to download kubeconfig for vdc {vdc_name} with error {e}")
                gevent.sleep(0.1)
                continue
            minio_wid = 0
            try:
                # NOTE(review): zdb_farms is only bound if a js-sdk container was
                # found above; otherwise .split raises NameError, caught by the
                # broad except below -- confirm this is the intended fallback
                zdb_farms = zdb_farms.split(",")
                threebot_wid = deployer.threebot.deploy_threebot(minio_wid, pool_id, kubeconfig, zdb_farms=zdb_farms)
                j.logger.info(f"Check VDC threebot service: {vdc_name} threebot new wid: {threebot_wid}")
                j.core.db.hdel(failed_threebots, vdc_instance.instance_name)
            except Exception as e:
                j.logger.error(f"Check VDC threebot service: Can't deploy threebot for {vdc_name} with error{e}")
        else:
            # threebot reachable: clear any recorded failure
            j.logger.info(f"Check VDC threebot service: {vdc_name} threebot is UP")
            j.core.db.hdel(failed_threebots, vdc_instance.instance_name)
        gevent.sleep(0.1)
def list_auto_top_up_config():
    """Yield one merged auto-topup config dict per configured s3 target.

    Reads the S3_AUTO_TOP_SOLUTIONS core config, validates the global
    defaults and each target's settings. Invalid global config aborts the
    generator (after raising an alert); invalid or unknown targets are
    skipped with a warning/alert.

    Yields:
        dict: per-target settings merged with the global defaults, plus the
        target's solution_uuid and remaining pool duration.
    """

    def _alert(message):
        # log the problem and raise an alert without stopping the caller
        j.logger.error(message)
        j.tools.alerthandler.alert_raise(
            app_name="s3_auto_topup",
            category="validation",
            message=message,
            alert_type="exception",
        )

    config = j.core.config.set_default(
        "S3_AUTO_TOP_SOLUTIONS",
        {"max_storage": 3 * 1024, "threshold": 0.7, "clear_threshold": 0.4, "targets": {}},
    )
    if not isinstance(config, dict):
        _alert("AUTO_TOPUP: S3_AUTO_TOP_SOLUTIONS config is not valid!")
        return
    default_extension_size = config.get("extension_size", 10)
    default_max_storage = config.get("max_storage")
    default_threshold = config.get("threshold")
    default_clear_threshold = config.get("clear_threshold")
    default_farm_names = config.get("farm_names")
    if not all([default_max_storage, default_threshold, default_clear_threshold, default_farm_names]):
        _alert("AUTO_TOPUP: S3_AUTO_TOP_SOLUTIONS config is not valid!")
        return
    targets = config.get("targets", {})
    if not isinstance(targets, dict):
        _alert("AUTO_TOPUP: S3_AUTO_TOP_SOLUTIONS targets config is not valid!")
        return
    zos = get_zos()
    minio_solutions = {sol["Name"]: sol for sol in solutions.list_minio_solutions()}
    for sol_name, sol_config in targets.items():
        if sol_name not in minio_solutions:
            j.logger.warning(f"AUTO_TOPUP: solution {sol_name} is not a current s3 solution")
            continue
        minio_solution = minio_solutions[sol_name]
        minio_pool = zos.pools.get(minio_solution["Primary Pool"])
        # remaining lifetime of the target's primary pool
        duration = minio_pool.empty_at - j.data.time.utcnow().timestamp
        solution_uuid = solutions.get_solution_uuid(zos.workloads.get(minio_solution["wids"][0]))
        required_keys = ("minio_api_url", "healing_url")
        if not isinstance(sol_config, dict) or not all(key in sol_config for key in required_keys):
            _alert(f"AUTO_TOPUP: target {sol_name} config is not valid!")
            continue
        yield {
            "name": sol_name,
            "solution_uuid": solution_uuid,
            "extension_size": sol_config.get("extension_size", default_extension_size),
            "minio_api_url": sol_config["minio_api_url"],
            "healing_url": sol_config["healing_url"],
            "max_storage": sol_config.get("max_storage", default_max_storage),
            "threshold": sol_config.get("threshold", default_threshold),
            "clear_threshold": sol_config.get("clear_threshold", default_clear_threshold),
            "duration": duration,
            "farm_names": sol_config.get("farm_names", default_farm_names),
        }
def add_all_farms(self):
    """Register a scheduler for every farm known to the explorer."""
    explorer_farms = get_zos()._explorer.farms.list()
    for farm in explorer_farms:
        self.get_scheduler(farm.name)
def report_vdc_status(vdc_name: str):
    """Shows all vdc workloads, nodes, status

    Args:
        vdc_name (string): target vdc to report on, None will get all values

    Returns:
        str: nice view for the vdc workloads
    """
    vdc = j.sals.vdc.find(vdc_name, load_info=True)
    if not vdc:
        print("VDC not found.")
        return
    print("\nGetting VDC information ...\n")
    zos = get_zos()
    creation_time = vdc.created.strftime("%d/%m/%Y, %H:%M:%S")
    expiration_time = "EXPIRED"
    try:
        expiration_time = vdc.expiration_date.strftime("%d/%m/%Y, %H:%M:%S")
    except:
        # bare except kept as-is: expiration_date may be missing/invalid on
        # expired vdcs, in which case the "EXPIRED" default stands
        pass
    flavor = vdc.flavor.value
    grace_period = "Yes" if vdc.is_blocked else "No"
    # defaults shown when the vdc lacks its minimal components
    master_ip = ""
    threebot_ip = ""
    threebot_domain = ""
    master_ip_state = "Down"
    threebot_domain_state = "Down"
    if vdc.has_minimal_components():
        # last master node's public ip is the one probed on the k8s api port
        master_ip = [n for n in vdc.kubernetes if n.role == KubernetesRole.MASTER][-1].public_ip
        threebot_ip = vdc.threebot.ip_address
        master_ip_state = "Up" if j.sals.nettools.tcp_connection_test(master_ip, 6443, 10) else "Down"
        threebot_domain = f"https://{vdc.threebot.domain}"
        threebot_domain_state = "Up" if j.sals.nettools.wait_http_test(threebot_domain, timeout=10) else "Down"
    print(
        f"Creation time: {creation_time}\n"
        f"Expiration time: {expiration_time}\n"
        f"Flavor: {flavor}\n"
        f"Grace Period: {grace_period}\n"
        f"Master IP: {master_ip} --> State: {master_ip_state}\n"
        f"Threebot IP: {threebot_ip}\n"
        f"Threebot Domain: {threebot_domain} --> State {threebot_domain_state}\n"
    )
    workloads = _filter_vdc_workloads(vdc)
    try:
        j.data.terminaltable.print_table(
            f"Wallets",
            [
                ["Name", "Address", "Balance"],
                [
                    vdc.prepaid_wallet.instance_name,
                    vdc.prepaid_wallet.address,
                    f"{vdc.prepaid_wallet.get_balance().balances[0].balance} TFT",
                ],
                [
                    vdc.provision_wallet.instance_name,
                    vdc.provision_wallet.address,
                    f"{vdc.provision_wallet.get_balance().balances[0].balance} TFT",
                ],
            ],
        )
    except:
        # bare except kept as-is: wallet/balance lookups can fail for
        # expired or invalid vdcs; fall back to a notice instead of crashing
        print("\n<== No available wallets data. Expired or invalid vdc ==>")
    print("\n")
    workloads_list = [["Wid", "Type", "State", "Farm", "PoolID", "IPv4Units", "NodeID", "NState", "Message"]]
    for workload in workloads:
        workload_type = workload.info.workload_type.name
        # gateway workloads (subdomain / reverse proxy) have no farm node to probe
        if not workload_type in ["Subdomain", "Reverse_proxy"]:
            farm_name = j.sals.marketplace.deployer.get_pool_farm_name(workload.info.pool_id)
            node_state = ("Up" if zos.nodes_finder.filter_is_up(zos._explorer.nodes.get(workload.info.node_id)) else "Down")
        else:
            farm_name = "Gateway"
            node_state = "Gateway"
        workloads_list.append([
            workload.id,
            workload_type,
            workload.info.next_action.name,
            farm_name,
            workload.info.pool_id,
            zos.pools.get(workload.info.pool_id).ipv4us,
            workload.info.node_id,
            node_state,
            # wrap long result messages so the table stays readable
            "\n".join(wrap(workload.info.result.message, 80)),
        ])
    j.data.terminaltable.print_table(f"Workloads", workloads_list)
def extend_zdbs(name, pool_ids, solution_uuid, password, duration, size=10, wallet_name=None, nodes_ids=None, disk_type=DiskType.HDD):
    """Extend pools with zdb capacity and deploy one new zdb per pool id.

    1- extend each pool with enough storage cloud units for its new zdbs
    2- deploy a zdb with the same size and password on a node of each pool
    3- return the new workload ids together with the password

    Args:
        name: solution name used for the new zdb workloads.
        pool_ids: pools to extend; one zdb is deployed per entry (duplicate
            ids accumulate extra capacity on the same pool).
        solution_uuid: uuid stored in the workloads' metadata/description.
        password: zdb namespace password reused for the new zdbs.
        duration: multiplier applied to the storage units on pool extension.
        size: zdb size (GiB).
        wallet_name: stellar wallet paying the farmers; defaults to the
            S3_AUTO_TOPUP_WALLET core config value.
        nodes_ids: optional whitelist of node ids to deploy on.
        disk_type: DiskType.HDD (hru query) or DiskType.SSD (sru query).

    Returns:
        tuple: (list of deployed zdb wids, password)

    Raises:
        j.exceptions.Validation: nodes_ids contains nodes that are not in
            the farm or don't have capacity.
    """
    description = j.data.serializers.json.dumps({"vdc_uuid": solution_uuid})
    wallet_name = wallet_name or j.core.config.get("S3_AUTO_TOPUP_WALLET")
    wallet = j.clients.stellar.get(wallet_name)
    zos = get_zos()
    storage_query = {"sru": size} if disk_type == DiskType.SSD else {"hru": size}
    # capacity per zdb is loop-invariant: compute once instead of per pool id
    su_per_zdb = deployer.calculate_capacity_units(**storage_query).su
    pool_total_sus = defaultdict(int)
    for pool_id in pool_ids:
        pool_total_sus[pool_id] += su_per_zdb
    reservations = []
    for pool_id, su in pool_total_sus.items():
        total_su = su * duration
        pool_info = zos.pools.extend(pool_id, 0, total_su, 0)
        j.logger.info(
            f"AUTO TOPUP: extending pool {pool_id} with sus: {total_su}, reservation_id: {pool_info.reservation_id}"
        )
        zos.billing.payout_farmers(wallet, pool_info)
        reservations.append({"pool_id": pool_id, "reservation_id": pool_info.reservation_id})
    # best effort: wait for each extension payment, only warn on timeout
    for reservation in reservations:
        if not wait_pool_reservation(reservation["reservation_id"]):
            j.logger.warning(
                f"pool {reservation['pool_id']} extension timedout for reservation: {reservation['reservation_id']}"
            )
    gs = GlobalScheduler()
    wids = []
    for pool_id in pool_ids:
        nodes_generator = gs.nodes_by_capacity(pool_id=pool_id, ip_version="IPv6", **storage_query)
        if nodes_ids:
            candidates = list(nodes_generator)
            available_ids = {node.node_id for node in candidates}
            unavailable_nodes_ids = set(nodes_ids) - available_ids
            if unavailable_nodes_ids:
                raise j.exceptions.Validation(
                    f"Some nodes: {unavailable_nodes_ids} are not in the farm or don't have capacity"
                )
            nodes_generator = [node for node in candidates if node.node_id in nodes_ids]
        # try candidate nodes until one zdb deploys successfully for this pool
        for node in nodes_generator:
            wid = deployer.deploy_zdb(
                pool_id=pool_id,
                node_id=node.node_id,
                size=size,
                disk_type=disk_type,
                mode=ZDBMode.Seq,
                password=password,
                form_info={"chatflow": "minio"},
                name=name,
                solution_uuid=solution_uuid,
                description=description,
            )
            try:
                success = deployer.wait_workload(wid, cancel_by_uuid=False)
                if not success:
                    raise DeploymentFailed()
                wids.append(wid)
                j.logger.info(f"AUTO TOPUP: ZDB workload {wid} deployed successfully")
                break
            except DeploymentFailed:
                j.logger.error(f"AUTO TOPUP: ZDB workload {wid} failed to deploy")
                continue
    return wids, password