def stop_schedule(cluster_name: str, schedule_name: str, **kwargs):
    """Stop a schedule through the Grass Azure executor.

    Nothing happens unless the loaded cluster details report
    ``cloud -> infra == 'azure'``.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        schedule_name: Forwarded to ``executor.stop_schedule``.
        **kwargs: Accepted but not used.
    """
    details = load_cluster_details(cluster_name=cluster_name)

    # Only the azure infra is handled; anything else is silently skipped.
    if details['cloud']['infra'] != 'azure':
        return

    azure_executor = GrassAzureExecutor(cluster_name=cluster_name)
    azure_executor.stop_schedule(schedule_name=schedule_name)
def get_job_logs(cluster_name: str, job_name: str, **kwargs):
    """Fetch the logs of a job through the Grass Azure executor.

    Nothing happens unless the loaded cluster details report
    ``cloud -> infra == 'azure'``.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        job_name: Forwarded to ``executor.get_job_logs``.
        **kwargs: Accepted but not used.
    """
    details = load_cluster_details(cluster_name=cluster_name)

    # Only the azure infra is handled; anything else is silently skipped.
    if details['cloud']['infra'] != 'azure':
        return

    azure_executor = GrassAzureExecutor(cluster_name=cluster_name)
    azure_executor.get_job_logs(job_name=job_name)
def with_checker(*args, **kwargs):
    """Validate the target cluster's details, then call the wrapped function.

    ``mode`` and ``func`` are taken from the enclosing scope. The cluster
    passes the check when ``mode`` is 'grass' or 'k8s', the stored
    ``cluster_details['mode']`` equals it, and the stored cloud infra is
    'azure'.

    Args:
        *args: Forwarded unchanged to ``func``.
        **kwargs: Forwarded unchanged to ``func``; must contain
            ``cluster_name``.

    Returns:
        Whatever ``func`` returns.

    Raises:
        KeyError: If ``cluster_name`` is not among the keyword arguments.
        CliException: If loading the cluster details raises
            ``FileNotFoundError``.
        ParsingError: If the details have an unexpected mode or infra, or
            lack a required key.
    """
    # Get params
    cluster_name = kwargs['cluster_name']

    # Get details
    try:
        cluster_details = load_cluster_details(cluster_name=cluster_name)
    except FileNotFoundError as e:
        raise CliException(f"Cluster {cluster_name} is not found") from e

    # Check details validity
    try:
        # Both supported modes apply the same infra rule, so they share one
        # branch. The short-circuit keeps the stored mode unread when the
        # requested mode itself is unsupported.
        if mode in ('grass', 'k8s') and cluster_details['mode'] == mode:
            if cluster_details['cloud']['infra'] != 'azure':
                raise ParsingError(f"Details are broken: Invalid infra: {cluster_details['cloud']['infra']}")
        else:
            raise ParsingError(f"Details are broken: Invalid mode: {cluster_details['mode']}")
    except KeyError as e:
        raise ParsingError(f"Details are broken: Missing key: '{e.args[0]}'") from e

    # Propagate the wrapped function's result instead of dropping it.
    return func(*args, **kwargs)
def list_job(cluster_name: str, **kwargs):
    """List jobs through the K8s Azure executor.

    Nothing happens unless the loaded cluster details report
    ``cloud -> infra == 'azure'``.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        **kwargs: Accepted but not used.
    """
    details = load_cluster_details(cluster_name=cluster_name)

    # Only the azure infra is handled; anything else is silently skipped.
    if details['cloud']['infra'] != 'azure':
        return

    azure_executor = K8sAzureExecutor(cluster_name=cluster_name)
    azure_executor.list_job()
def delete(self):
    """Delete the cluster's Azure resources and its local cluster folder.

    Every resource in the cluster's resource group whose name starts with
    the cluster id is deleted, then the folder
    ``{GlobalPaths.MARO_CLUSTERS}/{cluster_name}`` is removed.
    """
    # Load details
    cluster_name = self.cluster_name
    details = load_cluster_details(cluster_name=cluster_name)
    id_prefix = details['id']
    group_name = details['cloud']['resource_group']

    logger.info(f"Deleting cluster {cluster_name}")

    # Keep only the resources whose name carries the cluster id prefix.
    deletable_ids = [
        resource['id']
        for resource in AzureExecutor.list_resources(resource_group=group_name)
        if resource['name'].startswith(id_prefix)
    ]

    # Skip the delete call entirely when nothing matched.
    if deletable_ids:
        AzureExecutor.delete_resources(resources=deletable_ids)

    # Delete cluster folder
    cluster_folder = os.path.expanduser(f"{GlobalPaths.MARO_CLUSTERS}/{cluster_name}")
    rmtree(cluster_folder)

    logger.info_green(f"Cluster {cluster_name} is deleted")
def start_schedule(cluster_name: str, deployment_path: str, **kwargs):
    """Start a schedule through the Grass Azure executor.

    Nothing happens unless the loaded cluster details report
    ``cloud -> infra == 'azure'``.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        deployment_path: Forwarded to ``executor.start_schedule``.
        **kwargs: Accepted but not used.
    """
    details = load_cluster_details(cluster_name=cluster_name)

    # Only the azure infra is handled; anything else is silently skipped.
    if details['cloud']['infra'] != 'azure':
        return

    azure_executor = GrassAzureExecutor(cluster_name=cluster_name)
    azure_executor.start_schedule(deployment_path=deployment_path)
def setUpClass(cls, file_path: str = os.path.abspath(__file__)) -> None:
    """Prepare the shared fixtures for the K8s test case and create the cluster.

    Steps, in order: create a per-run working folder under
    ``GlobalPaths.MARO_TEST``, render the deployment file from the template
    plus ``config.yml``, create the local test files, run
    ``maro k8s create``, then load the resulting cluster details and
    executor.

    Args:
        file_path: Not read by this method; paths are resolved from this
            module's own ``__file__``.
            NOTE(review): confirm whether callers expect it to be honoured.

    Raises:
        KeyError: If ``config.yml`` lacks the ``cloud/subscription`` or
            ``user/admin_public_key`` key.
        Exception: If either of those config values is empty.
    """
    # Get and set params
    GlobalParams.LOG_LEVEL = logging.DEBUG
    cls.test_id = uuid.uuid4().hex[:8]
    os.makedirs(
        os.path.expanduser(f"{GlobalPaths.MARO_TEST}/{cls.test_id}"),
        exist_ok=True)
    os.makedirs(
        os.path.expanduser(f"{GlobalPaths.MARO_TEST}/{cls.test_id}/tar"),
        exist_ok=True)
    cls.file_path = os.path.abspath(__file__)
    cls.dir_path = os.path.dirname(cls.file_path)
    cls.deployment_template_path = os.path.normpath(
        os.path.join(cls.dir_path, "../templates/test_k8s_azure_create.yml"))
    cls.deployment_path = os.path.expanduser(
        f"{GlobalPaths.MARO_TEST}/{cls.test_id}/test_k8s_azure_create.yml")
    cls.config_path = os.path.normpath(
        os.path.join(cls.dir_path, "../config.yml"))

    # Load config and save deployment
    with open(cls.deployment_template_path) as fr:
        deployment_details = yaml.safe_load(fr)
    with open(cls.config_path) as fr:
        config_details = yaml.safe_load(fr)
        if config_details["cloud/subscription"] and config_details["user/admin_public_key"]:
            deployment_details["cloud"]["subscription"] = config_details["cloud/subscription"]
            deployment_details["user"]["admin_public_key"] = config_details["user/admin_public_key"]
        else:
            raise Exception("Invalid config")
    with open(cls.deployment_path, "w") as fw:
        yaml.safe_dump(deployment_details, fw)

    # Get params from deployments
    cls.cluster_name = deployment_details["name"]

    # Init test files
    cls.local_big_file_path = os.path.expanduser(
        f"{GlobalPaths.MARO_TEST}/{cls.test_id}/big_file")
    cls.local_small_files_path = os.path.expanduser(
        f"{GlobalPaths.MARO_TEST}/{cls.test_id}/small_files")
    # count=0 with seek=1G makes dd create a 1G sparse file without
    # writing any data blocks.
    command = f"dd if=/dev/zero of={cls.local_big_file_path} bs=1 count=0 seek=1G"
    SubProcess.run(command)
    # The repository checkout serves as the "many small files" fixture.
    # The remote must be a real SSH clone URL (user@host:owner/repo.git).
    command = f"git clone git@github.com:microsoft/maro.git {cls.local_small_files_path}"
    SubProcess.run(command)

    # Create cluster
    command = f"maro k8s create --debug {cls.deployment_path}"
    SubProcess.interactive_run(command)
    cls.cluster_details = load_cluster_details(
        cluster_name=cls.cluster_name)
    cls.cluster_id = cls.cluster_details["id"]
    cls.executor = K8sAksExecutor(cluster_name=cls.cluster_name)
    # Fixed pause before querying the redis pod name.
    time.sleep(15)
    cls.pod_name = cls._get_redis_pod_name()
def delete(cluster_name: str, **kwargs):
    """Delete a cluster that runs in ``k8s/aks`` mode.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        **kwargs: Accepted but not used.

    Raises:
        BadRequestError: If the cluster's mode is not ``k8s/aks``.
    """
    cluster_details = load_cluster_details(cluster_name=cluster_name)

    # Reject unsupported modes up front.
    if cluster_details["mode"] != "k8s/aks":
        raise BadRequestError(f"Unsupported command in mode '{cluster_details['mode']}'.")

    aks_executor = K8sAksExecutor(cluster_name=cluster_name)
    aks_executor.delete()
def node_leave(cluster_name: str, node_name: str, **kwargs):
    """Make a node leave a ``grass/on-premises`` cluster.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        node_name: Forwarded to ``executor.node_leave_cluster``.
        **kwargs: Accepted but not used.

    Raises:
        BadRequestError: If the cluster's mode is not ``grass/on-premises``.
    """
    cluster_details = load_cluster_details(cluster_name)

    if cluster_details["mode"] != "grass/on-premises":
        # The message names the operation that was actually requested.
        raise BadRequestError("Node leave cluster interrupted: Invalid mode.")

    executor = GrassOnPremisesExecutor(cluster_name)
    executor.node_leave_cluster(node_name)
def start_job(cluster_name: str, deployment_path: str, **kwargs):
    """Start a job on a cluster in ``grass/azure`` or ``grass/on-premises`` mode.

    Both modes are served by ``GrassAzureExecutor``.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        deployment_path: Forwarded to ``executor.start_job``.
        **kwargs: Accepted but not used.

    Raises:
        BadRequestError: If the cluster's mode is neither of the two.
    """
    cluster_details = load_cluster_details(cluster_name=cluster_name)

    # Reject unsupported modes up front.
    if cluster_details["mode"] not in ("grass/azure", "grass/on-premises"):
        raise BadRequestError(f"Unsupported command in mode '{cluster_details['mode']}'.")

    grass_executor = GrassAzureExecutor(cluster_name=cluster_name)
    grass_executor.start_job(deployment_path=deployment_path)
def stop_node(cluster_name: str, replicas: int, node_size: str, **kwargs):
    """Stop nodes of a cluster that runs in ``grass/azure`` mode.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        replicas: Forwarded to ``executor.stop_node``.
        node_size: Forwarded to ``executor.stop_node``.
        **kwargs: Accepted but not used.

    Raises:
        BadRequestError: If the cluster's mode is not ``grass/azure``.
    """
    cluster_details = load_cluster_details(cluster_name=cluster_name)

    # Reject unsupported modes up front.
    if cluster_details["mode"] != "grass/azure":
        raise BadRequestError(f"Unsupported command in mode '{cluster_details['mode']}'.")

    grass_executor = GrassAzureExecutor(cluster_name=cluster_name)
    grass_executor.stop_node(replicas=replicas, node_size=node_size)
def pull_data(cluster_name: str, local_path: str, remote_path: str, **kwargs):
    """Pull data from a cluster in ``grass/azure`` or ``grass/on-premises`` mode.

    Both modes are served by ``GrassAzureExecutor``.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        local_path: Forwarded to ``executor.pull_data``.
        remote_path: Forwarded to ``executor.pull_data``.
        **kwargs: Accepted but not used.

    Raises:
        BadRequestError: If the cluster's mode is neither of the two.
    """
    cluster_details = load_cluster_details(cluster_name=cluster_name)

    # Reject unsupported modes up front.
    if cluster_details["mode"] not in ("grass/azure", "grass/on-premises"):
        raise BadRequestError(f"Unsupported command in mode '{cluster_details['mode']}'.")

    grass_executor = GrassAzureExecutor(cluster_name=cluster_name)
    grass_executor.pull_data(local_path=local_path, remote_path=remote_path)
def get_job_logs(cluster_name: str, job_name: str, **kwargs):
    """Fetch job logs from a cluster in ``grass/azure`` or ``grass/on-premises`` mode.

    Both modes are served by ``GrassAzureExecutor``.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        job_name: Forwarded to ``executor.get_job_logs``.
        **kwargs: Accepted but not used.

    Raises:
        BadRequestError: If the cluster's mode is neither of the two.
    """
    cluster_details = load_cluster_details(cluster_name=cluster_name)

    # Reject unsupported modes up front.
    if cluster_details["mode"] not in ("grass/azure", "grass/on-premises"):
        raise BadRequestError(f"Unsupported command in mode '{cluster_details['mode']}'.")

    grass_executor = GrassAzureExecutor(cluster_name=cluster_name)
    grass_executor.get_job_logs(job_name=job_name)
def remove_data(cluster_name: str, remote_path: str, **kwargs):
    """Remove remote data from a cluster that runs in ``k8s/aks`` mode.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        remote_path: Forwarded to ``executor.remove_data``.
        **kwargs: Accepted but not used.

    Raises:
        BadRequestError: If the cluster's mode is not ``k8s/aks``.
    """
    cluster_details = load_cluster_details(cluster_name=cluster_name)

    # Reject unsupported modes up front.
    if cluster_details["mode"] != "k8s/aks":
        raise BadRequestError(f"Unsupported command in mode '{cluster_details['mode']}'.")

    aks_executor = K8sAksExecutor(cluster_name=cluster_name)
    aks_executor.remove_data(remote_path=remote_path)
def status(cluster_name: str, resource_name: str, **kwargs):
    """Query a resource's status on a ``grass/azure`` or ``grass/on-premises`` cluster.

    Both modes are served by ``GrassAzureExecutor``.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        resource_name: Forwarded to ``executor.status``.
        **kwargs: Accepted but not used.

    Raises:
        BadRequestError: If the cluster's mode is neither of the two.
    """
    cluster_details = load_cluster_details(cluster_name=cluster_name)

    # Reject unsupported modes up front.
    if cluster_details["mode"] not in ("grass/azure", "grass/on-premises"):
        raise BadRequestError(f"Unsupported command in mode '{cluster_details['mode']}'.")

    grass_executor = GrassAzureExecutor(cluster_name=cluster_name)
    grass_executor.status(resource_name=resource_name)
def clean(cluster_name: str, **kwargs):
    """Run the executor's ``clean`` on a cluster in ``grass/azure`` mode.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        **kwargs: Accepted but not used.

    Raises:
        BadRequestError: If the cluster's mode is not ``grass/azure``.
    """
    cluster_details = load_cluster_details(cluster_name=cluster_name)

    # Reject unsupported modes up front.
    if cluster_details["mode"] != "grass/azure":
        raise BadRequestError(f"Unsupported command in mode '{cluster_details['mode']}'.")

    grass_executor = GrassAzureExecutor(cluster_name=cluster_name)
    grass_executor.clean()
def start_job(cluster_name: str, deployment_path: str, **kwargs):
    """Start a job on a cluster that runs in ``k8s/aks`` mode.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        deployment_path: Forwarded to ``executor.start_job``.
        **kwargs: Accepted but not used.

    Raises:
        BadRequestError: If the cluster's mode is not ``k8s/aks``.
    """
    cluster_details = load_cluster_details(cluster_name=cluster_name)

    # Reject unsupported modes up front.
    if cluster_details["mode"] != "k8s/aks":
        raise BadRequestError(f"Unsupported command in mode '{cluster_details['mode']}'.")

    aks_executor = K8sAksExecutor(cluster_name=cluster_name)
    aks_executor.start_job(deployment_path=deployment_path)
def stop_schedule(cluster_name: str, schedule_name: str, **kwargs):
    """Stop a schedule on a cluster in ``grass/azure`` or ``grass/on-premises`` mode.

    Both modes are served by ``GrassAzureExecutor``.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        schedule_name: Forwarded to ``executor.stop_schedule``.
        **kwargs: Accepted but not used.

    Raises:
        BadRequestError: If the cluster's mode is neither of the two.
    """
    cluster_details = load_cluster_details(cluster_name=cluster_name)

    # Reject unsupported modes up front.
    if cluster_details["mode"] not in ("grass/azure", "grass/on-premises"):
        raise BadRequestError(f"Unsupported command in mode '{cluster_details['mode']}'.")

    grass_executor = GrassAzureExecutor(cluster_name=cluster_name)
    grass_executor.stop_schedule(schedule_name=schedule_name)
def get_job_logs(cluster_name: str, job_name: str, **kwargs):
    """Fetch job logs from a cluster that runs in ``k8s/aks`` mode.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        job_name: Forwarded to ``executor.get_job_logs``.
        **kwargs: Accepted but not used.

    Raises:
        BadRequestError: If the cluster's mode is not ``k8s/aks``.
    """
    cluster_details = load_cluster_details(cluster_name=cluster_name)

    # Reject unsupported modes up front.
    if cluster_details["mode"] != "k8s/aks":
        raise BadRequestError(f"Unsupported command in mode '{cluster_details['mode']}'.")

    aks_executor = K8sAksExecutor(cluster_name=cluster_name)
    aks_executor.get_job_logs(job_name=job_name)
def stop_schedule(cluster_name: str, schedule_name: str, **kwargs):
    """Stop a schedule on a cluster that runs in ``k8s/aks`` mode.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        schedule_name: Forwarded to ``executor.stop_schedule``.
        **kwargs: Accepted but not used.

    Raises:
        BadRequestError: If the cluster's mode is not ``k8s/aks``.
    """
    cluster_details = load_cluster_details(cluster_name=cluster_name)

    # Reject unsupported modes up front.
    if cluster_details["mode"] != "k8s/aks":
        raise BadRequestError(f"Unsupported command in mode '{cluster_details['mode']}'.")

    aks_executor = K8sAksExecutor(cluster_name=cluster_name)
    aks_executor.stop_schedule(schedule_name=schedule_name)
def push_image(cluster_name: str, image_name: str, image_path: str, remote_context_path: str,
               remote_image_name: str, **kwargs):
    """Push an image through the Grass Azure executor.

    Nothing happens unless the loaded cluster details report
    ``cloud -> infra == 'azure'``.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        image_name: Forwarded to ``executor.push_image``.
        image_path: Forwarded to ``executor.push_image``.
        remote_context_path: Forwarded to ``executor.push_image``.
        remote_image_name: Forwarded to ``executor.push_image``.
        **kwargs: Accepted but not used.
    """
    details = load_cluster_details(cluster_name=cluster_name)

    # Only the azure infra is handled; anything else is silently skipped.
    if details['cloud']['infra'] != 'azure':
        return

    azure_executor = GrassAzureExecutor(cluster_name=cluster_name)
    azure_executor.push_image(
        image_name=image_name,
        image_path=image_path,
        remote_context_path=remote_context_path,
        remote_image_name=remote_image_name,
    )
def scale_node(cluster_name: str, replicas: int, node_size: str, **kwargs):
    """Scale nodes of a cluster that runs in ``k8s/aks`` mode.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        replicas: Forwarded to ``executor.scale_node``.
        node_size: Forwarded to ``executor.scale_node``.
        **kwargs: Accepted but not used.

    Raises:
        BadRequestError: If the cluster's mode is not ``k8s/aks``.
    """
    cluster_details = load_cluster_details(cluster_name=cluster_name)

    # Reject unsupported modes up front.
    if cluster_details["mode"] != "k8s/aks":
        raise BadRequestError(f"Unsupported command in mode '{cluster_details['mode']}'.")

    aks_executor = K8sAksExecutor(cluster_name=cluster_name)
    aks_executor.scale_node(replicas=replicas, node_size=node_size)
def delete(cluster_name: str, **kwargs):
    """Delete a cluster in ``grass/azure`` or ``grass/on-premises`` mode.

    ``grass/azure`` clusters go through ``GrassAzureExecutor`` and
    ``grass/on-premises`` clusters through ``GrassOnPremisesExecutor``.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        **kwargs: Accepted but not used.

    Raises:
        BadRequestError: If the cluster's mode is neither of the two.
    """
    cluster_details = load_cluster_details(cluster_name=cluster_name)

    # Pick the executor for the mode first, then issue the single delete call.
    if cluster_details["mode"] == "grass/azure":
        chosen_executor = GrassAzureExecutor(cluster_name=cluster_name)
    elif cluster_details["mode"] == "grass/on-premises":
        chosen_executor = GrassOnPremisesExecutor(cluster_name=cluster_name)
    else:
        raise BadRequestError(f"Unsupported command in mode '{cluster_details['mode']}'.")

    chosen_executor.delete()
def pull_data(cluster_name: str, local_path: str, remote_path: str, **kwargs):
    """Copy files from the cluster's master node to a local directory.

    The remote source is
    ``{GlobalPaths.MARO_CLUSTERS}/{cluster_name}/data/{remote_path}``.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the remote source path.
        local_path: Passed to ``copy_files_from_node`` as ``local_dir``.
        remote_path: Path appended below the cluster's ``data`` folder.
        **kwargs: Accepted but not used.
    """
    # Load details
    details = load_cluster_details(cluster_name=cluster_name)
    username = details['user']['admin_username']
    master_ip = details['master']['public_ip_address']

    remote_source = f"{GlobalPaths.MARO_CLUSTERS}/{cluster_name}/data/{remote_path}"
    copy_files_from_node(
        local_dir=local_path,
        remote_path=remote_source,
        admin_username=username,
        node_ip_address=master_ip,
    )
def push_image(cluster_name: str, image_name: str, image_path: str, remote_context_path: str,
               remote_image_name: str, **kwargs):
    """Push an image to a cluster in ``grass/azure`` or ``grass/on-premises`` mode.

    Both modes are served by ``GrassAzureExecutor``.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        image_name: Forwarded to ``executor.push_image``.
        image_path: Forwarded to ``executor.push_image``.
        remote_context_path: Forwarded to ``executor.push_image``.
        remote_image_name: Forwarded to ``executor.push_image``.
        **kwargs: Accepted but not used.

    Raises:
        BadRequestError: If the cluster's mode is neither of the two.
    """
    cluster_details = load_cluster_details(cluster_name=cluster_name)

    # Reject unsupported modes up front.
    if cluster_details["mode"] not in ("grass/azure", "grass/on-premises"):
        raise BadRequestError(f"Unsupported command in mode '{cluster_details['mode']}'.")

    grass_executor = GrassAzureExecutor(cluster_name=cluster_name)
    grass_executor.push_image(
        image_name=image_name,
        image_path=image_path,
        remote_context_path=remote_context_path,
        remote_image_name=remote_image_name,
    )
def pull_data(cluster_name: str, local_path: str, remote_path: str, **kwargs):
    """Copy files from the cluster's master node to a local directory.

    ``remote_path`` must start with ``/``; the remote source is
    ``{GlobalPaths.MARO_CLUSTERS}/{cluster_name}/data{remote_path}``.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the remote source path.
        local_path: Passed to ``copy_files_from_node`` as ``local_dir``.
        remote_path: Path appended directly to the cluster's ``data`` folder.
        **kwargs: Accepted but not used.

    Raises:
        CliException: If ``remote_path`` does not start with ``/``.
    """
    # Load details (read before the path check, so a broken details file
    # surfaces first).
    details = load_cluster_details(cluster_name=cluster_name)
    username = details['user']['admin_username']
    master_ip = details['master']['public_ip_address']

    path_is_rooted = remote_path.startswith("/")
    if not path_is_rooted:
        raise CliException("Invalid remote path")

    remote_source = f"{GlobalPaths.MARO_CLUSTERS}/{cluster_name}/data{remote_path}"
    copy_files_from_node(
        local_dir=local_path,
        remote_path=remote_source,
        admin_username=username,
        node_ip_address=master_ip,
    )
def with_checker(*args, **kwargs):
    """Validate the target cluster's details, then call the wrapped function.

    ``func`` is taken from the enclosing scope. The cluster passes the check
    when its stored mode is one of ``grass/azure``, ``k8s/aks`` or
    ``grass/on-premises``.

    Args:
        *args: Forwarded unchanged to ``func``.
        **kwargs: Forwarded unchanged to ``func``; must contain
            ``cluster_name``.

    Returns:
        Whatever ``func`` returns.

    Raises:
        KeyError: If ``cluster_name`` is not among the keyword arguments.
        BadRequestError: If loading the cluster details raises
            ``FileNotFoundError``.
        ClusterInternalError: If the stored mode is not supported, or a
            ``KeyError`` is raised while loading or checking the details.
    """
    # Get params
    cluster_name = kwargs["cluster_name"]

    # Get details
    try:
        cluster_details = load_cluster_details(cluster_name=cluster_name)

        # Check details validity
        if cluster_details["mode"] not in {"grass/azure", "k8s/aks", "grass/on-premises"}:
            raise ClusterInternalError(
                f"Cluster details are broken: Invalid mode '{cluster_details['mode']}'."
            )
    except FileNotFoundError as e:
        raise BadRequestError(f"Cluster '{cluster_name}' is not found.") from e
    except KeyError as e:
        raise ClusterInternalError("Cluster details are broken: Missing key 'mode'.") from e

    # Propagate the wrapped function's result instead of dropping it.
    return func(*args, **kwargs)
def __init__(self, cluster_name: str):
    """Store the cluster name and load that cluster's details.

    Args:
        cluster_name: Kept as ``self.cluster_name`` and passed to
            ``load_cluster_details``.
    """
    self.cluster_name = cluster_name
    loaded_details = load_cluster_details(cluster_name=cluster_name)
    self.cluster_details = loaded_details
def list_node(cluster_name: str, **kwargs):
    """List nodes through the Grass Azure executor.

    Nothing happens unless the loaded cluster details report
    ``cloud -> infra == 'azure'``.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        **kwargs: Accepted but not used.
    """
    details = load_cluster_details(cluster_name=cluster_name)

    # Only the azure infra is handled; anything else is silently skipped.
    if details['cloud']['infra'] != 'azure':
        return

    azure_executor = GrassAzureExecutor(cluster_name=cluster_name)
    azure_executor.list_node()
def stop_node(cluster_name: str, replicas: int, node_size: str, **kwargs):
    """Stop nodes through the Grass Azure executor.

    Nothing happens unless the loaded cluster details report
    ``cloud -> infra == 'azure'``.

    Args:
        cluster_name: Name used to load the cluster details and to build
            the executor.
        replicas: Forwarded to ``executor.stop_node``.
        node_size: Forwarded to ``executor.stop_node``.
        **kwargs: Accepted but not used.
    """
    details = load_cluster_details(cluster_name=cluster_name)

    # Only the azure infra is handled; anything else is silently skipped.
    if details['cloud']['infra'] != 'azure':
        return

    azure_executor = GrassAzureExecutor(cluster_name=cluster_name)
    azure_executor.stop_node(replicas=replicas, node_size=node_size)