def new_driver(config: Dict, ge_env: GridEngineEnvironment) -> "GridEngineDriver":
    import importlib

    ge_config = config.get("gridengine", {})

    # # just shorthand for gridengine.deferdriver.DeferredDriver
    # if ge_config.get("driver_scripts_dir"):
    #     deferred_qname = "gridengine.deferdriver.DeferredDriver"
    #     if ge_config.get("driver", deferred_qname) == deferred_qname:
    #         ge_config["driver"] = deferred_qname

    driver_expr = ge_config.get("driver", "gridengine.driver.new_driver")

    if "." not in driver_expr:
        raise BadDriverError(driver_expr)

    module_expr, func_or_class_name = driver_expr.rsplit(".", 1)

    try:
        module = importlib.import_module(module_expr)
    except Exception:
        logging.exception(
            "Could not load module %s. Is it in the"
            " PYTHONPATH environment variable? sys.path=%s",
            module_expr,
            sys.path,
        )
        raise

    func_or_class = getattr(module, func_or_class_name)
    return func_or_class(config, ge_env)
def new_driver(config: Dict) -> "PBSProDriver":
    import importlib

    pbs_config = config.get("pbs", {})
    driver_expr = pbs_config.get("driver", "pbs.driver.new_driver")

    if "." not in driver_expr:
        raise BadDriverError(driver_expr)

    module_expr, func_or_class_name = driver_expr.rsplit(".", 1)

    try:
        module = importlib.import_module(module_expr)
    except Exception:
        logging.exception(
            "Could not load module %s. Is it in the"
            " PYTHONPATH environment variable? sys.path=%s",
            module_expr,
            sys.path,
        )
        raise

    func_or_class = getattr(module, func_or_class_name)
    return func_or_class(config)
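# A usage sketch for the driver-loading pattern shared by both new_driver
# functions above; "mypkg.mydriver" and MyDriver are hypothetical names used
# purely for illustration. The "driver" config key holds a fully qualified
# dotted path that is split on its final "." into a module (imported via
# importlib) and an attribute (class or factory function) that is called to
# construct the driver.

def _example_custom_driver_config() -> None:
    config = {"pbs": {"driver": "mypkg.mydriver.MyDriver"}}
    # equivalent to importlib.import_module("mypkg.mydriver").MyDriver(config)
    driver = new_driver(config)
    print(driver)

    # a value without a "." cannot be split into module + attribute:
    try:
        new_driver({"pbs": {"driver": "MyDriver"}})
    except BadDriverError as e:
        print("rejected:", e)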
def initialize_db(path: str, read_only: bool, uri: bool = False) -> sqlite3.Connection:
    file_uri = path
    try:
        if read_only:
            path = os.path.abspath(path)
            # just use an in-memory db if this is the first time this is run
            if not os.path.exists(path):
                file_uri = "file::memory:"
            else:
                file_uri = "file://{}?mode=ro".format(path)
            # uninitialized file conns will fail here, so just
            # use memory instead
            try:
                conn = sqlite3.connect(file_uri, uri=True)
            except sqlite3.OperationalError:
                conn = sqlite3.connect("file::memory:", uri=True)
        else:
            conn = sqlite3.connect(path, uri=uri)
    except sqlite3.OperationalError as e:
        logging.exception("Error while opening %s - %s", file_uri, e)
        raise

    try:
        conn.execute("CREATE TABLE metadata (version)")
    except sqlite3.OperationalError as e:
        if "table metadata already exists" not in e.args:
            raise

    cursor = conn.execute("SELECT version FROM metadata")
    version_result = list(cursor)

    if version_result:
        version = version_result[0][0]
    else:
        conn.execute("INSERT INTO metadata (version) VALUES (?)", (SQLITE_VERSION,))
        version = SQLITE_VERSION

    if version != SQLITE_VERSION:
        conn.close()
        new_path = "{}.{}".format(path, version)
        print("Invalid version - moving to {}".format(new_path))
        shutil.move(path, new_path)
        return initialize_db(path, read_only)

    try:
        conn.execute(
            """CREATE TABLE nodes (node_id TEXT PRIMARY KEY,
                                   hostname TEXT,
                                   last_match_time REAL,
                                   create_time REAL,
                                   delete_time REAL)"""
        )
    except sqlite3.OperationalError as e:
        if "table nodes already exists" not in e.args:
            raise

    return conn
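# A minimal usage sketch for initialize_db; "nodes.db" is a hypothetical path
# in a writable directory. Read-only callers get either a mode=ro URI
# connection (when the file already exists) or a throwaway in-memory database.

def _example_initialize_db() -> None:
    rw_conn = initialize_db("nodes.db", read_only=False)
    rw_conn.execute(
        "INSERT INTO nodes (node_id, hostname) VALUES (?, ?)",
        ("id-123", "host-1"),
    )
    rw_conn.commit()
    rw_conn.close()

    ro_conn = initialize_db("nodes.db", read_only=True)
    print(list(ro_conn.execute("SELECT node_id, hostname FROM nodes")))
    ro_conn.close()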
def parse_scheduler_nodes(
    config: Dict,
    pbscmd: PBSCMD,
    resource_definitions: Dict[str, PBSProResourceDefinition],
) -> List[Node]:
    """
    Gets the current state of the nodes as the scheduler sees them, including
    resources, assigned resources, jobs currently running etc.
    """
    ret: List[Node] = []

    ignore_onprem = config.get("pbspro", {}).get("ignore_onprem", False)
    ignore_hostnames_re_expr = config.get("pbspro", {}).get("ignore_hostnames_re")
    ignore_hostnames_re = None
    if ignore_hostnames_re_expr:
        try:
            ignore_hostnames_re = re.compile(ignore_hostnames_re_expr)
        except re.error:
            logging.exception(
                f"Could not parse {ignore_hostnames_re_expr} as a regular expression"
            )

    ignored_hostnames = []

    for ndict in pbscmd.pbsnodes_parsed("-a"):
        if ignore_hostnames_re and ignore_hostnames_re.match(ndict["name"]):
            ignored_hostnames.append(ndict["name"])
            continue

        if ignore_onprem and ndict.get("resources_available.ccnodeid"):
            ignored_hostnames.append(ndict["name"])
            continue

        node = parse_scheduler_node(ndict, resource_definitions)

        if not node.available.get("ccnodeid"):
            node.metadata["override_resources"] = False
            logging.fine(
                "'ccnodeid' is not defined, so %s has not been joined to the cluster"
                " by the autoscaler yet, or this is not a CycleCloud managed node",
                node,
            )
        ret.append(node)

    if ignored_hostnames:
        if len(ignored_hostnames) < 5:
            logging.info(
                f"Ignored {len(ignored_hostnames)} hostnames: {','.join(ignored_hostnames)}"
            )
        else:
            logging.info(
                f"Ignored {len(ignored_hostnames)} hostnames: {','.join(ignored_hostnames[:5])}..."
            )
    return ret
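# A sketch of the configuration that drives the filtering above; the values
# are hypothetical. "ignore_hostnames_re" is applied with re.match against
# each pbsnodes name, so "^onprem-" skips onprem-01, onprem-02, etc., while
# "ignore_onprem" skips any node that already reports a
# resources_available.ccnodeid.
EXAMPLE_PBSPRO_FILTER_CONFIG = {
    "pbspro": {
        "ignore_onprem": True,
        "ignore_hostnames_re": "^onprem-.*",
    }
}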
def _initialize_impl() -> None:
    global VM_SIZES
    if VM_SIZES:
        return
    try:
        with open(RESOURCE_FILE) as fr:
            VM_SIZES = json.load(fr)
    except Exception:
        logging.exception(
            "Could not load resource file %s. Auxiliary vm size information"
            " (vm_family, gpu_count, capabilities etc) will be unavailable.",
            RESOURCE_FILE,
        )
import json
import os
from typing import Any, Dict, Optional

import hpc.autoscale.hpclogging as logging
from hpc.autoscale import hpctypes as ht

RESOURCE_FILE = os.path.join(os.path.dirname(__file__), "vm_sizes.json")

VM_SIZES: Dict[str, Any] = {}
try:
    with open(RESOURCE_FILE) as fr:
        VM_SIZES = json.load(fr)
except Exception:
    logging.exception(
        "Could not load resource file %s. Auxiliary vm size information"
        " (vm_family, gpu_count, capabilities etc) will be unavailable.",
        RESOURCE_FILE,
    )


class AuxVMSizeInfo:
    def __init__(self, record: Dict[str, Any]):
        self.__record = record
        self.__capabilities = record.get("capabilities", {})
        self.__memory = ht.Memory(self.__capabilities.get("MemoryGB", -1), "g")

    @property
    def memory(self) -> ht.Memory:
        return self.__memory

    @property
    def infiniband(self) -> bool:
        # the body was truncated in the original; a plausible completion based
        # on the Azure SKU "RdmaEnabled" capability (assumption)
        return bool(self.__capabilities.get("RdmaEnabled", False))
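# A small usage sketch for AuxVMSizeInfo; the record below mimics a single
# entry of vm_sizes.json, and the capability names (MemoryGB, RdmaEnabled)
# follow the Azure SKU capability convention assumed above.

def _example_aux_vm_size_info() -> None:
    record = {"capabilities": {"MemoryGB": 448, "RdmaEnabled": True}}
    aux = AuxVMSizeInfo(record)
    print(aux.memory)      # 448g as an ht.Memory
    print(aux.infiniband)  # True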
def autoscale_grid_engine(
    config: Dict[str, Any],
    ge_env: Optional[GridEngineEnvironment] = None,
    ge_driver: Optional["GridEngineDriver"] = None,
    ctx_handler: Optional[DefaultContextHandler] = None,
    node_history: Optional[NodeHistory] = None,
    dry_run: bool = False,
) -> DemandResult:
    global _exit_code

    assert not config.get("read_only", False)
    if dry_run:
        logging.warning("Running gridengine autoscaler in dry run mode")
        # allow multiple instances
        config["lock_file"] = None
        # put in read only mode
        config["read_only"] = True

    if ge_env is None:
        ge_env = envlib.from_qconf(config)

    # interface to GE, generally via the cli
    if ge_driver is None:
        # allow tests to pass in a mock
        ge_driver = new_driver(config, ge_env)

    ge_driver.initialize_environment()
    config = ge_driver.preprocess_config(config)

    logging.fine("Driver = %s", ge_driver)

    invalid_nodes = []

    # we need an instance without any scheduler nodes, so don't
    # pass in the existing nodes.
    tmp_node_mgr = new_node_manager(config)

    by_hostname = partition_single(
        tmp_node_mgr.get_nodes(), lambda n: n.hostname_or_uuid
    )

    for node in ge_env.nodes:
        # there are many state combinations involving a and u; however,
        # as long as both a and u are present, the node is down
        state = node.metadata.get("state", "")
        cc_node = by_hostname.get(node.hostname)
        ccnodeid = node.resources.get("ccnodeid")
        if cc_node:
            if not ccnodeid or ccnodeid == cc_node.delayed_node_id.node_id:
                if cc_node.state in ["Preparing", "Acquiring"]:
                    continue
        if "a" in state and "u" in state:
            invalid_nodes.append(node)

    # nodes in error state must also be deleted
    nodes_to_delete = ge_driver.clean_hosts(invalid_nodes)
    for node in nodes_to_delete:
        ge_env.delete_node(node)

    demand_calculator = calculate_demand(
        config, ge_env, ge_driver, ctx_handler, node_history
    )

    ge_driver.handle_failed_nodes(demand_calculator.node_mgr.get_failed_nodes())

    demand_result = demand_calculator.finish()

    if ctx_handler:
        ctx_handler.set_context("[joining]")

    # we pass in the nodes that matter (matched), and the driver figures out
    # which ones are new and need to be added via qconf
    joined = ge_driver.handle_join_cluster(
        [x for x in demand_result.compute_nodes if x.exists]
    )
    ge_driver.handle_post_join_cluster(joined)

    if ctx_handler:
        ctx_handler.set_context("[scaling]")

    # boot up all new nodes. Optionally pass in a filtered list
    if demand_result.new_nodes:
        if not dry_run:
            demand_calculator.bootup()

    if not dry_run:
        demand_calculator.update_history()

    # we also tell the driver about nodes that are unmatched. It filters them
    # out and returns a list of the ones we can delete.
    idle_timeout = int(config.get("idle_timeout", 300))
    boot_timeout = int(config.get("boot_timeout", 3600))
    logging.fine("Idle timeout is %s", idle_timeout)

    unmatched_for_idle_timeout = demand_calculator.find_unmatched_for(
        at_least=idle_timeout
    )
    timed_out_booting = demand_calculator.find_booting(at_least=boot_timeout)

    # ignore nodes that have keep_alive=true
    timed_out_booting = [n for n in timed_out_booting if not n.keep_alive]

    timed_out_to_deleted = []
    unmatched_nodes_to_delete = []

    if timed_out_booting:
        logging.info(
            "The following nodes have timed out while booting: %s", timed_out_booting
        )
        timed_out_to_deleted = ge_driver.handle_boot_timeout(timed_out_booting) or []

    if unmatched_for_idle_timeout:
        node_expr = ", ".join([str(x) for x in unmatched_for_idle_timeout])
        logging.info("Unmatched for at least %s seconds: %s", idle_timeout, node_expr)
        unmatched_nodes_to_delete = (
            ge_driver.handle_draining(unmatched_for_idle_timeout) or []
        )

    nodes_to_delete = []
    for node in timed_out_to_deleted + unmatched_nodes_to_delete:
        if node.assignments:
            logging.warning(
                "%s has jobs assigned to it, so we will take no action.", node
            )
            continue
        nodes_to_delete.append(node)

    if nodes_to_delete:
        try:
            logging.info("Deleting %s", [str(n) for n in nodes_to_delete])
            delete_result = demand_calculator.delete(nodes_to_delete)

            if delete_result:
                # in case the driver has anything to do after a node is deleted
                # (usually just removing it from the cluster)
                ge_driver.handle_post_delete(delete_result.nodes)
        except Exception as e:
            _exit_code = 1
            logging.warning("Deletion failed, will retry on next iteration: %s", e)
            logging.exception(str(e))

    print_demand(config, demand_result, log=not dry_run)

    return demand_result
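# A minimal dry-run invocation sketch, assuming a reachable GE installation
# (qconf on the PATH) and an autoscale.json-style config dict; the cluster and
# URL values are placeholders, and a real config likely needs more keys.
# dry_run=True forces read_only mode and skips bootup()/update_history(), so
# it previews scaling decisions without acting on them.

def _example_dry_run() -> None:
    config = {
        "cluster_name": "example-cluster",         # placeholder
        "url": "https://cyclecloud.example.com",   # placeholder
        "idle_timeout": 300,
        "boot_timeout": 3600,
    }
    demand_result = autoscale_grid_engine(config, dry_run=True)
    for node in demand_result.new_nodes:
        print("would boot:", node)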
def _parse_complexes(
    autoscale_config: Dict, complex_lines: List[str]
) -> Dict[str, "Complex"]:
    relevant_complexes = None
    if autoscale_config:
        relevant_complexes = autoscale_config.get("gridengine", {}).get(
            "relevant_complexes"
        )
        if relevant_complexes:
            # special handling of ccnodeid, since it is something we
            # create for the user
            relevant_complexes = relevant_complexes + ["ccnodeid"]

    if relevant_complexes:
        logging.info(
            "Restricting complexes for autoscaling to %s", relevant_complexes
        )

    complexes: List[Complex] = []
    headers = complex_lines[0].lower().replace("#", "").split()

    required = {"name", "type", "consumable"}
    missing = required - set(headers)
    if missing:
        logging.error(
            "Could not parse complex file as it is missing expected columns: %s."
            " Autoscale likely will not work.",
            list(missing),
        )
        return {}

    for n, line in enumerate(complex_lines[1:], start=1):
        if line.startswith("#"):
            continue
        toks = line.split()
        if len(toks) != len(headers):
            logging.warning(
                "Could not parse complex at line {} - ignoring: '{}'".format(n, line)
            )
            continue
        c = dict(zip(headers, toks))
        try:
            if (
                relevant_complexes
                and c["name"] not in relevant_complexes
                and c.get("shortcut") not in relevant_complexes
            ):
                logging.trace(
                    "Ignoring complex %s because it was not defined in"
                    " gridengine.relevant_complexes",
                    c["name"],
                )
                continue

            parsed = Complex(
                name=c["name"],
                shortcut=c.get("shortcut", c["name"]),
                complex_type=c["type"],
                relop=c.get("relop", "=="),
                requestable=c.get("requestable", "YES").lower() == "yes",
                consumable=c.get("consumable", "YES").lower() == "yes",
                default=c.get("default"),
                urgency=int(c.get("urgency", 0)),
            )

            complexes.append(parsed)

        except Exception:
            logging.exception("Could not parse complex %s - %s", line, c)

    # make each complex addressable by both its name and its shortcut
    ret = partition_single(complexes, lambda x: x.name)
    shortcut_dict = partition_single(complexes, lambda x: x.shortcut)
    ret.update(shortcut_dict)
    return ret
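# A sketch of the qconf-style input _parse_complexes expects: the first line
# is the header row and each following line is one complex definition. The
# rows below are illustrative; "slots" mimics a typical GE default and
# "ccnodeid" the CycleCloud-added complex.

def _example_parse_complexes() -> None:
    complex_lines = [
        "#name        shortcut  type      relop requestable consumable default urgency",
        "slots        s         INT       <=    YES         YES        1       1000",
        "ccnodeid     ccnodeid  RESTRING  ==    YES         NO         NONE    0",
    ]
    by_name = _parse_complexes({}, complex_lines)
    # addressable by name and shortcut alike
    assert by_name["slots"] is by_name["s"]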
def autoscale_pbspro(
    config: Dict[str, Any],
    pbs_env: Optional[PBSProEnvironment] = None,
    pbs_driver: Optional[PBSProDriver] = None,
    ctx_handler: Optional[DefaultContextHandler] = None,
    node_history: Optional[NodeHistory] = None,
    dry_run: bool = False,
) -> DemandResult:
    global _exit_code

    assert not config.get("read_only", False)
    if dry_run:
        logging.warning("Running pbs autoscaler in dry run mode")
        # allow multiple instances
        config["lock_file"] = None
        # put in read only mode
        config["read_only"] = True

    # interface to PBSPro, generally via the cli
    if pbs_driver is None:
        # allow tests to pass in a mock
        pbs_driver = PBSProDriver(config)

    if pbs_env is None:
        pbs_env = envlib.from_driver(config, pbs_driver)

    pbs_driver.initialize()

    config = pbs_driver.preprocess_config(config)

    logging.debug("Driver = %s", pbs_driver)

    demand_calculator = calculate_demand(config, pbs_env, ctx_handler, node_history)

    failed_nodes = demand_calculator.node_mgr.get_failed_nodes()
    for node in pbs_env.scheduler_nodes:
        if "down" in node.metadata.get("pbs_state", ""):
            failed_nodes.append(node)
    pbs_driver.handle_failed_nodes(failed_nodes)

    demand_result = demand_calculator.finish()

    if ctx_handler:
        ctx_handler.set_context("[joining]")

    # we pass in the nodes that matter (matched), and the driver figures out
    # which ones are new and need to be added
    joined = pbs_driver.add_nodes_to_cluster(
        [x for x in demand_result.compute_nodes if x.exists]
    )

    pbs_driver.handle_post_join_cluster(joined)

    if ctx_handler:
        ctx_handler.set_context("[scaling]")

    # boot up all new nodes. Optionally pass in a filtered list
    if demand_result.new_nodes:
        if not dry_run:
            demand_calculator.bootup()

    if not dry_run:
        demand_calculator.update_history()

    # we also tell the driver about nodes that are unmatched. It filters them
    # out and returns a list of the ones we can delete.
    idle_timeout = int(config.get("idle_timeout", 300))
    boot_timeout = int(config.get("boot_timeout", 3600))
    logging.fine("Idle timeout is %s", idle_timeout)

    unmatched_for_idle_timeout = demand_calculator.find_unmatched_for(
        at_least=idle_timeout
    )
    timed_out_booting = demand_calculator.find_booting(at_least=boot_timeout)

    # ignore nodes that have keep_alive=true
    timed_out_booting = [n for n in timed_out_booting if not n.keep_alive]

    timed_out_to_deleted = []
    unmatched_nodes_to_delete = []

    if timed_out_booting:
        logging.info(
            "The following nodes have timed out while booting: %s", timed_out_booting
        )
        timed_out_to_deleted = pbs_driver.handle_boot_timeout(timed_out_booting) or []

    if unmatched_for_idle_timeout:
        node_expr = ", ".join([str(x) for x in unmatched_for_idle_timeout])
        logging.info("Unmatched for at least %s seconds: %s", idle_timeout, node_expr)
        unmatched_nodes_to_delete = (
            pbs_driver.handle_draining(unmatched_for_idle_timeout) or []
        )

    nodes_to_delete = []
    for node in timed_out_to_deleted + unmatched_nodes_to_delete:
        if node.assignments:
            logging.warning(
                "%s has jobs assigned to it, so we will take no action.", node
            )
            continue
        nodes_to_delete.append(node)

    if nodes_to_delete:
        try:
            logging.info("Deleting %s", [str(n) for n in nodes_to_delete])
            delete_result = demand_calculator.delete(nodes_to_delete)

            if delete_result:
                # in case the driver has anything to do after a node is deleted
                # (usually just removing it from the cluster)
                pbs_driver.handle_post_delete(delete_result.nodes)
        except Exception as e:
            _exit_code = 1
            logging.warning("Deletion failed, will retry on next iteration: %s", e)
            logging.exception(str(e))

    print_demand(config, demand_result, log=not dry_run)

    return demand_result
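# The idle/boot timeout knobs shared by both autoscalers above, with their
# defaults made explicit; values are in seconds. A node idle for longer than
# idle_timeout is drained and deleted, and a node still booting after
# boot_timeout is passed to handle_boot_timeout (keep_alive nodes are exempt
# from both).
EXAMPLE_TIMEOUT_CONFIG = {
    "idle_timeout": 300,   # 5 minutes idle before draining
    "boot_timeout": 3600,  # 1 hour to finish booting
}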