Example #1
def call(cmd: List[str]) -> None:
    shlexed = " ".join([shlex.quote(x) for x in cmd])
    logging.trace("Running '%s'", shlexed)
    _QCMD_LOGGER.info(shlexed)
    stderr = ""
    completed_process = None
    try:
        # capture_output was added in 3.7 and we support as far back as 3.6
        if sys.version_info < (3, 7):
            completed_process = subprocess.run(cmd, stderr=subprocess.PIPE)
        else:
            completed_process = subprocess.run(cmd, capture_output=True)

        if completed_process.returncode != 0:
            if completed_process.stderr:
                stderr = completed_process.stderr.decode()
            logging.warning(
                "'%s' failed with exit %d: Stderr '%s'",
                shlexed,
                completed_process.returncode,
                stderr,
            )
    except Exception as e:
        logging.error("'%s' failed: %s.", shlexed, str(e))
        _QCMD_LOGGER.error(">> %s", str(e))
        raise
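
A minimal usage sketch (the command shown is illustrative, not from the source): call() takes an argv-style list, logs the shell-quoted command, and on a non-zero exit logs a warning with the captured stderr rather than raising.

call(["qconf", "-spl"])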
Example #2
    def __init__(
        self,
        name: str,
        consumed_core_count: int,
        max_core_count: int,
        consumed_count: Optional[int] = None,
        max_count: Optional[int] = None,
    ):
        self._name = name
        assert consumed_core_count is not None, (
            "consumed_core_count is None for limit {}".format(name))
        assert max_core_count is not None, (
            "max_core_count is None for limit {}".format(name))
        self._consumed_core_count = max(0, consumed_core_count)
        self._max_core_count = max(0, max_core_count)
        if self._consumed_core_count > self._max_core_count:
            logging.warning(
                "consumed_core_count(%s) > max_core_count(%s) for %s limit. Flooring it.",
                self._consumed_core_count,
                self._max_core_count,
                name,
            )
            self._consumed_core_count = self._max_core_count

        if max_count is None:
            assert consumed_count is None
        else:
            assert consumed_count is not None

        self.__max_count = max_count
        self.__consumed_count = consumed_count
Example #3
    def parse_select(self, select_expression: str) -> List[Dict[str, Any]]:
        # Need to detect when slot_type is specified with `-l select=1:slot_type`
        assert isinstance(select_expression, str)
        chunks: List[Dict[str, Any]] = []

        for chunk_expr in select_expression.split("+"):
            chunk = {}
            # give a default of 1 in case the user assumes 1 with their select
            # i.e. -l select=1:mem=16gb == -l select=mem=16gb
            # if they picked a number it will be overridden below
            chunk["select"] = "1"
            chunk["schedselect"] = "1"
            for expr in chunk_expr.split(":"):
                value: Any

                if "=" not in expr:
                    key, value = "select", int(expr)
                else:
                    key, value = expr.split("=", 1)
                    if key in self.resource_definitions:
                        value = self.resource_definitions[key].type.parse(
                            value)
                    else:
                        logging.warning(
                            "Unknown resource %s: treating as a string.", key)
                chunk[key] = value
            chunks.append(chunk)

        return chunks
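
A hedged illustration of the default-of-1 behavior described in the comments above, assuming a parser whose resource_definitions dict is empty (every named resource then falls through to the string branch, with a warning logged for each):

# parser.parse_select("2:ncpus=4+mem=16gb") would return roughly:
# [{"select": 2, "schedselect": "1", "ncpus": "4"},
#  {"select": "1", "schedselect": "1", "mem": "16gb"}]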
Example #4
def read_parallel_environments(
    autoscale_config: Dict,
    qbin: QBin,
) -> Dict[str, "ParallelEnvironment"]:
    parallel_envs = {}
    pe_config = autoscale_config.get("gridengine", {}).get("pes", {})
    pe_names = qbin.qconf(["-spl"]).splitlines()

    for pe_name in pe_names:
        pe_name = pe_name.strip()
        lines = qbin.qconf(["-sp", pe_name]).splitlines(False)
        pe = parse_ge_config(lines)

        req_key = "requires_placement_groups"

        if req_key in pe_config.get(pe_name, {}):
            logging.warning(
                "Overriding placement group behavior for PE %s with %s",
                pe_name,
                pe_config[pe_name][req_key],
            )
            pe[req_key] = pe_config[pe_name][req_key]

        parallel_envs[pe_name] = ParallelEnvironment(pe)

    return parallel_envs
Example #5
def is_valid_hostname(config: Dict, node: "Node") -> bool:
    # delayed import, as logging will import this module
    from hpc.autoscale import hpclogging as logging

    if not node.hostname:
        return False

    valid_hostnames: Optional[List[str]] = config.get("valid_hostnames")

    if not valid_hostnames:
        if is_standalone_dns(node):
            valid_hostnames = ["^ip-[0-9A-Za-z]{8}$"]
        else:
            return True

    for valid_hostname in valid_hostnames:
        if re.match(valid_hostname, node.hostname):
            return True

    logging.warning(
        "Rejecting invalid hostname '%s': Did not match any of the following patterns: %s",
        node.hostname,
        valid_hostnames,
    )
    return False
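
A sketch of the corresponding configuration (the patterns are illustrative, not from the source): valid_hostnames is a list of regular expressions, and when it is omitted only standalone-DNS nodes are restricted to the ip-XXXXXXXX pattern shown above.

config = {"valid_hostnames": ["^ip-[0-9A-Za-z]{8}$", "^compute[0-9]+$"]}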
Example #6
def hpcwrapper(*args: Any, **kwargs: Any) -> Optional[Any]:
    if function.__name__ in WHITELIST_FUNCTIONS_TYPES:
        if not hasattr(hpcwrapper, "hpcwarned"):
            setattr(hpcwrapper, "hpcwarned", True)
            logging.warning(
                "Runtime type checking is disabled for %s", function.__name__
            )
    return typechecked_func(*args, **kwargs)
Example #7
    def decorate(self, nodes: typing.List[Node]) -> None:
        if not nodes:
            nodes = []

        nodes = [n for n in nodes if n.exists]
        equalities = [
            " (node_id == '{}') ".format(n.delayed_node_id.node_id)
            for n in nodes
        ]

        if not equalities:
            return

        stmt = "select node_id, last_match_time, create_time, delete_time from nodes where {}".format(
            "{}".format(" OR ".join(equalities)))

        rows = self._execute(stmt)
        rows_by_id = partition_single(list(rows), lambda r: r[0])

        now = self.now()

        for node in nodes:
            node_id = node.delayed_node_id.node_id

            # should be impossible because we already filtered by exists
            if not node_id:
                logging.warning(
                    "Null node_id for %s. Leaving create/last_match/delete times as null.",
                    node,
                )
                continue

            if node_id in rows_by_id:

                node_id, last_match_time, create_time, delete_time = rows_by_id[
                    node_id]
                node.create_time_unix = create_time
                node.last_match_time_unix = last_match_time
                node.delete_time_unix = delete_time

                if self.create_timeout:
                    create_elapsed = max(0, now - create_time)
                    create_remaining = max(
                        0, self.create_timeout - create_elapsed)
                    node.create_time_remaining = create_remaining

                if self.last_match_timeout:
                    if node.keep_alive:
                        node.idle_time_remaining = -1
                    else:
                        match_elapsed = max(0, now - last_match_time)
                        match_remaining = max(
                            0, self.last_match_timeout - match_elapsed)
                        node.idle_time_remaining = match_remaining
Example #8
def get_pbspro_parser() -> PBSProParser:
    global _PARSER
    if _PARSER is None:
        # avoid circular import
        from pbspro.pbscmd import PBSCMD
        from pbspro.resource import read_resource_definitions

        # chicken / egg issue: we want the resource definitions
        # as a member of the parser, but we need the parser to parse
        # the definitions...
        # So create a temp parser with no resource definitions
        _PARSER = PBSProParser({})
        pbscmd = PBSCMD(_PARSER)
        logging.warning("Using uninitialized PBSProParser: please call" +
                        " set_pbspro_parser before calling get_pbspro_parser")
        resource_definitions = read_resource_definitions(pbscmd, {})
        _PARSER = PBSProParser(resource_definitions)
    return _PARSER
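
Per the warning above, callers are expected to initialize the parser explicitly before first use. A rough sketch, assuming set_pbspro_parser simply stores the parser in the module-level _PARSER and config is the autoscale config dict:

pbscmd = PBSCMD(PBSProParser({}))
resource_definitions = read_resource_definitions(pbscmd, config)
set_pbspro_parser(PBSProParser(resource_definitions))
parser = get_pbspro_parser()  # no warning once the parser has been set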
Example #9
def read_resource_definitions(
        pbscmd: PBSCMD, config: Dict) -> Dict[str, "PBSProResourceDefinition"]:
    ret: Dict[str, PBSProResourceDefinition] = {}
    res_dicts = pbscmd.qmgr_parsed("list", "resource")

    res_names = set([x["name"] for x in res_dicts])

    # TODO I believe this is the only one, but leaving a config option
    # as a backup plan
    read_only = config.get("pbspro", {}).get("read_only_resources",
                                             ["host", "vnode"])

    def_sched = pbscmd.qmgr_parsed("list", "sched", "default")
    sched_priv = def_sched[0]["sched_priv"]
    sched_config = os.path.join(sched_priv, "sched_config")
    from pbspro.parser import PBSProParser

    parser = PBSProParser(config)
    sched_resources = parser.parse_resources_from_sched_priv(sched_config)

    missing_res = sched_resources - res_names
    missing_res_dicts = []
    for res_name in missing_res:
        try:
            missing_res_dicts.extend(
                pbscmd.qmgr_parsed("list", "resource", res_name))
        except CalledProcessError as e:
            logging.warning(
                "Could not find resource %s that was defined in %s, Ignoring",
                res_name,
                sched_config,
            )
            logging.fine(e)

    for rdict in res_dicts + missing_res_dicts:
        name = rdict["name"]
        res_type = RESOURCE_TYPES[rdict["type"]]
        flag: ResourceFlag = rdict.get("flag", "")  # type: ignore
        ret[name] = PBSProResourceDefinition(name, res_type, flag)
        if name in read_only:
            ret[name].read_only = True

    return ret
Example #10
    def __init__(
        self,
        hostname: str,
        resources: typing.Optional[dict] = None,
        bucket_id: typing.Optional[ht.BucketId] = None,
    ) -> None:
        resources = resources or ht.ResourceDict({})
        private_ip: typing.Optional[ht.IpAddress]
        if SchedulerNode.ignore_hostnames:
            private_ip = None
        else:
            try:
                private_ip = ht.IpAddress(socket.gethostbyname(hostname))
            except Exception as e:
                logging.warning("Could not find private ip for %s: %s", hostname, e)
                private_ip = None

        Node.__init__(
            self,
            node_id=DelayedNodeId(ht.NodeName(hostname)),
            name=ht.NodeName(hostname),
            nodearray=ht.NodeArrayName("unknown"),
            bucket_id=bucket_id or ht.BucketId(str(uuid4())),
            hostname=ht.Hostname(hostname),
            private_ip=private_ip,
            instance_id=None,
            vm_size=ht.VMSize("unknown"),
            location=ht.Location("unknown"),
            spot=False,
            vcpu_count=1,
            memory=ht.Memory(0, "b"),
            infiniband=False,
            state=ht.NodeStatus("running"),
            target_state=ht.NodeStatus("running"),
            power_state=ht.NodeStatus("running"),
            exists=True,
            placement_group=None,
            managed=False,
            resources=ht.ResourceDict(resources),
            software_configuration=ImmutableOrderedDict({}),
            keep_alive=False,
        )
Example #11
    def update_scheduler_nodes(self,
                               scheduler_nodes: List[SchedulerNode]) -> None:

        by_hostname: Dict[str, Node] = partition_single(
            self.__scheduler_nodes_queue,
            lambda n: n.hostname_or_uuid  # type: ignore
        )

        for new_snode in scheduler_nodes:
            if new_snode.hostname not in by_hostname:
                by_hostname[new_snode.hostname] = new_snode
                self.__scheduler_nodes_queue.push(new_snode)
                self.node_mgr.add_unmanaged_nodes([new_snode])
                if new_snode.resources.get("ccnodeid"):
                    logging.warning(
                        "%s has ccnodeid defined, but no longer exists in CycleCloud",
                        new_snode,
                    )
                else:
                    logging.debug(
                        "Found new node[hostname=%s] that does not exist in CycleCloud",
                        new_snode.hostname,
                    )

                # TODO inform bucket catalog?
            elif new_snode.metadata.get("override_resources", True):

                old_snode = by_hostname[new_snode.hostname_or_uuid]
                logging.fine(
                    "Found existing CycleCloud node[hostname=%s]",
                    new_snode.hostname,
                )
                old_snode.update(new_snode)
            else:
                logging.fine(
                    "Found existing CycleCloud node[hostname=%s], but node.metadata.override_resources=false"
                    +
                    " so ignoring the reported resources and only copying metadata",
                    new_snode.hostname,
                )
                old_snode = by_hostname[new_snode.hostname_or_uuid]
                old_snode.metadata.update(new_snode.metadata)
Example #12
    def satisfied_by_node(self, node: "Node") -> SatisfiedResult:

        if self.attr not in node.available:
            # TODO log
            msg = "Resource[name={}] is not defined for Node[name={}]".format(
                self.attr, node.name)
            return SatisfiedResult(
                "UndefinedResource",
                self,
                node,
                [msg],
            )

        try:
            if node.available[self.attr] >= self.value:
                return SatisfiedResult(
                    "success",
                    self,
                    node,
                )
        except TypeError as e:
            logging.warning(
                "For attribute %s: Could not evaluate %s >= %s because they are different types: %s",
                self.attr,
                node.available[self.attr],
                self.value,
                e,
            )

        msg = "Resource[name={} value={}] < Node[name={} value={}]".format(
            self.attr,
            self.value,
            node.name,
            node.available[self.attr],
        )
        return SatisfiedResult(
            "InsufficientResource",
            self,
            node,
            reasons=[msg],
        )
Example #13
def get_node_hostgroups(config: Dict, node: Node) -> List[str]:
    hostgroups_expr = node.metadata.get("gridengine_hostgroups")

    if not hostgroups_expr:
        hostgroups_expr = node.software_configuration.get(
            "gridengine_hostgroups")

    if not hostgroups_expr:
        default_hostgroups = config.get("gridengine",
                                        {}).get("default_hostgroups", [])
        for dh in default_hostgroups:
            if "select" not in dh:
                logging.warning(
                    "Missing key 'select' in gridengine.default_hostgroups %s",
                    dh)
                continue
            if "hostgroups" not in dh:
                logging.warning(
                    "Missing key 'hostgroups' in gridengine.default_hostgroups %s",
                    dh)
                continue
            constraint_list = constraints.get_constraints(dh["select"])
            satisfied = True
            for c in constraint_list:
                if not c.satisfied_by_node(node):
                    satisfied = False
                    break
            if satisfied:
                hostgroups = dh["hostgroups"]
                if isinstance(hostgroups, str):
                    hostgroups = [hostgroups]

                hostgroups_expr = " ".join(hostgroups)
                # set it in metadata so we can output it in the cli
                node.metadata["gridengine_hostgroups"] = hostgroups_expr

    if hostgroups_expr:
        return re.split(",| +", hostgroups_expr)
    return []
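
A hedged sketch of the gridengine.default_hostgroups configuration that the loop above consumes (the selector and hostgroup values are illustrative): each entry needs a "select" constraint and a "hostgroups" value, which may be a single string or a list.

config = {
    "gridengine": {
        "default_hostgroups": [
            {"select": {"node.nodearray": "hpc"}, "hostgroups": ["@hpc.q"]},
            {"select": {}, "hostgroups": "@allhosts"},
        ]
    }
}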
Example #14
    def parse(self, value: str) -> Optional[Any]:
        try:
            if value.upper() == "NONE":
                return None
            if value.lower() == "infinity":
                return float("inf")

            if self.complex_type in ["INT", "RSMAP"]:
                return int(value)

            elif self.complex_type == "BOOL":
                try:
                    return bool(float(value))
                except ValueError:
                    if value.lower() in ["true", "false"]:
                        return value.lower() == "true"
                    else:
                        logging.warning(
                            "Could not parse '%s' for complex type %s - treating as string.",
                            value,
                            self.complex_type,
                        )
                    return value
            elif self.complex_type == "DOUBLE":
                return float(value)

            elif self.complex_type in ["RESTRING", "TIME", "STRING", "HOST"]:
                return value

            elif self.complex_type == "CSTRING":
                # TODO test
                return value.lower()  # case insensitive - we will just always lc

            elif self.complex_type == "MEMORY":
                size = value[-1]
                if size.isdigit():
                    mem = ht.Memory(float(value), "b")
                else:
                    mem = ht.Memory(float(value[:-1]), size)
                return mem.convert_to("g")
            else:
                if not self.__logged_type_warning:
                    logging.warning(
                        "Unknown complex type %s - treating as string.",
                        self.complex_type,
                    )
                    self.__logged_type_warning = True
                return value
        except Exception:
            if not self.__logged_parse_warning:
                logging.warning(
                    "Could not parse complex %s with value '%s'. Treating as string",
                    self,
                    value,
                )
                self.__logged_parse_warning = True
            return value
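
A few illustrative results for the branches above, assuming complexes whose complex_type matches each case (the input values are made up):

# INT     : parse("4")      -> 4
# BOOL    : parse("1")      -> True;  parse("false") -> False
# DOUBLE  : parse("1.5")    -> 1.5
# MEMORY  : parse("16g")    -> ht.Memory(16.0, "g").convert_to("g")
# any type: parse("NONE")   -> None;  parse("infinity") -> float("inf")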
Example #15
    def _pack_job(self, job: Job) -> Result:
        """
        1) will it ever fit? - check num nodes with any capacity
        2) does it have the proper resources? bucket.match(job.resources)
        3) order them
        4) tell the bucket to allocate X nodes - let the bucket figure out what is new and what is not.
        """
        # TODO break non-exclusive
        allocated_nodes: List[Node] = []
        slots_to_allocate = job.iterations_remaining
        assert job.iterations_remaining > 0

        available_buckets = self.node_mgr.get_buckets()
        # I don't want to fill up the log with rejecting placement groups
        # so just filter them here
        filter_by_colocated = [
            b for b in available_buckets
            if bool(b.placement_group) == job.colocated
        ]
        candidates_result = job.bucket_candidates(filter_by_colocated)

        if not candidates_result:
            # TODO log or something
            logging.warning("There are no resources to scale up for job %s",
                            job)
            logging.warning("See below:")
            for child_result in candidates_result.child_results or []:
                logging.warning("    %s", child_result.message)
            return candidates_result

        failure_reasons = self._handle_allocate(job,
                                                allocated_nodes,
                                                all_or_nothing=False)

        # we have allocated at least some tasks
        if allocated_nodes:
            assert allocated_nodes
            return AllocationResult("success",
                                    nodes=allocated_nodes,
                                    slots_allocated=slots_to_allocate)

        return AllocationResult("Failed", reasons=failure_reasons)
Example #16
def autoscale_hpcpack(
    config: Dict[str, Any],
    ctx_handler: Optional[DefaultContextHandler] = None,
    hpcpack_rest_client: Optional[HpcRestClient] = None,
    dry_run: bool = False,
) -> None:

    if not hpcpack_rest_client:
        hpcpack_rest_client = new_rest_client(config)

    if ctx_handler:
        ctx_handler.set_context("[Sync-Status]")
    autoscale_config = config.get("autoscale") or {}
    # Load history info
    idle_timeout_seconds: int = autoscale_config.get("idle_timeout") or 600
    provisioning_timeout_seconds = autoscale_config.get("boot_timeout") or 1500
    statefile = autoscale_config.get(
        "statefile") or "C:\\cycle\\jetpack\\config\\autoscaler_state.txt"
    archivefile = autoscale_config.get(
        "archivefile") or "C:\\cycle\\jetpack\\config\\autoscaler_archive.txt"
    node_history = HpcNodeHistory(
        statefile=statefile,
        archivefile=archivefile,
        provisioning_timeout=provisioning_timeout_seconds,
        idle_timeout=idle_timeout_seconds)

    logging.info("Synchronizing the nodes between Cycle cloud and HPC Pack")

    # Initialize data of History info, cc nodes, HPC Pack nodes, HPC grow decisions
    # Get node list from Cycle Cloud
    def nodes_state_key(n: Node) -> Tuple[int, str, int]:
        try:
            state_pri = 1
            if n.state == 'Deallocated':
                state_pri = 2
            elif n.state == 'Stopping':
                state_pri = 3
            elif n.state == 'Terminating':
                state_pri = 4
            name, index = n.name.rsplit("-", 1)
            return (state_pri, name, int(index))
        except Exception:
            return (state_pri, n.name, 0)

    node_mgr: NodeManager = new_node_manager(config)
    for b in node_mgr.get_buckets():
        b.nodes.sort(key=nodes_state_key)
    cc_nodes: List[Node] = node_mgr.get_nodes()
    cc_nodes_by_id = partition_single(cc_nodes,
                                      func=lambda n: n.delayed_node_id.node_id)
    # Get compute node list and grow decision from HPC Pack
    hpc_node_groups = hpcpack_rest_client.list_node_groups()
    grow_decisions = hpcpack_rest_client.get_grow_decision()
    logging.info("grow decision: {}".format(grow_decisions))
    hpc_cn_nodes: List[HpcNode] = hpcpack_rest_client.list_computenodes()
    hpc_cn_nodes = [n for n in hpc_cn_nodes if n.active]

    # This function will link node history items, cc nodes and hpc nodes
    node_history.synchronize(cc_nodes, hpc_cn_nodes)

    cc_nodearrays = set([b.nodearray for b in node_mgr.get_buckets()])
    logging.info("Current node arrays in cyclecloud: {}".format(cc_nodearrays))

    # Create HPC node groups for CC node arrays
    cc_map_hpc_groups = ["CycleCloudNodes"] + list(cc_nodearrays)
    for cc_grp in cc_map_hpc_groups:
        if ci_notin(cc_grp, hpc_node_groups):
            logging.info("Create HPC node group: {}".format(cc_grp))
            hpcpack_rest_client.add_node_group(cc_grp,
                                               "Cycle Cloud Node group")

    # Add HPC nodes into corresponding node groups
    add_cc_tag_nodes = [
        n.name for n in hpc_cn_nodes if n.shall_addcyclecloudtag
    ]
    if len(add_cc_tag_nodes) > 0:
        logging.info(
            "Adding HPC nodes to node group CycleCloudNodes: {}".format(
                add_cc_tag_nodes))
        hpcpack_rest_client.add_node_to_node_group("CycleCloudNodes",
                                                   add_cc_tag_nodes)
    for cc_grp in list(cc_nodearrays):
        add_array_tag_nodes = [
            n.name for n in hpc_cn_nodes
            if n.shall_addnodearraytag and ci_equals(n.cc_nodearray, cc_grp)
        ]
        if len(add_array_tag_nodes) > 0:
            logging.info("Adding HPC nodes to node group {}: {}".format(
                cc_grp, add_array_tag_nodes))
            hpcpack_rest_client.add_node_to_node_group(cc_grp,
                                                       add_array_tag_nodes)

    # Possible values for HPC NodeState (states marked with * shall not occur for CC nodes):
    #   Unknown, Provisioning, Offline, Starting, Online, Draining, Rejected(*), Removing, NotDeployed(*), Stopping(*)
    # Remove the following HPC Pack nodes:
    #   1. The corresponding CC node already removed
    #   2. The corresponding CC node is stopped and HPC node is not assigned a node template
    # Take offline the following HPC Pack nodes:
    #   1. The corresponding CC node is stopped or is going to stop
    hpc_nodes_to_remove = [
        n.name for n in hpc_cn_nodes
        if n.removed_cc_node or (n.stopped_cc_node and not n.template_assigned)
    ]
    hpc_nodes_to_take_offline = [
        n.name for n in hpc_cn_nodes
        if n.stopped_cc_node and ci_equals(n.state, "Online")
    ]
    if len(hpc_nodes_to_remove) > 0:
        logging.info("Removing the HPC nodes: {}".format(hpc_nodes_to_remove))
        if dry_run:
            logging.info("Dry-run: no real action")
        else:
            hpcpack_rest_client.remove_nodes(hpc_nodes_to_remove)
    hpc_cn_nodes = [
        n for n in hpc_cn_nodes if not (n.stopped_cc_node or n.removed_cc_node)
    ]

    # Assign default node template for unapproved CC node
    hpc_nodes_to_assign_template = [
        n.name for n in hpc_cn_nodes
        if n.bound_cc_node and not n.template_assigned
    ]
    if len(hpc_nodes_to_assign_template) > 0:
        logging.info(
            "Assigning default node template for the HPC nodes: {}".format(
                hpc_nodes_to_assign_template))
        if dry_run:
            logging.info("Dry-run: no real action")
        else:
            hpcpack_rest_client.assign_default_compute_node_template(
                hpc_nodes_to_assign_template)

    ### Start scale up checking:
    logging.info("Start scale up checking ...")
    if ctx_handler:
        ctx_handler.set_context("[scale-up]")

    hpc_nodes_with_active_cc = [
        n for n in hpc_cn_nodes if n.template_assigned and n.bound_cc_node
    ]
    # Exclude the already online healthy HPC nodes before calling node_mgr.allocate
    for hpc_node in hpc_nodes_with_active_cc:
        if hpc_node.ready_for_job:
            hpc_node.bound_cc_node.closed = True

    # Terminate the provisioning timeout CC nodes
    cc_node_to_terminate: List[Node] = []
    for cc_node in cc_nodes:
        if ci_equals(cc_node.target_state, 'Deallocated') or ci_equals(
                cc_node.target_state,
                'Terminated') or cc_node.create_time_remaining:
            continue
        nhi = node_history.find(cc_id=cc_node.delayed_node_id.node_id)
        if not nhi.hpc_id:
            cc_node.closed = True
            cc_node_to_terminate.append(cc_node)
        else:
            hpc_node = ci_find_one(hpc_nodes_with_active_cc, nhi.hpc_id,
                                   lambda n: n.id)
            if hpc_node and hpc_node.error:
                cc_node.closed = True
                cc_node_to_terminate.append(cc_node)

    # "ComputeNodes", "CycleCloudNodes", "AzureIaaSNodes" are all treated as default
    # grow_by_socket not supported yet, treat as grow_by_node
    defaultGroups = [
        "Default", "ComputeNodes", "AzureIaaSNodes", "CycleCloudNodes"
    ]
    default_cores_to_grow = default_nodes_to_grow = 0.0

    # If the current CC nodes in the node array cannot satisfy the grow decision, the group is hungry
    # For a hungry group, no idle check is required if the node health is OK
    group_hungry: Dict[str, bool] = {}
    nbrNewNodes: int = 0
    grow_groups = list(grow_decisions.keys())
    for grp in grow_groups:
        tmp = grow_decisions.pop(grp)
        if not (tmp.cores_to_grow + tmp.nodes_to_grow + tmp.sockets_to_grow):
            continue
        if ci_in(grp, defaultGroups):
            default_cores_to_grow += tmp.cores_to_grow
            default_nodes_to_grow += tmp.nodes_to_grow + tmp.sockets_to_grow
            continue
        if ci_notin(grp, cc_nodearrays):
            logging.warning(
                "No mapping node array for the grow requirement {}:{}".format(
                    grp, tmp))
            continue
        group_hungry[grp] = False
        array = ci_lookup(grp, cc_nodearrays)
        selector = {'ncpus': 1, 'node.nodearray': [array]}
        target_cores = math.ceil(tmp.cores_to_grow)
        target_nodes = math.ceil(tmp.nodes_to_grow + tmp.sockets_to_grow)
        if target_nodes:
            logging.info("Allocate: {}  Target Nodes: {}".format(
                selector, target_nodes))
            result = node_mgr.allocate(selector, node_count=target_nodes)
            logging.info(result)
            if not result or result.total_slots < target_nodes:
                group_hungry[grp] = True
        if target_cores:
            logging.info("Allocate: {}  Target Cores: {}".format(
                selector, target_cores))
            result = node_mgr.allocate(selector, slot_count=target_cores)
            logging.info(result)
            if not result or result.total_slots < target_cores:
                group_hungry[grp] = True
        if len(node_mgr.new_nodes) > nbrNewNodes:
            group_hungry[grp] = True
        nbrNewNodes = len(node_mgr.new_nodes)

    # We then check the grow decision for the default node groups:
    checkShrinkNeeded = True
    growForDefaultGroup = bool(default_nodes_to_grow or default_cores_to_grow)
    if growForDefaultGroup:
        selector = {'ncpus': 1}
        if default_nodes_to_grow:
            target_nodes = math.ceil(default_nodes_to_grow)
            logging.info("Allocate: {}  Target Nodes: {}".format(
                selector, target_nodes))
            result = node_mgr.allocate({'ncpus': 1}, node_count=target_nodes)
            if not result or result.total_slots < target_nodes:
                checkShrinkNeeded = False
        if default_cores_to_grow:
            target_cores = math.ceil(default_cores_to_grow)
            logging.info("Allocate: {}  Target Cores: {}".format(
                selector, target_cores))
            result = node_mgr.allocate({'ncpus': 1}, slot_count=target_cores)
            if not result or result.total_slots < target_cores:
                checkShrinkNeeded = False
        if len(node_mgr.new_nodes) > nbrNewNodes:
            checkShrinkNeeded = False
        nbrNewNodes = len(node_mgr.new_nodes)

    if nbrNewNodes > 0:
        logging.info("Need to Allocate {} nodes in total".format(nbrNewNodes))
        if dry_run:
            logging.info("Dry-run: skipping node bootup...")
        else:
            logging.info("Allocating {} nodes in total".format(
                len(node_mgr.new_nodes)))
            bootup_result: BootupResult = node_mgr.bootup()
            logging.info(bootup_result)
            if bootup_result and bootup_result.nodes:
                for cc_node in bootup_result.nodes:
                    nhi = node_history.find(
                        cc_id=cc_node.delayed_node_id.node_id)
                    if nhi is None:
                        nhi = node_history.insert(
                            NodeHistoryItem(cc_node.delayed_node_id.node_id))
                    else:
                        nhi.restart()
    else:
        logging.info("No need to allocate new nodes ...")

    ### Start the shrink checking
    if ctx_handler:
        ctx_handler.set_context("[scale-down]")

    cc_node_to_shutdown: List[Node] = []
    if not checkShrinkNeeded:
        logging.info("No shrink check at this round ...")
        if not dry_run:
            for nhi in node_history.items:
                if not nhi.stopped and nhi.hpc_id:
                    nhi.idle_from = None
    else:
        logging.info("Start scale down checking ...")
        # By default, we check idle for active CC nodes in HPC Pack with 'Offline', 'Starting', 'Online', 'Draining' state
        candidate_idle_check_nodes = [
            n for n in hpc_nodes_with_active_cc
            if (not n.bound_cc_node.keep_alive)
            and ci_in(n.state, ["Offline", "Starting", "Online", "Draining"])
        ]

        # We can exclude some nodes from idle checking:
        # 1. If HPC Pack ask for grow in default node group(s), all healthy ONLINE nodes are considered as busy
        # 2. If HPC Pack ask for grow in certain node group, all healthy ONLINE nodes in that node group are considered as busy
        # 3. If a node group is hungry (new CC required or grow request not satisfied), no idle check needed for all nodes in that node array
        if growForDefaultGroup:
            candidate_idle_check_nodes = [
                n for n in candidate_idle_check_nodes if not n.ready_for_job
            ]
        for grp, hungry in group_hungry.items():
            if hungry:
                candidate_idle_check_nodes = [
                    n for n in candidate_idle_check_nodes
                    if not ci_equals(grp, n.cc_nodearray)
                ]
            elif not growForDefaultGroup:
                candidate_idle_check_nodes = [
                    n for n in candidate_idle_check_nodes
                    if not (ci_equals(grp, n.cc_nodearray) and n.ready_for_job)
                ]

        curtime = datetime.utcnow()
        # Offline node must be idle
        idle_node_names = [
            n.name for n in candidate_idle_check_nodes
            if ci_equals(n.state, 'Offline')
        ]
        if len(candidate_idle_check_nodes) > len(idle_node_names):
            idle_nodes = hpcpack_rest_client.check_nodes_idle([
                n.name for n in candidate_idle_check_nodes
                if not ci_equals(n.state, 'Offline')
            ])
            if len(idle_nodes) > 0:
                idle_node_names.extend([n.node_name for n in idle_nodes])

        if len(idle_node_names) > 0:
            logging.info(
                "The following nodes are idle: {}".format(idle_node_names))
        else:
            logging.info("No idle node found in this round.")

        retention_days = autoscale_config.get("vm_retention_days") or 7
        for nhi in node_history.items:
            if nhi.stopped:
                if nhi.stop_time + timedelta(
                        days=retention_days) < datetime.utcnow():
                    cc_node = cc_nodes_by_id.get(nhi.cc_id)
                    if cc_node is not None:
                        cc_node_to_terminate.append(cc_node)
                continue
            if ci_in(nhi.hostname, idle_node_names):
                if nhi.idle_from is None:
                    nhi.idle_from = curtime
                elif nhi.idle_timeout(idle_timeout_seconds):
                    nhi.stop_time = curtime
                    cc_node = cc_nodes_by_id.get(nhi.cc_id)
                    if cc_node is not None:
                        cc_node_to_shutdown.append(cc_node)
            else:
                nhi.idle_from = None

    shrinking_cc_node_ids = [
        n.delayed_node_id.node_id for n in cc_node_to_terminate
    ]
    shrinking_cc_node_ids.extend(
        [n.delayed_node_id.node_id for n in cc_node_to_shutdown])
    hpc_nodes_to_bring_online = [
        n.name for n in hpc_nodes_with_active_cc
        if ci_equals(n.state, 'Offline') and not n.error
        and ci_notin(n.cc_node_id, shrinking_cc_node_ids)
    ]
    hpc_nodes_to_take_offline.extend([
        n.name for n in hpc_nodes_with_active_cc
        if ci_equals(n.state, 'Online')
        and ci_in(n.cc_node_id, shrinking_cc_node_ids)
    ])
    if len(hpc_nodes_to_bring_online) > 0:
        logging.info("Bringing the HPC nodes online: {}".format(
            hpc_nodes_to_bring_online))
        if dry_run:
            logging.info("Dry-run: no real action")
        else:
            hpcpack_rest_client.bring_nodes_online(hpc_nodes_to_bring_online)

    if len(hpc_nodes_to_take_offline) > 0:
        logging.info("Taking the HPC nodes offline: {}".format(
            hpc_nodes_to_take_offline))
        if dry_run:
            logging.info("Dry-run: no real action")
        else:
            hpcpack_rest_client.take_nodes_offline(hpc_nodes_to_take_offline)

    if len(cc_node_to_shutdown) > 0:
        logging.info("Shut down the following Cycle cloud node: {}".format(
            [cn.name for cn in cc_node_to_shutdown]))
        if dry_run:
            logging.info("Dry-run: skip ...")
        else:
            node_mgr.shutdown_nodes(cc_node_to_shutdown)

    if len(cc_node_to_terminate) > 0:
        logging.info(
            "Terminating the following provisioning-timeout CycleCloud nodes: {}"
            .format([cn.name for cn in cc_node_to_terminate]))
        if dry_run:
            logging.info("Dry-run: skip ...")
        else:
            node_mgr.terminate_nodes(cc_node_to_terminate)

    if not dry_run:
        logging.info("Save node history: {}".format(node_history))
        node_history.save()
Example #17
    def __init__(
        self,
        queue_config: Dict,
        scheduler: "GridEngineScheduler",
        pes: Dict[str, "ParallelEnvironment"],
        unbound_hostgroups: Dict[str, Hostgroup],
        complex_values: Dict[str, Dict],
        autoscale_enabled: bool = True,
    ) -> None:
        self.queue_config = queue_config
        self.complex_values = complex_values

        self.autoscale_enabled = autoscale_enabled
        assert isinstance(self.queue_config["hostlist"], str), self.queue_config[
            "hostlist"
        ]

        self.__hostlist = re.split(",| +", self.queue_config["hostlist"])
        self.__pe_to_hostgroups: Dict[str, List[str]] = {}
        self._pe_keys_cache: Dict[str, List[str]] = {}
        self.__parallel_environments: Dict[str, "ParallelEnvironment"] = {}
        self.__slots = parse_slots(self.queue_config.get("slots", ""))

        for hg, slots in self.__slots.items():
            if hg is None:
                continue
            if hg not in self.complex_values:
                self.complex_values[hg] = {}
            self.complex_values[hg]["slots"] = slots

        self.__seq_no = parse_seq_no(self.queue_config.get("seq_no", "0"))

        pe_list = parse_hostgroup_mapping(queue_config["pe_list"])

        def _get_seqno(hg_name: str) -> int:
            return self.__seq_no.get(hg_name, self.__seq_no.get(None, 0))  # type: ignore

        if scheduler.sort_by_seqno:
            potential_defaults = self.__hostlist + list(self.seq_no.keys())
            self.default_hg = sorted(potential_defaults, key=_get_seqno)[0]
        else:
            self.default_hg = self.__hostlist[0]

        for hostgroup, pes_for_hg in pe_list.items():
            for pe_name in pes_for_hg:
                if not pe_name:
                    continue

                if pe_name not in pes:
                    logging.warning(
                        'Unknown parallel environment %s defined in {"gridengine": {"pes": {"%s": {}}}} - %s. Skipping',
                        pe_name,
                        pe_name,
                        list(pes.keys()),
                    )
                    continue
                self.__parallel_environments[pe_name] = pes[pe_name]

                # common case, and let's avoid nlogn insertion
                if pe_name not in self.__pe_to_hostgroups:
                    self.__pe_to_hostgroups[pe_name] = [hostgroup]
                else:
                    all_hostgroups = self.__pe_to_hostgroups[pe_name]
                    if hostgroup not in all_hostgroups:
                        all_hostgroups.append(hostgroup)

        if queue_config["pe_list"] and queue_config["pe_list"].lower() != "none":
            assert self.__parallel_environments, queue_config["pe_list"]

        self.user_lists = parse_hostgroup_mapping(
            queue_config.get("user_lists") or "", self.hostlist_groups, filter_none=True
        )
        self.xuser_lists = parse_hostgroup_mapping(
            queue_config.get("xuser_lists") or "",
            self.hostlist_groups,
            filter_none=True,
        )
        self.projects = parse_hostgroup_mapping(
            queue_config.get("projects") or "", self.hostlist_groups, filter_none=True
        )
        self.xprojects = parse_hostgroup_mapping(
            queue_config.get("xprojects") or "", self.hostlist_groups, filter_none=True
        )

        hostgroup_mappings = (
            [list(self.complex_values.keys())]
            + list(self.__pe_to_hostgroups.values())
            + [list(self.seq_no.keys())]
            + [list(self.user_lists.keys())]
            + [list(self.xuser_lists.keys())]
            + [list(self.projects.keys())]
            + [list(self.xprojects.keys())]
        )
        for hg_names in hostgroup_mappings:
            for hg_name in hg_names:
                if hg_name and hg_name not in self.__hostlist:
                    self.__hostlist.append(hg_name)

        all_host_groups = set(self.hostlist)
        for pe in self.__parallel_environments.values():
            if pe.requires_placement_groups:
                all_host_groups = all_host_groups - set(
                    self.__pe_to_hostgroups.get(pe.name) or []
                )

        self.__ht_hostgroups = [x for x in list(all_host_groups) if x.startswith("@")]

        self.__bound_hostgroups: Dict[str, BoundHostgroup] = {}

        for hg_name in self.hostlist_groups:
            hg_seq_no = _get_seqno(hg_name)
            self.__bound_hostgroups[hg_name] = BoundHostgroup(
                self, unbound_hostgroups[hg_name], hg_seq_no
            )
Example #18
def parse_scheduler_node(
    ndict: Dict[str, Any],
    resource_definitions: Dict[str,
                               PBSProResourceDefinition]) -> SchedulerNode:
    """
    Implementation of parsing a single scheduler node.
    """
    parser = get_pbspro_parser()

    hostname = ndict["name"]
    res_avail = parser.parse_resources_available(ndict, filter_is_host=True)
    res_assigned = parser.parse_resources_assigned(ndict, filter_is_host=True)

    node = SchedulerNode(hostname, res_avail)
    jobs_expr = ndict.get("jobs", "")

    state = ndict.get("state") or ""

    if state == "free" and jobs_expr.strip():
        state = "partially-free"

    node.metadata["pbs_state"] = state

    if "down" in state:
        node.marked_for_deletion = True

    node.metadata["last_state_change_time"] = ndict.get(
        "last_state_change_time", "")

    for tok in jobs_expr.split(","):
        tok = tok.strip()
        if not tok:
            continue
        job_id_full, sub_job_id = tok.rsplit("/", 1)
        sched_host = ""
        if "." in job_id_full:
            job_id, sched_host = job_id_full.split(".", 1)
        else:
            job_id = job_id_full

        node.assign(job_id)

        if "job_ids_long" not in node.metadata:
            node.metadata["job_ids_long"] = [job_id_full]
        elif job_id_full not in node.metadata["job_ids_long"]:
            node.metadata["job_ids_long"].append(job_id_full)

    for res_name, value in res_assigned.items():
        resource = resource_definitions.get(res_name)

        if not resource or not resource.is_host:
            continue

        if resource.is_consumable:
            if res_name in node.available:
                node.available[res_name] -= value
            else:
                logging.warning(
                    "%s was not defined under resources_available, but was " +
                    "defined under resources_assigned for %s. Setting available to assigned.",
                    res_name,
                    node,
                )
                node.available[res_name] = value

    if "exclusive" in node.metadata["pbs_state"]:
        node.closed = True

    return node
Example #19
def parse_jobs(
    pbscmd: PBSCMD,
    resource_definitions: Dict[str, PBSProResourceDefinition],
    queues: Dict[str, PBSProQueue],
    resources_for_scheduling: Set[str],
) -> List[Job]:
    """
    Parses PBS qstat output and creates relevant hpc.autoscale.job.job.Job objects
    """
    parser = get_pbspro_parser()
    # alternate format triggered by
    # -a, -i, -G, -H, -M, -n, -r, -s, -T, or -u
    ret: List[Job] = []

    response: Dict = pbscmd.qstat_json("-f", "-t")

    for job_id, jdict in response.get("Jobs", {}).items():
        job_id = job_id.split(".")[0]

        job_state = jdict.get("job_state")
        if not job_state:
            logging.warning("No job_state defined for job %s. Skipping",
                            job_id)
            continue

        if job_state != PBSProJobStates.Queued:
            continue

        # ensure we don't autoscale jobs from disabled or non-started queues
        qname = jdict.get("queue")
        if not qname or qname not in queues:
            logging.warning("queue was not defined for job %s: ignoring",
                            job_id)
            continue

        queue: PBSProQueue = queues[qname]
        if not queue.enabled:
            logging.fine("Skipping job %s from disabled queue %s", job_id,
                         qname)
            continue

        if not queue.started:
            logging.fine("Skipping job %s from non-started queue %s", job_id,
                         qname)
            continue

        # handle array vs individual jobs
        if jdict.get("array"):
            iterations = parser.parse_range_size(
                jdict["array_indices_submitted"])
            remaining = parser.parse_range_size(
                jdict["array_indices_remaining"])
        elif "[" in job_id:
            continue
        else:
            iterations = 1
            remaining = 1

        res_list = jdict["Resource_List"]
        res_list["schedselect"] = jdict["schedselect"]
        rdict = parser.convert_resource_list(res_list)

        pack = (PackingStrategy.PACK if rdict["place"]["arrangement"]
                in ["free", "pack"] else PackingStrategy.SCATTER)

        # SMP style jobs
        is_smp = (rdict["place"].get("grouping") == "host"
                  or rdict["place"]["arrangement"] == "pack")

        # pack jobs do not need to define node_count

        node_count = int(rdict.get("nodect", "0"))

        smp_multiplier = 1

        if is_smp:
            smp_multiplier = max(1, iterations) * max(1, node_count)
            # for key, value in list(rdict.items()):
            #     if isinstance(value, (float, int)):
            #         value = value * smp_multiplier
            iterations = node_count = 1

        effective_node_count = max(node_count, 1)

        # htc jobs set ungrouped=true. see our default htcq
        colocated = (not is_smp and queue.uses_placement
                     and rdict.get("ungrouped", "false").lower() == "false")

        sharing = rdict["place"].get("sharing")

        for n, chunk_base in enumerate(rdict["schedselect"]):

            chunk: Dict[str, Any] = {}

            chunk.update(rdict)

            if "ncpus" not in chunk_base:
                chunk["ncpus"] = chunk["ncpus"] // effective_node_count

            if smp_multiplier > 1:
                for key, value in list(chunk_base.items()):
                    if isinstance(value, (int, float)):
                        chunk_base[key] = value * smp_multiplier
            # do this _after_ rdict, since the chunks
            # will override the top level resources
            # e.g. notice that ncpus=4. This will be the rdict value
            # but the chunks have ncpus=2
            # Resource_List.ncpus = 4
            # Resource_List.nodect = 2
            # Resource_List.select = 2:ncpus=2

            chunk.update(chunk_base)
            working_constraint: Dict[str, Any] = {}
            constraints = [working_constraint]

            if colocated:
                working_constraint["in-a-placement-group"] = True

            my_job_id = job_id
            if len(rdict["schedselect"]) > 1:
                if "." in job_id:
                    job_index, host = job_id.split(".", 1)
                    my_job_id = "{}+{}.{}".format(job_index, n, host)
                else:
                    my_job_id = "{}+{}".format(job_id, n)

            if sharing == "excl":
                working_constraint["exclusive-task"] = True
            elif sharing == "exclhost":
                working_constraint["exclusive"] = True

            job_resources = {}

            for rname, rvalue in chunk.items():
                if rname in ["select", "schedselect", "place", "nodect"]:
                    continue

                if rname not in resources_for_scheduling:
                    if rname == "skipcyclesubhook":
                        continue
                    logging.warning(
                        "Ignoring resource %s as it was not defined in sched_config",
                        rname,
                    )
                    continue

                # add all resource requests here. By that, I mean
                # non resource requests, like exclusive, should be ignored
                # required for get_non_host_constraints
                job_resources[rname] = rvalue

                resource_def = resource_definitions.get(rname)

                # constraints are for the node/host
                # queue/scheduler level ones will be added using
                # > queue.get_non_host_constraints(job_resource)
                if not resource_def or not resource_def.is_host:
                    continue

                if rname not in working_constraint:
                    working_constraint[rname] = rvalue
                else:
                    # hit a conflict, so start a new working cons
                    # so we maintain precedence
                    working_constraint = {rname: rvalue}
                    constraints.append(working_constraint)

            queue_constraints = queue.get_non_host_constraints(job_resources)
            constraints.extend(queue_constraints)

            job = Job(
                name=my_job_id,
                constraints=constraints,
                iterations=iterations,
                node_count=node_count,
                colocated=colocated,
                packing_strategy=pack,
            )
            job.iterations_remaining = remaining
            ret.append(job)

    return ret
Example #20
    def _validate_reverse_dns(self, node: Node) -> bool:
        # let's make sure the hostname is valid and reverse
        # dns compatible before adding to GE

        # if there is no private ip, then the hostname was removed, most likely
        # by azure DNS
        if not node.private_ip:
            return True

        try:
            addr_info = socket.gethostbyaddr(node.private_ip)
        except Exception as e:
            logging.error(
                "Could not convert private_ip(%s) to hostname using gethostbyaddr() for %s: %s",
                node.private_ip,
                node,
                str(e),
            )
            return False

        addr_info_ips = addr_info[-1]
        if isinstance(addr_info_ips, str):
            addr_info_ips = [addr_info_ips]

        if node.private_ip not in addr_info_ips:
            logging.warning(
                "%s has a hostname that does not match the" +
                " private_ip (%s) reported by cyclecloud (%s)! Skipping",
                node,
                addr_info_ips,
                node.private_ip,
            )
            return False

        expect_multiple_entries = (node.software_configuration.get(
            "cyclecloud", {}).get("hosts", {}).get("standalone_dns",
                                                   {}).get("enabled", True))

        addr_info_hostname = addr_info[0].split(".")[0]
        if addr_info_hostname.lower() != node.hostname.lower():
            if expect_multiple_entries:
                logging.warning(
                    "%s has a hostname that can not be queried via reverse" +
                    " dns (private_ip=%s cyclecloud hostname=%s reverse dns hostname=%s)."
                    + " This is common and usually repairs itself. Skipping",
                    node,
                    node.private_ip,
                    node.hostname,
                    addr_info_hostname,
                )
            else:
                logging.error(
                    "%s has a hostname that can not be queried via reverse" +
                    " dns (private_ip=%s cyclecloud hostname=%s reverse dns hostname=%s)."
                    +
                    " If you have an entry for this address in your /etc/hosts file, please remove it.",
                    node,
                    node.private_ip,
                    node.hostname,
                    addr_info_hostname,
                )
            return False
        return True
Example #21
def _parse_complexes(
    autoscale_config: Dict, complex_lines: List[str]
) -> Dict[str, "Complex"]:
    relevant_complexes = None
    if autoscale_config:
        relevant_complexes = autoscale_config.get("gridengine", {}).get(
            "relevant_complexes"
        )
        if relevant_complexes:
            # special handling of ccnodeid, since it is something we
            # create for the user
            relevant_complexes = relevant_complexes + ["ccnodeid"]

        if relevant_complexes:
            logging.info(
                "Restricting complexes for autoscaling to %s", relevant_complexes
            )

    complexes: List[Complex] = []
    headers = complex_lines[0].lower().replace("#", "").split()

    required = set(["name", "type", "consumable"])
    missing = required - set(headers)
    if missing:
        logging.error(
            "Could not parse complex file as it is missing expected columns: %s."
            + " Autoscale likely will not work.",
            list(missing),
        )
        return {}

    for n, line in enumerate(complex_lines[1:]):
        if line.startswith("#"):
            continue
        toks = line.split()
        if len(toks) != len(headers):
            logging.warning(
                "Could not parse complex at line {} - ignoring: '{}'".format(n, line)
            )
            continue
        c = dict(zip(headers, toks))
        try:

            if (
                relevant_complexes
                and c["name"] not in relevant_complexes
                and c["shortcut"] not in relevant_complexes
            ):
                logging.trace(
                    "Ignoring complex %s because it was not defined in gridengine.relevant_complexes",
                    c["name"],
                )
                continue

            complex = Complex(
                name=c["name"],
                shortcut=c.get("shortcut", c["name"]),
                complex_type=c["type"],
                relop=c.get("relop", "=="),
                requestable=c.get("requestable", "YES").lower() == "yes",
                consumable=c.get("consumable", "YES").lower() == "yes",
                default=c.get("default"),
                urgency=int(c.get("urgency", 0)),
            )

            complexes.append(complex)

        except Exception:
            logging.exception("Could not parse complex %s - %s", line, c)

    # TODO test RDH
    ret = partition_single(complexes, lambda x: x.name)
    shortcut_dict = partition_single(complexes, lambda x: x.shortcut)
    ret.update(shortcut_dict)
    return ret
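
A hedged sketch of the qconf -sc style input this function expects (the rows are illustrative): the first line supplies the column headers, and "#"-prefixed lines after it are skipped.

complex_lines = [
    "#name        shortcut  type    relop  requestable  consumable  default  urgency",
    "slots        s         INT     <=     YES          YES         1        1000",
    "m_mem_free   mf        MEMORY  <=     YES          YES         0        0",
]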
Example #22
def autoscale_pbspro(
    config: Dict[str, Any],
    pbs_env: Optional[PBSProEnvironment] = None,
    pbs_driver: Optional[PBSProDriver] = None,
    ctx_handler: Optional[DefaultContextHandler] = None,
    node_history: Optional[NodeHistory] = None,
    dry_run: bool = False,
) -> DemandResult:
    global _exit_code

    assert not config.get("read_only", False)
    if dry_run:
        logging.warning("Running pbs autoscaler in dry run mode")
        # allow multiple instances
        config["lock_file"] = None
        # put in read only mode
        config["read_only"] = True

    # interface to PBSPro, generally by cli
    if pbs_driver is None:
        # allow tests to pass in a mock
        pbs_driver = PBSProDriver(config)

    if pbs_env is None:
        pbs_env = envlib.from_driver(config, pbs_driver)

    pbs_driver.initialize()

    config = pbs_driver.preprocess_config(config)

    logging.debug("Driver = %s", pbs_driver)

    demand_calculator = calculate_demand(config, pbs_env, ctx_handler,
                                         node_history)

    failed_nodes = demand_calculator.node_mgr.get_failed_nodes()
    for node in pbs_env.scheduler_nodes:
        if "down" in node.metadata.get("pbs_state", ""):
            failed_nodes.append(node)
    pbs_driver.handle_failed_nodes(failed_nodes)

    demand_result = demand_calculator.finish()

    if ctx_handler:
        ctx_handler.set_context("[joining]")

    # We pass in the nodes that matter (i.e. matched nodes); the driver figures
    # out which ones are new and need to be added to the cluster.
    joined = pbs_driver.add_nodes_to_cluster(
        [x for x in demand_result.compute_nodes if x.exists])

    pbs_driver.handle_post_join_cluster(joined)

    if ctx_handler:
        ctx_handler.set_context("[scaling]")

    # Boot up all new nodes. Optionally pass in a filtered list.
    if demand_result.new_nodes:
        if not dry_run:
            demand_calculator.bootup()

    if not dry_run:
        demand_calculator.update_history()

    # We also tell the driver about unmatched nodes; it filters them and
    # returns the list of nodes we can safely delete.
    idle_timeout = int(config.get("idle_timeout", 300))
    boot_timeout = int(config.get("boot_timeout", 3600))
    logging.fine("Idle timeout is %s", idle_timeout)

    unmatched_for_5_mins = demand_calculator.find_unmatched_for(
        at_least=idle_timeout)
    timed_out_booting = demand_calculator.find_booting(at_least=boot_timeout)

    # Ignore nodes that have keep_alive=true
    timed_out_booting = [n for n in timed_out_booting if not n.keep_alive]

    timed_out_to_deleted = []
    unmatched_nodes_to_delete = []

    if timed_out_booting:
        logging.info("The following nodes have timed out while booting: %s",
                     timed_out_booting)
        timed_out_to_deleted = pbs_driver.handle_boot_timeout(
            timed_out_booting) or []

    if unmatched_for_5_mins:
        logging.info("unmatched_for_5_mins %s", unmatched_for_5_mins)
        unmatched_nodes_to_delete = (
            pbs_driver.handle_draining(unmatched_for_5_mins) or [])

    nodes_to_delete = []
    for node in timed_out_to_deleted + unmatched_nodes_to_delete:
        if node.assignments:
            logging.warning(
                "%s has jobs assigned to it so we will take no action.", node)
            continue
        nodes_to_delete.append(node)

    if nodes_to_delete:
        try:
            logging.info("Deleting %s", [str(n) for n in nodes_to_delete])
            delete_result = demand_calculator.delete(nodes_to_delete)

            if delete_result:
                # in case the driver needs to do anything after a node is deleted
                # (usually just removing it from the cluster)
                pbs_driver.handle_post_delete(delete_result.nodes)
        except Exception as e:
            _exit_code = 1
            logging.warning(
                "Deletion failed, will retry on next iteration: %s", e)
            logging.exception(str(e))

    print_demand(config, demand_result, log=not dry_run)

    return demand_result
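# A hedged usage sketch (not part of the original source) for autoscale_pbspro
# above: load the autoscale config from JSON and run one dry-run cycle. The
# config path is an assumption for illustration, and autoscale_pbspro is
# assumed to be in scope (or importable from wherever your install places it).
import json


def run_pbspro_dry_run(config_path: str = "/opt/cycle/pbspro/autoscale.json"):
    with open(config_path) as fr:
        config = json.load(fr)
    # dry_run=True disables the lock file and forces read-only mode (see above),
    # so no nodes are booted or deleted.
    return autoscale_pbspro(config, dry_run=True)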
Example #23
from hpc.autoscale import hpclogging as logging
from hpc.autoscale.job.schedulernode import SchedulerNode as _SchedulerNode

logging.warning("hpc.autoscale.job.computenode is deprecated.")
logging.warning("Please use hpc.autoscale.job.schedulernode")

SchedulerNode = _SchedulerNode
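# Migration sketch (not part of the original source): the shim above keeps the
# old import path working but logs a warning; new code should import directly
# from the new module.
from hpc.autoscale.job.schedulernode import SchedulerNode  # preferred
# from hpc.autoscale.job.computenode import SchedulerNode  # deprecated shim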
Example #24
def new_demand_calculator(
    config: Dict,
    ge_env: Optional[GridEngineEnvironment] = None,
    ge_driver: Optional["GridEngineDriver"] = None,
    ctx_handler: Optional[DefaultContextHandler] = None,
    node_history: Optional[NodeHistory] = None,
    singleton_lock: Optional[SingletonLock] = None,
) -> DemandCalculator:
    if ge_env is None:
        ge_env = envlib.from_qconf(config)

    if ge_driver is None:
        ge_driver = new_driver(config, ge_env)

    if node_history is None:
        db_path = config.get("nodehistorydb")
        if not db_path:
            db_dir = "/opt/cycle/jetpack/system/bootstrap/gridengine"
            if not os.path.exists(db_dir):
                db_dir = os.getcwd()
            db_path = os.path.join(db_dir, "nodehistory.db")

        read_only = config.get("read_only", False)
        node_history = SQLiteNodeHistory(db_path, read_only)

        node_history.create_timeout = config.get("boot_timeout", 3600)
        node_history.last_match_timeout = config.get("idle_timeout", 300)

    demand_calculator = dcalclib.new_demand_calculator(
        config,
        existing_nodes=ge_env.nodes,
        node_history=node_history,
        node_queue=ge_driver.new_node_queue(),
        singleton_lock=singleton_lock,  # the library handles the None case
    )

    for name, default_complex in ge_env.complexes.items():
        if name == "slots":
            continue

        if default_complex.default is None:
            continue

        if not default_complex.requestable:
            continue

        logging.trace("Adding default resource %s=%s", name,
                      default_complex.default)
        demand_calculator.node_mgr.add_default_resource(
            {}, name, default_complex.default)

    ccnode_id_added = False
    slots_added: Set[str] = set()

    for bucket in demand_calculator.node_mgr.get_buckets():
        if "slots" not in bucket.resources and bucket.nodearray not in slots_added:
            default = (
                '"default_resources": [{"select": {"node.nodearray": "%s"}, "name": "slots", "value": "node.vcpu_count"}]'
                % (bucket.nodearray))
            demand_calculator.node_mgr.add_default_resource(
                selection={"node.nodearray": bucket.nodearray},
                resource_name="slots",
                default_value="node.vcpu_count",
            )

            logging.warning(
                "slots is not defined for bucket %s. Using the default, which you can add to your config: %s",
                bucket,
                default,
            )
            slots_added.add(bucket.nodearray)

        # ccnodeid will almost certainly not be defined. It only needs to be
        # defined once, so we add a default for all nodes the first time we
        # see it is missing.
        if "ccnodeid" not in bucket.resources and not ccnode_id_added:
            demand_calculator.node_mgr.add_default_resource(
                selection={},  # applies to all nodes
                resource_name="ccnodeid",
                default_value=lambda n: n.delayed_node_id.node_id,
            )
            ccnode_id_added = True

    return demand_calculator
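# A hedged sketch (not part of the original source) of the config entry that
# the "slots" warning above suggests adding. The nodearray name "hpc" is an
# assumption for illustration; use whichever nodearray the warning names.
EXAMPLE_DEFAULT_RESOURCES = {
    "default_resources": [
        {
            # applies only to nodes in the (hypothetical) "hpc" nodearray
            "select": {"node.nodearray": "hpc"},
            "name": "slots",
            "value": "node.vcpu_count",
        }
    ]
}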
Example #25
def autoscale_grid_engine(
    config: Dict[str, Any],
    ge_env: Optional[GridEngineEnvironment] = None,
    ge_driver: Optional["GridEngineDriver"] = None,
    ctx_handler: Optional[DefaultContextHandler] = None,
    node_history: Optional[NodeHistory] = None,
    dry_run: bool = False,
) -> DemandResult:
    global _exit_code

    assert not config.get("read_only", False)
    if dry_run:
        logging.warning("Running gridengine autoscaler in dry run mode")
        # allow multiple instances
        config["lock_file"] = None
        # put in read only mode
        config["read_only"] = True

    if ge_env is None:
        ge_env = envlib.from_qconf(config)

    # interface to GE, generally by cli
    if ge_driver is None:
        # allow tests to pass in a mock
        ge_driver = new_driver(config, ge_env)

    ge_driver.initialize_environment()

    config = ge_driver.preprocess_config(config)

    logging.fine("Driver = %s", ge_driver)

    invalid_nodes = []

    # we need an instance without any scheduler nodes, so don't
    # pass in the existing nodes.
    tmp_node_mgr = new_node_manager(config)

    by_hostname = partition_single(tmp_node_mgr.get_nodes(),
                                   lambda n: n.hostname_or_uuid)

    for node in ge_env.nodes:
        # There are many combinations of 'a', 'u', and other states, but as
        # long as both 'a' and 'u' are present, the host is down.
        state = node.metadata.get("state", "")
        cc_node = by_hostname.get(node.hostname)
        ccnodeid = node.resources.get("ccnodeid")
        if cc_node:
            if not ccnodeid or ccnodeid == cc_node.delayed_node_id.node_id:
                if cc_node.state in ["Preparing", "Acquiring"]:
                    continue
        if "a" in state and "u" in state:
            invalid_nodes.append(node)

    # nodes in error state must also be deleted
    nodes_to_delete = ge_driver.clean_hosts(invalid_nodes)
    for node in nodes_to_delete:
        ge_env.delete_node(node)

    demand_calculator = calculate_demand(config, ge_env, ge_driver,
                                         ctx_handler, node_history)

    ge_driver.handle_failed_nodes(
        demand_calculator.node_mgr.get_failed_nodes())

    demand_result = demand_calculator.finish()

    if ctx_handler:
        ctx_handler.set_context("[joining]")

    # We pass in the nodes that matter (i.e. matched nodes); the driver figures
    # out which ones are new and need to be added via qconf.
    joined = ge_driver.handle_join_cluster(
        [x for x in demand_result.compute_nodes if x.exists])

    ge_driver.handle_post_join_cluster(joined)

    if ctx_handler:
        ctx_handler.set_context("[scaling]")

    # Boot up all new nodes. Optionally pass in a filtered list.
    if demand_result.new_nodes:
        if not dry_run:
            demand_calculator.bootup()

    if not dry_run:
        demand_calculator.update_history()

    # We also tell the driver about unmatched nodes; it filters them and
    # returns the list of nodes we can safely delete.
    idle_timeout = int(config.get("idle_timeout", 300))
    boot_timeout = int(config.get("boot_timeout", 3600))
    logging.fine("Idle timeout is %s", idle_timeout)

    unmatched_for_5_mins = demand_calculator.find_unmatched_for(
        at_least=idle_timeout)
    timed_out_booting = demand_calculator.find_booting(at_least=boot_timeout)

    # Ignore nodes that have keep_alive=true
    timed_out_booting = [n for n in timed_out_booting if not n.keep_alive]

    timed_out_to_deleted = []
    unmatched_nodes_to_delete = []

    if timed_out_booting:
        logging.info("The following nodes have timed out while booting: %s",
                     timed_out_booting)
        timed_out_to_deleted = ge_driver.handle_boot_timeout(
            timed_out_booting) or []

    if unmatched_for_5_mins:
        node_expr = ", ".join([str(x) for x in unmatched_for_5_mins])
        logging.info("Unmatched for at least %s seconds: %s", idle_timeout,
                     node_expr)
        unmatched_nodes_to_delete = (
            ge_driver.handle_draining(unmatched_for_5_mins) or [])

    nodes_to_delete = []
    for node in timed_out_to_deleted + unmatched_nodes_to_delete:
        if node.assignments:
            logging.warning(
                "%s has jobs assigned to it so we will take no action.", node)
            continue
        nodes_to_delete.append(node)

    if nodes_to_delete:
        try:
            logging.info("Deleting %s", [str(n) for n in nodes_to_delete])
            delete_result = demand_calculator.delete(nodes_to_delete)

            if delete_result:
                # in case the driver needs to do anything after a node is deleted
                # (usually just removing it from the cluster)
                ge_driver.handle_post_delete(delete_result.nodes)
        except Exception as e:
            _exit_code = 1
            logging.warning(
                "Deletion failed, will retry on next iteration: %s", e)
            logging.exception(str(e))

    print_demand(config, demand_result, log=not dry_run)

    return demand_result
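# A minimal sketch (not part of the original source) of the "down" check used
# in autoscale_grid_engine above: a scheduler host is treated as invalid when
# its Grid Engine state string contains both 'a' and 'u', whatever other
# state letters are present.
def _sketch_is_down(state: str) -> bool:
    return "a" in state and "u" in state


assert _sketch_is_down("au")
assert _sketch_is_down("adu")
assert not _sketch_is_down("a")
assert not _sketch_is_down("")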