Example No. 1
    def bootup(self, nodes: Optional[List[Node]] = None) -> BootupResult:
        nodes = nodes if nodes is not None else self.get_demand().new_nodes
        if not nodes:
            logging.info("No nodes to bootup.")
            return BootupResult("success", OperationId(""), None)

        logging.debug("booting up %s", [n.name for n in nodes])
        return self.node_mgr.bootup(nodes)
Example No. 2
    def delete(self, nodes: Optional[List[Node]] = None) -> DeleteResult:
        nodes = nodes if nodes is not None else self.get_demand(
        ).unmatched_nodes
        if not nodes:
            logging.info("No nodes to delete.")
            return DeleteResult("success", OperationId(""), None)

        logging.debug("deleting %s", [n.name for n in nodes])
        return self.node_mgr.delete(nodes)
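
Both bootup() and delete() above follow the same pattern: an optional node list defaults to whatever the demand calculation produced, and an empty list short-circuits into a successful no-op. A minimal, self-contained sketch of that pattern (get_new_nodes here is a hypothetical stand-in, not part of the library):

from typing import List, Optional

def get_new_nodes() -> List[str]:
    # hypothetical stand-in for self.get_demand().new_nodes
    return ["node-1", "node-2"]

def bootup(nodes: Optional[List[str]] = None) -> List[str]:
    # None means "use the computed default"; an explicit empty list is honored
    nodes = nodes if nodes is not None else get_new_nodes()
    if not nodes:
        return []  # nothing to do, succeed as a no-op
    return nodes

print(bootup())         # ['node-1', 'node-2']
print(bootup([]))       # []
print(bootup(["n-3"]))  # ['n-3']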
Example No. 3
def parse_scheduler_nodes(
    config: Dict,
    pbscmd: PBSCMD,
    resource_definitions: Dict[str, PBSProResourceDefinition],
) -> List[Node]:
    """
    Gets the current state of the nodes as the scheduler sees them, including resources,
    assigned resources, jobs currently running etc.
    """
    ret: List[Node] = []
    ignore_onprem = config.get("pbspro", {}).get("ignore_onprem", False)
    ignore_hostnames_re_expr = config.get("pbspro",
                                          {}).get("ignore_hostnames_re")
    ignore_hostnames_re = None
    if ignore_hostnames_re_expr:
        try:
            ignore_hostnames_re = re.compile(ignore_hostnames_re_expr)
        except re.error:
            logging.exception(
                f"Could not parse {ignore_hostnames_re_expr} as a regular expression"
            )
    ignored_hostnames = []

    for ndict in pbscmd.pbsnodes_parsed("-a"):
        if ignore_hostnames_re and ignore_hostnames_re.match(ndict["name"]):
            ignored_hostnames.append(ndict["name"])
            continue

        if ignore_onprem and ndict.get("resources_available.ccnodeid"):
            ignored_hostnames.append(ndict["name"])
            continue

        node = parse_scheduler_node(ndict, resource_definitions)

        if not node.available.get("ccnodeid"):
            node.metadata["override_resources"] = False
            logging.fine(
                "'ccnodeid' is not defined so %s has not been joined to the cluster by the autoscaler"
                + " yet or this is not a CycleCloud managed node",
                node,
            )
        ret.append(node)

    if ignored_hostnames:
        if len(ignored_hostnames) < 5:
            logging.info(
                f"Ignored {len(ignored_hostnames)} hostnames. {','.join(ignored_hostnames)}"
            )
        else:
            logging.info(
                f"Ignored {len(ignored_hostnames)} hostnames. {','.join(ignored_hostnames[:5])}..."
            )
    return ret
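
The ignore_hostnames_re filter above simply drops any pbsnodes entry whose name matches the configured pattern. A small sketch of that filtering in isolation (the node dicts and the pattern are illustrative, not real pbsnodes output):

import re

ndicts = [{"name": "onprem-001"}, {"name": "cc-htc-1"}]
ignore_hostnames_re = re.compile(r"^onprem-")

# keep only entries whose name does not match the ignore pattern
kept = [n for n in ndicts if not ignore_hostnames_re.match(n["name"])]
print([n["name"] for n in kept])  # ['cc-htc-1']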
Example No. 4
    def get_grow_decision(self) -> Dict[str, GrowDecision]:
        res = self._post(self.get_grow_decision.__name__,
                         self.GROW_DECISION_API_ROUTE,
                         data=None)
        logging.info(res.content)
        grow_decision_dict = {
            k: GrowDecision(v['CoresToGrow'], v['NodesToGrow'],
                            v['SocketsToGrow'])
            for k, v in json.loads(res.content).items()
        }
        if not ci_in("Default", grow_decision_dict):
            grow_decision_dict["Default"] = GrowDecision(0.0, 0.0, 0.0)
        return grow_decision_dict
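
A rough, self-contained sketch of how the grow-decision payload is reshaped above; the GrowDecision namedtuple and the sample JSON are assumptions standing in for the real HPC Pack REST types:

import json
from collections import namedtuple

GrowDecision = namedtuple("GrowDecision",
                          ["cores_to_grow", "nodes_to_grow", "sockets_to_grow"])

payload = '{"ComputeNodes": {"CoresToGrow": 4, "NodesToGrow": 1, "SocketsToGrow": 0}}'
decisions = {
    k: GrowDecision(v["CoresToGrow"], v["NodesToGrow"], v["SocketsToGrow"])
    for k, v in json.loads(payload).items()
}
# case-insensitive membership check standing in for ci_in("Default", ...)
if not any(k.lower() == "default" for k in decisions):
    decisions["Default"] = GrowDecision(0.0, 0.0, 0.0)
print(decisions["Default"])  # GrowDecision(cores_to_grow=0.0, nodes_to_grow=0.0, sockets_to_grow=0.0)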
Example No. 5
    def retire_records(self,
                       timeout: int = (7 * 24 * 60 * 60),
                       commit: bool = True) -> None:
        if self.read_only:
            return

        retire_omega = self.now() - timeout
        cursor = self._execute(
            """DELETE from nodes where delete_time is not null AND delete_time < {} AND delete_time > 0"""
            .format(retire_omega))
        deleted = list(cursor)
        logging.info("Deleted %s nodes - %s", len(deleted),
                     [(d[0], d[1]) for d in deleted])
        if commit:
            self.conn.commit()
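
retire_records() purges rows whose delete_time falls before the retention cutoff. A minimal sqlite3 sketch of that cutoff, assuming delete_time is stored as a unix timestamp (and using a bound parameter rather than string formatting):

import sqlite3
import time

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE nodes (name TEXT, hostname TEXT, delete_time REAL)")
week = 7 * 24 * 60 * 60
conn.execute("INSERT INTO nodes VALUES ('old', 'old-host', ?)", (time.time() - 2 * week,))
conn.execute("INSERT INTO nodes VALUES ('new', 'new-host', ?)", (time.time(),))

retire_omega = time.time() - week  # anything deleted before this point is purged
cursor = conn.execute(
    "DELETE FROM nodes WHERE delete_time IS NOT NULL AND delete_time < ? AND delete_time > 0",
    (retire_omega,),
)
conn.commit()
print(cursor.rowcount)  # 1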
Example No. 6
    def get_hostgroups_for_pe(self, pe_name: str) -> List[str]:
        if not self.has_pe(pe_name):
            raise RuntimeError(
                "Queue {} does not support parallel_environment {}".format(
                    self.qname, pe_name))
        ret = self.__pe_to_hostgroups[pe_name]
        if set(ret) == set([None]):
            logging.info(
                "PE %s has no specified hostgroup and will be put into hostgroup %s",
                pe_name,
                self.default_hg,
            )
            self.__pe_to_hostgroups[pe_name] = ret = [self.default_hg]

        return [h for h in ret if h]
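
A small sketch of the "all-None means fall back to the default hostgroup" check used above; the dictionary and hostgroup names are illustrative only:

pe_to_hostgroups = {"mpi": [None]}
default_hg = "@cyclempi"

ret = pe_to_hostgroups["mpi"]
if set(ret) == {None}:
    # no hostgroup was specified for this PE, so assign the queue default
    pe_to_hostgroups["mpi"] = ret = [default_hg]

print([h for h in ret if h])  # ['@cyclempi']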
Example No. 7
    def _post(self, function_name: str, function_route: str, data) -> Response:
        headers = {"Content-Type": "application/json"}
        url = function_route.format(self.hostname)
        res = requests.post(url,
                            data=data,
                            headers=headers,
                            verify=False,
                            cert=self._pem)
        try:
            res.raise_for_status()
            logging.info("{} resp: {}".format(function_name, str(res.content)))
            return res
        except HTTPError:
            logging.error("{}: status_code:{} content:{}".format(
                function_name, res.status_code, res.content))
            raise
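
_post() relies on raise_for_status() to turn HTTP error codes into HTTPError exceptions that are logged and re-raised. A tiny offline sketch of that pattern using a manually constructed Response, so no network call is made:

import requests
from requests.exceptions import HTTPError

res = requests.models.Response()
res.status_code = 500  # simulate a server-side failure

try:
    res.raise_for_status()
except HTTPError as e:
    # prints something like: request failed: 500 Server Error: ...
    print("request failed:", e)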
Example No. 8
    def handle_draining(self, nodes: List[Node]) -> List[Node]:
        # TODO batch these up, but keep it underneath the
        # max arg limit
        ret = []
        for node in nodes:
            if not node.hostname:
                logging.info("Node %s has no hostname.", node)
                continue

            # TODO implement after we have resources added back in
            # what about deleting partially initialized nodes? I think we
            # just need to skip non-managed nodes
            # if not node.resources.get("ccnodeid"):
            #     continue

            if not node.managed and not node.resources.get("ccnodeid"):
                logging.debug("Ignoring attempt to drain unmanaged %s", node)
                continue

            if "offline" in node.metadata.get("pbs_state", ""):
                if node.assignments:
                    logging.info("Node %s has jobs still running on it.", node)
                    # node is already 'offline' i.e. draining, but a job is still running
                    continue
                else:
                    # ok - it is offline _and_ no jobs are running on it.
                    ret.append(node)
            else:
                try:
                    self.pbscmd.pbsnodes("-o", node.hostname)

                    # # Due to a delay between when 'pbsnodes -o' exits and when 'pbsnodes -a'
                    # # actually reports an offline state, we will just optimistically set it to offline;
                    # # otherwise ~50% of the time you get the old state (free)
                    # response = self.pbscmd.pbsnodes_parsed("-a", node.hostname)
                    # if response:
                    #     node.metadata["pbs_state"] = response[0]["state"]
                    node.metadata["pbs_state"] = "offline"

                except CalledProcessError as e:
                    if node.private_ip:
                        logging.error(
                            "'pbsnodes -o %s' failed and this node will not be scaled down: %s",
                            node.hostname,
                            e,
                        )
        return ret
Example No. 9
    def handle_failed_nodes(self, nodes: List[Node]) -> List[Node]:
        to_delete = []
        to_drain = []
        now = datetime.datetime.now()

        for node in nodes:

            if node.state == "Failed":
                node.closed = True
                to_delete.append(node)
                continue

            if not node.resources.get("ccnodeid"):
                logging.fine(
                    "Attempting to delete %s but ccnodeid is not set yet.",
                    node)
                continue

            job_state = node.metadata.get("pbs_state", "")
            if "down" in job_state:
                node.closed = True
                # no private_ip == no dns entry, so we can safely remove it
                if "offline" in job_state or not node.private_ip:
                    to_delete.append(node)
                else:
                    if self._down_long_enough(now, node):
                        to_drain.append(node)

        if to_drain:
            logging.info("Draining down nodes: %s", to_drain)
            self.handle_draining(to_drain)

        if to_delete:
            logging.info("Deleting down,offline nodes: %s", to_drain)
            return self.handle_post_delete(to_delete)
        return []
Example No. 10
    def add_nodes_to_cluster(self, nodes: List[Node]) -> List[Node]:
        self.initialize()

        all_nodes = self.pbscmd.pbsnodes_parsed("-a")
        by_ccnodeid = partition(
            all_nodes, lambda x: x.get("resources_available.ccnodeid"))

        ret = []
        for node in nodes:
            if not node.hostname:
                continue

            if not node.private_ip:
                continue

            node_id = node.delayed_node_id.node_id
            if not node_id:
                logging.error("%s does not have a nodeid! Skipping", node)
                continue

            if node_id in by_ccnodeid:
                skip_node = False
                for ndict in by_ccnodeid[node_id]:
                    if ndict["name"].lower() != node.hostname.lower():
                        logging.error(
                            "Duplicate hostname found for the same node id! %s and %s. See 'valid_hostnames' in autoscale as a possible workaround.",
                            node,
                            ndict["name"],
                        )
                        skip_node = True
                        break
                if skip_node:
                    continue

            if not is_valid_hostname(self.config, node):
                continue

            if not self._validate_reverse_dns(node):
                logging.fine(
                    "%s still has a hostname that can not be looked via reverse dns. This should repair itself.",
                    node,
                )
                continue

            if not node.resources.get("ccnodeid"):
                logging.info(
                    "%s is not managed by CycleCloud, or at least 'ccnodeid' is not defined. Ignoring",
                    node,
                )
                continue
            try:
                try:
                    ndicts = self.pbscmd.qmgr_parsed("list", "node",
                                                     node.hostname)
                    if ndicts and ndicts[0].get(
                            "resources_available.ccnodeid"):
                        logging.info(
                            "ccnodeid is already defined on %s. Skipping",
                            node)
                        continue
                    # TODO RDH should we just delete it instead?
                    logging.info(
                        "%s already exists in this cluster. Setting resources.",
                        node)
                except CalledProcessError:
                    logging.info(
                        "%s does not exist in this cluster yet. Creating.",
                        node)
                    self.pbscmd.qmgr("create", "node", node.hostname)

                for res_name, res_value in node.resources.items():
                    # we set ccnodeid last, so that we can see that we have completely joined a node
                    # if and only if ccnodeid has been set
                    if res_name == "ccnodeid":
                        continue

                    if res_value is None:
                        continue

                    # TODO RDH track down
                    if res_name == "group_id" and res_value == "None":
                        continue

                    # skip things like host which are useful to set default resources on non-existent
                    # nodes for autoscale packing, but not on actual nodes
                    if res_name in self.read_only_resources:
                        continue

                    if res_name not in self.resource_definitions:
                        # TODO bump to a warning?
                        logging.fine(
                            "%s is an unknown PBS resource for node %s. Skipping this resource",
                            res_name,
                            node,
                        )
                        continue
                    res_value_str: str

                    # pbs size does not support decimals
                    if isinstance(res_value, ht.Size):
                        res_value_str = "{}{}".format(int(res_value.value),
                                                      res_value.magnitude)
                    elif isinstance(res_value, bool):
                        res_value_str = "1" if bool else "0"
                    else:
                        res_value_str = str(res_value)

                    self.pbscmd.qmgr(
                        "set",
                        "node",
                        node.hostname,
                        "resources_available.{}={}".format(
                            res_name, res_value_str),
                    )

                self.pbscmd.qmgr(
                    "set",
                    "node",
                    node.hostname,
                    "resources_available.{}={}".format(
                        "ccnodeid", node.resources["ccnodeid"]),
                )
                self.pbscmd.pbsnodes("-r", node.hostname)
                ret.append(node)
            except SubprocessError as e:
                logging.error(
                    "Could not fully add %s to cluster: %s. Will attempt next cycle",
                    node,
                    e,
                )

        return ret
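
add_nodes_to_cluster() groups the pbsnodes output by ccnodeid via a partition() helper. A rough sketch of what such a helper might look like; this is an assumption about its behavior, not the scalelib implementation:

from collections import defaultdict
from typing import Any, Callable, Dict, List, TypeVar

T = TypeVar("T")

def partition(items: List[T], key_func: Callable[[T], Any]) -> Dict[Any, List[T]]:
    # group items by the value returned from key_func
    grouped: Dict[Any, List[T]] = defaultdict(list)
    for item in items:
        grouped[key_func(item)].append(item)
    return dict(grouped)

all_nodes = [{"name": "htc-1", "resources_available.ccnodeid": "id-1"},
             {"name": "htc-1b", "resources_available.ccnodeid": "id-1"}]
by_ccnodeid = partition(all_nodes, lambda x: x.get("resources_available.ccnodeid"))
print(len(by_ccnodeid["id-1"]))  # 2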
Example No. 11
def autoscale_hpcpack(
    config: Dict[str, Any],
    ctx_handler: DefaultContextHandler = None,
    hpcpack_rest_client: Optional[HpcRestClient] = None,
    dry_run: bool = False,
) -> None:

    if not hpcpack_rest_client:
        hpcpack_rest_client = new_rest_client(config)

    if ctx_handler:
        ctx_handler.set_context("[Sync-Status]")
    autoscale_config = config.get("autoscale") or {}
    # Load history info
    idle_timeout_seconds: int = autoscale_config.get("idle_timeout") or 600
    provisioning_timeout_seconds = autoscale_config.get("boot_timeout") or 1500
    statefile = autoscale_config.get(
        "statefile") or "C:\\cycle\\jetpack\\config\\autoscaler_state.txt"
    archivefile = autoscale_config.get(
        "archivefile") or "C:\\cycle\\jetpack\\config\\autoscaler_archive.txt"
    node_history = HpcNodeHistory(
        statefile=statefile,
        archivefile=archivefile,
        provisioning_timeout=provisioning_timeout_seconds,
        idle_timeout=idle_timeout_seconds)

    logging.info("Synchronizing the nodes between Cycle cloud and HPC Pack")

    # Initialize data of History info, cc nodes, HPC Pack nodes, HPC grow decisions
    # Get node list from Cycle Cloud
    def nodes_state_key(n: Node) -> Tuple[int, str, int]:
        try:
            state_pri = 1
            if n.state == 'Deallocated':
                state_pri = 2
            elif n.state == 'Stopping':
                state_pri = 3
            elif n.state == 'Terminating':
                state_pri = 4
            name, index = n.name.rsplit("-", 1)
            return (state_pri, name, int(index))
        except Exception:
            return (state_pri, n.name, 0)

    node_mgr: NodeManager = new_node_manager(config)
    for b in node_mgr.get_buckets():
        b.nodes.sort(key=nodes_state_key)
    cc_nodes: List[Node] = node_mgr.get_nodes()
    cc_nodes_by_id = partition_single(cc_nodes,
                                      func=lambda n: n.delayed_node_id.node_id)
    # Get compute node list and grow decision from HPC Pack
    hpc_node_groups = hpcpack_rest_client.list_node_groups()
    grow_decisions = hpcpack_rest_client.get_grow_decision()
    logging.info("grow decision: {}".format(grow_decisions))
    hpc_cn_nodes: List[HpcNode] = hpcpack_rest_client.list_computenodes()
    hpc_cn_nodes = [n for n in hpc_cn_nodes if n.active]

    # This function will link node history items, cc nodes and hpc nodes
    node_history.synchronize(cc_nodes, hpc_cn_nodes)

    cc_nodearrays = set([b.nodearray for b in node_mgr.get_buckets()])
    logging.info("Current node arrays in cyclecloud: {}".format(cc_nodearrays))

    # Create HPC node groups for CC node arrays
    cc_map_hpc_groups = ["CycleCloudNodes"] + list(cc_nodearrays)
    for cc_grp in cc_map_hpc_groups:
        if ci_notin(cc_grp, hpc_node_groups):
            logging.info("Create HPC node group: {}".format(cc_grp))
            hpcpack_rest_client.add_node_group(cc_grp,
                                               "Cycle Cloud Node group")

    # Add HPC nodes into corresponding node groups
    add_cc_tag_nodes = [
        n.name for n in hpc_cn_nodes if n.shall_addcyclecloudtag
    ]
    if len(add_cc_tag_nodes) > 0:
        logging.info(
            "Adding HPC nodes to node group CycleCloudNodes: {}".format(
                add_cc_tag_nodes))
        hpcpack_rest_client.add_node_to_node_group("CycleCloudNodes",
                                                   add_cc_tag_nodes)
    for cc_grp in list(cc_nodearrays):
        add_array_tag_nodes = [
            n.name for n in hpc_cn_nodes
            if n.shall_addnodearraytag and ci_equals(n.cc_nodearray, cc_grp)
        ]
        if len(add_array_tag_nodes) > 0:
            logging.info("Adding HPC nodes to node group {}: {}".format(
                cc_grp, add_array_tag_nodes))
            hpcpack_rest_client.add_node_to_node_group(cc_grp,
                                                       add_array_tag_nodes)

    # Possible values for HPC NodeState (states marked with * shall not occur for CC nodes):
    #   Unknown, Provisioning, Offline, Starting, Online, Draining, Rejected(*), Removing, NotDeployed(*), Stopping(*)
    # Remove the following HPC Pack nodes:
    #   1. The corresponding CC node has already been removed
    #   2. The corresponding CC node is stopped and the HPC node has no node template assigned
    # Take offline the following HPC Pack nodes:
    #   1. The corresponding CC node is stopped or is going to stop
    hpc_nodes_to_remove = [
        n.name for n in hpc_cn_nodes
        if n.removed_cc_node or (n.stopped_cc_node and not n.template_assigned)
    ]
    hpc_nodes_to_take_offline = [
        n.name for n in hpc_cn_nodes
        if n.stopped_cc_node and ci_equals(n.state, "Online")
    ]
    if len(hpc_nodes_to_remove) > 0:
        logging.info("Removing the HPC nodes: {}".format(hpc_nodes_to_remove))
        if dry_run:
            logging.info("Dry-run: no real action")
        else:
            hpcpack_rest_client.remove_nodes(hpc_nodes_to_remove)
    hpc_cn_nodes = [
        n for n in hpc_cn_nodes if not (n.stopped_cc_node or n.removed_cc_node)
    ]

    # Assign the default node template to unapproved CC nodes
    hpc_nodes_to_assign_template = [
        n.name for n in hpc_cn_nodes
        if n.bound_cc_node and not n.template_assigned
    ]
    if len(hpc_nodes_to_assign_template) > 0:
        logging.info(
            "Assigning default node template for the HPC nodes: {}".format(
                hpc_nodes_to_assign_template))
        if dry_run:
            logging.info("Dry-run: no real action")
        else:
            hpcpack_rest_client.assign_default_compute_node_template(
                hpc_nodes_to_assign_template)

    ### Start scale up checking:
    logging.info("Start scale up checking ...")
    if ctx_handler:
        ctx_handler.set_context("[scale-up]")

    hpc_nodes_with_active_cc = [
        n for n in hpc_cn_nodes if n.template_assigned and n.bound_cc_node
    ]
    # Exclude the already online healthy HPC nodes before calling node_mgr.allocate
    for hpc_node in hpc_nodes_with_active_cc:
        if hpc_node.ready_for_job:
            hpc_node.bound_cc_node.closed = True

    # Terminate the provisioning timeout CC nodes
    cc_node_to_terminate: List[Node] = []
    for cc_node in cc_nodes:
        if ci_equals(cc_node.target_state, 'Deallocated') or ci_equals(
                cc_node.target_state,
                'Terminated') or cc_node.create_time_remaining:
            continue
        nhi = node_history.find(cc_id=cc_node.delayed_node_id.node_id)
        if not nhi.hpc_id:
            cc_node.closed = True
            cc_node_to_terminate.append(cc_node)
        else:
            hpc_node = ci_find_one(hpc_nodes_with_active_cc, nhi.hpc_id,
                                   lambda n: n.id)
            if hpc_node and hpc_node.error:
                cc_node.closed = True
                cc_node_to_terminate.append(cc_node)

    # "ComputeNodes", "CycleCloudNodes", "AzureIaaSNodes" are all treated as default
    # grow_by_socket not supported yet, treat as grow_by_node
    defaultGroups = [
        "Default", "ComputeNodes", "AzureIaaSNodes", "CycleCloudNodes"
    ]
    default_cores_to_grow = default_nodes_to_grow = 0.0

    # If the current CC nodes in the node array cannot satisfy the grow decision, the group is hungry
    # For a hungry group, no idle check is required if the node health is OK
    group_hungry: Dict[str, bool] = {}
    nbrNewNodes: int = 0
    grow_groups = list(grow_decisions.keys())
    for grp in grow_groups:
        tmp = grow_decisions.pop(grp)
        if not (tmp.cores_to_grow + tmp.nodes_to_grow + tmp.sockets_to_grow):
            continue
        if ci_in(grp, defaultGroups):
            default_cores_to_grow += tmp.cores_to_grow
            default_nodes_to_grow += tmp.nodes_to_grow + tmp.sockets_to_grow
            continue
        if ci_notin(grp, cc_nodearrays):
            logging.warning(
                "No mapping node array for the grow requirement {}:{}".format(
                    grp, tmp))
            continue
        group_hungry[grp] = False
        array = ci_lookup(grp, cc_nodearrays)
        selector = {'ncpus': 1, 'node.nodearray': [array]}
        target_cores = math.ceil(tmp.cores_to_grow)
        target_nodes = math.ceil(tmp.nodes_to_grow + tmp.sockets_to_grow)
        if target_nodes:
            logging.info("Allocate: {}  Target Nodes: {}".format(
                selector, target_nodes))
            result = node_mgr.allocate(selector, node_count=target_nodes)
            logging.info(result)
            if not result or result.total_slots < target_nodes:
                group_hungry[grp] = True
        if target_cores:
            logging.info("Allocate: {}  Target Cores: {}".format(
                selector, target_cores))
            result = node_mgr.allocate(selector, slot_count=target_cores)
            logging.info(result)
            if not result or result.total_slots < target_cores:
                group_hungry[grp] = True
        if len(node_mgr.new_nodes) > nbrNewNodes:
            group_hungry[grp] = True
        nbrNewNodes = len(node_mgr.new_nodes)

    # We then check the grow decision for the default node groups:
    checkShrinkNeeded = True
    growForDefaultGroup = bool(default_nodes_to_grow or default_cores_to_grow)
    if growForDefaultGroup:
        selector = {'ncpus': 1}
        if default_nodes_to_grow:
            target_nodes = math.ceil(default_nodes_to_grow)
            logging.info("Allocate: {}  Target Nodes: {}".format(
                selector, target_nodes))
            result = node_mgr.allocate({'ncpus': 1}, node_count=target_nodes)
            if not result or result.total_slots < target_nodes:
                checkShrinkNeeded = False
        if default_cores_to_grow:
            target_cores = math.ceil(default_cores_to_grow)
            logging.info("Allocate: {}  Target Cores: {}".format(
                selector, target_cores))
            result = node_mgr.allocate({'ncpus': 1}, slot_count=target_cores)
            if not result or result.total_slots < target_cores:
                checkShrinkNeeded = False
        if len(node_mgr.new_nodes) > nbrNewNodes:
            checkShrinkNeeded = False
        nbrNewNodes = len(node_mgr.new_nodes)

    if nbrNewNodes > 0:
        logging.info("Need to Allocate {} nodes in total".format(nbrNewNodes))
        if dry_run:
            logging.info("Dry-run: skipping node bootup...")
        else:
            logging.info("Allocating {} nodes in total".format(
                len(node_mgr.new_nodes)))
            bootup_result: BootupResult = node_mgr.bootup()
            logging.info(bootup_result)
            if bootup_result and bootup_result.nodes:
                for cc_node in bootup_result.nodes:
                    nhi = node_history.find(
                        cc_id=cc_node.delayed_node_id.node_id)
                    if nhi is None:
                        nhi = node_history.insert(
                            NodeHistoryItem(cc_node.delayed_node_id.node_id))
                    else:
                        nhi.restart()
    else:
        logging.info("No need to allocate new nodes ...")

    ### Start the shrink checking
    if ctx_handler:
        ctx_handler.set_context("[scale-down]")

    cc_node_to_shutdown: List[Node] = []
    if not checkShrinkNeeded:
        logging.info("No shrink check at this round ...")
        if not dry_run:
            for nhi in node_history.items:
                if not nhi.stopped and nhi.hpc_id:
                    nhi.idle_from = None
    else:
        logging.info("Start scale down checking ...")
        # By default, we check idle for active CC nodes in HPC Pack with 'Offline', 'Starting', 'Online', 'Draining' state
        candidate_idle_check_nodes = [
            n for n in hpc_nodes_with_active_cc
            if (not n.bound_cc_node.keep_alive)
            and ci_in(n.state, ["Offline", "Starting", "Online", "Draining"])
        ]

        # We can exclude some nodes from idle checking:
        # 1. If HPC Pack asks to grow the default node group(s), all healthy ONLINE nodes are considered busy
        # 2. If HPC Pack asks to grow a specific node group, all healthy ONLINE nodes in that node group are considered busy
        # 3. If a node group is hungry (new CC nodes required or the grow request is not satisfied), skip the idle check for all nodes in that node array
        if growForDefaultGroup:
            candidate_idle_check_nodes = [
                n for n in candidate_idle_check_nodes if not n.ready_for_job
            ]
        for grp, hungry in group_hungry.items():
            if hungry:
                candidate_idle_check_nodes = [
                    n for n in candidate_idle_check_nodes
                    if not ci_equals(grp, n.cc_nodearray)
                ]
            elif not growForDefaultGroup:
                candidate_idle_check_nodes = [
                    n for n in candidate_idle_check_nodes
                    if not (ci_equals(grp, n.cc_nodearray) and n.ready_for_job)
                ]

        curtime = datetime.utcnow()
        # Offline node must be idle
        idle_node_names = [
            n.name for n in candidate_idle_check_nodes
            if ci_equals(n.state, 'Offline')
        ]
        if len(candidate_idle_check_nodes) > len(idle_node_names):
            idle_nodes = hpcpack_rest_client.check_nodes_idle([
                n.name for n in candidate_idle_check_nodes
                if not ci_equals(n.state, 'Offline')
            ])
            if len(idle_nodes) > 0:
                idle_node_names.extend([n.node_name for n in idle_nodes])

        if len(idle_node_names) > 0:
            logging.info(
                "The following node is idle: {}".format(idle_node_names))
        else:
            logging.info("No idle node found in this round.")

        retention_days = autoscale_config.get("vm_retention_days") or 7
        for nhi in node_history.items:
            if nhi.stopped:
                if nhi.stop_time + timedelta(
                        days=retention_days) < datetime.utcnow():
                    cc_node = cc_nodes_by_id.get(nhi.cc_id)
                    if cc_node is not None:
                        cc_node_to_terminate.append(cc_node)
                continue
            if ci_in(nhi.hostname, idle_node_names):
                if nhi.idle_from is None:
                    nhi.idle_from = curtime
                elif nhi.idle_timeout(idle_timeout_seconds):
                    nhi.stop_time = curtime
                    cc_node = cc_nodes_by_id.get(nhi.cc_id)
                    if cc_node is not None:
                        cc_node_to_shutdown.append(cc_node)
            else:
                nhi.idle_from = None

    shrinking_cc_node_ids = [
        n.delayed_node_id.node_id for n in cc_node_to_terminate
    ]
    shrinking_cc_node_ids.extend(
        [n.delayed_node_id.node_id for n in cc_node_to_shutdown])
    hpc_nodes_to_bring_online = [
        n.name for n in hpc_nodes_with_active_cc
        if ci_equals(n.state, 'Offline') and not n.error
        and ci_notin(n.cc_node_id, shrinking_cc_node_ids)
    ]
    hpc_nodes_to_take_offline.extend([
        n.name for n in hpc_nodes_with_active_cc
        if ci_equals(n.state, 'Online')
        and ci_in(n.cc_node_id, shrinking_cc_node_ids)
    ])
    if len(hpc_nodes_to_bring_online) > 0:
        logging.info("Bringing the HPC nodes online: {}".format(
            hpc_nodes_to_bring_online))
        if dry_run:
            logging.info("Dry-run: no real action")
        else:
            hpcpack_rest_client.bring_nodes_online(hpc_nodes_to_bring_online)

    if len(hpc_nodes_to_take_offline) > 0:
        logging.info("Taking the HPC nodes offline: {}".format(
            hpc_nodes_to_take_offline))
        if dry_run:
            logging.info("Dry-run: no real action")
        else:
            hpcpack_rest_client.take_nodes_offline(hpc_nodes_to_take_offline)

    if len(cc_node_to_shutdown) > 0:
        logging.info("Shut down the following Cycle cloud node: {}".format(
            [cn.name for cn in cc_node_to_shutdown]))
        if dry_run:
            logging.info("Dry-run: skip ...")
        else:
            node_mgr.shutdown_nodes(cc_node_to_shutdown)

    if len(cc_node_to_terminate) > 0:
        logging.info(
            "Terminating the following provisioning-timeout Cycle cloud nodes: {}"
            .format([cn.name for cn in cc_node_to_terminate]))
        if dry_run:
            logging.info("Dry-run: skip ...")
        else:
            node_mgr.terminate_nodes(cc_node_to_terminate)

    if not dry_run:
        logging.info("Save node history: {}".format(node_history))
        node_history.save()
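
autoscale_hpcpack() leans heavily on case-insensitive helpers (ci_in, ci_notin, ci_equals, ci_lookup, ci_find_one). A minimal sketch of what the comparison helpers likely do; these are assumptions about their behavior, not the library code:

def ci_equals(a, b) -> bool:
    # case-insensitive equality on the string forms of the two values
    return str(a).lower() == str(b).lower()

def ci_in(needle, haystack) -> bool:
    return any(ci_equals(needle, item) for item in haystack)

def ci_notin(needle, haystack) -> bool:
    return not ci_in(needle, haystack)

print(ci_in("default", ["Default", "ComputeNodes"]))   # True
print(ci_notin("gpu", ["Default", "ComputeNodes"]))    # True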
Example No. 12

def new_rest_client(config: Dict[str, Any]) -> HpcRestClient:

    hpcpack_config = config.get('hpcpack') or {}
    hpc_pem_file = hpcpack_config.get('pem')
    hn_hostname = hpcpack_config.get('hn_hostname')
    return HpcRestClient(config, pem=hpc_pem_file, hostname=hn_hostname)


if __name__ == "__main__":

    config_file = ""
    if len(sys.argv) > 1:
        config_file = sys.argv[1]

    dry_run = False
    if len(sys.argv) > 2:
        dry_run = ci_in(sys.argv[2], ['true', 'dryrun'])

    ctx_handler = register_result_handler(
        DefaultContextHandler("[initialization]"))
    config = load_config(config_file)
    logging.initialize_logging(config)
    logging.info(
        "------------------------------------------------------------------------"
    )
    if config["autoscale"]["start_enabled"]:
        autoscale_hpcpack(config, ctx_handler=ctx_handler, dry_run=dry_run)
    else:
        logging.info("Autoscaler is not enabled")
Example No. 13
def autoscale_grid_engine(
    config: Dict[str, Any],
    ge_env: Optional[GridEngineEnvironment] = None,
    ge_driver: Optional["GridEngineDriver"] = None,
    ctx_handler: Optional[DefaultContextHandler] = None,
    node_history: Optional[NodeHistory] = None,
    dry_run: bool = False,
) -> DemandResult:
    global _exit_code

    assert not config.get("read_only", False)
    if dry_run:
        logging.warning("Running gridengine autoscaler in dry run mode")
        # allow multiple instances
        config["lock_file"] = None
        # put in read only mode
        config["read_only"] = True

    if ge_env is None:
        ge_env = envlib.from_qconf(config)

    # interface to GE, generally by cli
    if ge_driver is None:
        # allow tests to pass in a mock
        ge_driver = new_driver(config, ge_env)

    ge_driver.initialize_environment()

    config = ge_driver.preprocess_config(config)

    logging.fine("Driver = %s", ge_driver)

    invalid_nodes = []

    # we need an instance without any scheduler nodes, so don't
    # pass in the existing nodes.
    tmp_node_mgr = new_node_manager(config)

    by_hostname = partition_single(tmp_node_mgr.get_nodes(),
                                   lambda n: n.hostname_or_uuid)

    for node in ge_env.nodes:
        # many combinations of 'a', 'u' and other states exist; however,
        # as long as both 'a' and 'u' are present, the node is down
        state = node.metadata.get("state", "")
        cc_node = by_hostname.get(node.hostname)
        ccnodeid = node.resources.get("ccnodeid")
        if cc_node:
            if not ccnodeid or ccnodeid == cc_node.delayed_node_id.node_id:
                if cc_node.state in ["Preparing", "Acquiring"]:
                    continue
        if "a" in state and "u" in state:
            invalid_nodes.append(node)

    # nodes in error state must also be deleted
    nodes_to_delete = ge_driver.clean_hosts(invalid_nodes)
    for node in nodes_to_delete:
        ge_env.delete_node(node)

    demand_calculator = calculate_demand(config, ge_env, ge_driver,
                                         ctx_handler, node_history)

    ge_driver.handle_failed_nodes(
        demand_calculator.node_mgr.get_failed_nodes())

    demand_result = demand_calculator.finish()

    if ctx_handler:
        ctx_handler.set_context("[joining]")

    # details here are that we pass in nodes that matter (matched) and the driver figures out
    # which ones are new and need to be added via qconf
    joined = ge_driver.handle_join_cluster(
        [x for x in demand_result.compute_nodes if x.exists])

    ge_driver.handle_post_join_cluster(joined)

    if ctx_handler:
        ctx_handler.set_context("[scaling]")

    # bootup all nodes. Optionally pass in a filtered list
    if demand_result.new_nodes:
        if not dry_run:
            demand_calculator.bootup()

    if not dry_run:
        demand_calculator.update_history()

    # we also tell the driver about nodes that are unmatched. It filters them out
    # and returns a list of ones we can delete.
    idle_timeout = int(config.get("idle_timeout", 300))
    boot_timeout = int(config.get("boot_timeout", 3600))
    logging.fine("Idle timeout is %s", idle_timeout)

    unmatched_for_5_mins = demand_calculator.find_unmatched_for(
        at_least=idle_timeout)
    timed_out_booting = demand_calculator.find_booting(at_least=boot_timeout)

    # I don't care about nodes that have keep_alive=true
    timed_out_booting = [n for n in timed_out_booting if not n.keep_alive]

    timed_out_to_deleted = []
    unmatched_nodes_to_delete = []

    if timed_out_booting:
        logging.info("The following nodes have timed out while booting: %s",
                     timed_out_booting)
        timed_out_to_deleted = ge_driver.handle_boot_timeout(
            timed_out_booting) or []

    if unmatched_for_5_mins:
        node_expr = ", ".join([str(x) for x in unmatched_for_5_mins])
        logging.info("Unmatched for at least %s seconds: %s", idle_timeout,
                     node_expr)
        unmatched_nodes_to_delete = (
            ge_driver.handle_draining(unmatched_for_5_mins) or [])

    nodes_to_delete = []
    for node in timed_out_to_deleted + unmatched_nodes_to_delete:
        if node.assignments:
            logging.warning(
                "%s has jobs assigned to it so we will take no action.", node)
            continue
        nodes_to_delete.append(node)

    if nodes_to_delete:
        try:
            logging.info("Deleting %s", [str(n) for n in nodes_to_delete])
            delete_result = demand_calculator.delete(nodes_to_delete)

            if delete_result:
                # in case it has anything to do after a node is deleted (usually just remove it from the cluster)
                ge_driver.handle_post_delete(delete_result.nodes)
        except Exception as e:
            _exit_code = 1
            logging.warning(
                "Deletion failed, will retry on next iteration: %s", e)
            logging.exception(str(e))

    print_demand(config, demand_result, log=not dry_run)

    return demand_result
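
The scale-down section above boils down to: take the boot-timeout and idle candidates, spare boot-timeout nodes marked keep_alive, and never delete anything that still has job assignments. A condensed, self-contained sketch of that filtering (the node dicts are stand-ins for the real Node objects):

from typing import Dict, List

def pick_nodes_to_delete(timed_out_booting: List[Dict],
                         unmatched_idle: List[Dict]) -> List[Dict]:
    # keep_alive only protects nodes that timed out while booting, mirroring the code above
    timed_out_booting = [n for n in timed_out_booting if not n["keep_alive"]]
    deletable = []
    for node in timed_out_booting + unmatched_idle:
        if node["assignments"]:
            continue  # jobs are still assigned, so take no action
        deletable.append(node)
    return deletable

print([n["name"] for n in pick_nodes_to_delete(
    [{"name": "boot-1", "keep_alive": True, "assignments": []}],
    [{"name": "idle-1", "keep_alive": False, "assignments": []}],
)])  # ['idle-1']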
Example No. 14
def _parse_complexes(
    autoscale_config: Dict, complex_lines: List[str]
) -> Dict[str, "Complex"]:
    relevant_complexes = None
    if autoscale_config:
        relevant_complexes = autoscale_config.get("gridengine", {}).get(
            "relevant_complexes"
        )
        if relevant_complexes:
            # special handling of ccnodeid, since it is something we
            # create for the user
            relevant_complexes = relevant_complexes + ["ccnodeid"]

        if relevant_complexes:
            logging.info(
                "Restricting complexes for autoscaling to %s", relevant_complexes
            )

    complexes: List[Complex] = []
    headers = complex_lines[0].lower().replace("#", "").split()

    required = set(["name", "type", "consumable"])
    missing = required - set(headers)
    if missing:
        logging.error(
            "Could not parse complex file as it is missing expected columns: %s."
            + " Autoscale likely will not work.",
            list(missing),
        )
        return {}

    for n, line in enumerate(complex_lines[1:]):
        if line.startswith("#"):
            continue
        toks = line.split()
        if len(toks) != len(headers):
            logging.warning(
                "Could not parse complex at line {} - ignoring: '{}'".format(n, line)
            )
            continue
        c = dict(zip(headers, toks))
        try:

            if (
                relevant_complexes
                and c["name"] not in relevant_complexes
                and c["shortcut"] not in relevant_complexes
            ):
                logging.trace(
                    "Ignoring complex %s because it was not defined in gridengine.relevant_complexes",
                    c["name"],
                )
                continue

            complex = Complex(
                name=c["name"],
                shortcut=c.get("shortcut", c["name"]),
                complex_type=c["type"],
                relop=c.get("relop", "=="),
                requestable=c.get("requestable", "YES").lower() == "yes",
                consumable=c.get("consumable", "YES").lower() == "yes",
                default=c.get("default"),
                urgency=int(c.get("urgency", 0)),
            )

            complexes.append(complex)

        except Exception:
            logging.exception("Could not parse complex %s - %s", line, c)

    # TODO test RDH
    ret = partition_single(complexes, lambda x: x.name)
    shortcut_dict = partition_single(complexes, lambda x: x.shortcut)
    ret.update(shortcut_dict)
    return ret
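
_parse_complexes() treats the first line as a header row and zips each subsequent line against it. A small sketch of that parsing with a made-up two-line complex listing (the column names mirror the header handling above):

complex_lines = [
    "#name       shortcut  type  relop requestable consumable default urgency",
    "slots       s         INT   <=    YES         YES        1       1000",
]
headers = complex_lines[0].lower().replace("#", "").split()
toks = complex_lines[1].split()
row = dict(zip(headers, toks))
print(row["name"], row["type"], row["consumable"])  # slots INT YES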
Example No. 15
def autoscale_pbspro(
    config: Dict[str, Any],
    pbs_env: Optional[PBSProEnvironment] = None,
    pbs_driver: Optional[PBSProDriver] = None,
    ctx_handler: Optional[DefaultContextHandler] = None,
    node_history: Optional[NodeHistory] = None,
    dry_run: bool = False,
) -> DemandResult:
    global _exit_code

    assert not config.get("read_only", False)
    if dry_run:
        logging.warning("Running pbs autoscaler in dry run mode")
        # allow multiple instances
        config["lock_file"] = None
        # put in read only mode
        config["read_only"] = True

    # interface to PBSPro, generally by cli
    if pbs_driver is None:
        # allow tests to pass in a mock
        pbs_driver = PBSProDriver(config)

    if pbs_env is None:
        pbs_env = envlib.from_driver(config, pbs_driver)

    pbs_driver.initialize()

    config = pbs_driver.preprocess_config(config)

    logging.debug("Driver = %s", pbs_driver)

    demand_calculator = calculate_demand(config, pbs_env, ctx_handler,
                                         node_history)

    failed_nodes = demand_calculator.node_mgr.get_failed_nodes()
    for node in pbs_env.scheduler_nodes:
        if "down" in node.metadata.get("pbs_state", ""):
            failed_nodes.append(node)
    pbs_driver.handle_failed_nodes(failed_nodes)

    demand_result = demand_calculator.finish()

    if ctx_handler:
        ctx_handler.set_context("[joining]")

    # details here are that we pass in nodes that matter (matched) and the driver figures out
    # which ones are new and need to be added
    joined = pbs_driver.add_nodes_to_cluster(
        [x for x in demand_result.compute_nodes if x.exists])

    pbs_driver.handle_post_join_cluster(joined)

    if ctx_handler:
        ctx_handler.set_context("[scaling]")

    # bootup all nodes. Optionally pass in a filtered list
    if demand_result.new_nodes:
        if not dry_run:
            demand_calculator.bootup()

    if not dry_run:
        demand_calculator.update_history()

    # we also tell the driver about nodes that are unmatched. It filters them out
    # and returns a list of ones we can delete.
    idle_timeout = int(config.get("idle_timeout", 300))
    boot_timeout = int(config.get("boot_timeout", 3600))
    logging.fine("Idle timeout is %s", idle_timeout)

    unmatched_for_5_mins = demand_calculator.find_unmatched_for(
        at_least=idle_timeout)
    timed_out_booting = demand_calculator.find_booting(at_least=boot_timeout)

    # I don't care about nodes that have keep_alive=true
    timed_out_booting = [n for n in timed_out_booting if not n.keep_alive]

    timed_out_to_deleted = []
    unmatched_nodes_to_delete = []

    if timed_out_booting:
        logging.info("The following nodes have timed out while booting: %s",
                     timed_out_booting)
        timed_out_to_deleted = pbs_driver.handle_boot_timeout(
            timed_out_booting) or []

    if unmatched_for_5_mins:
        logging.info("unmatched_for_5_mins %s", unmatched_for_5_mins)
        unmatched_nodes_to_delete = (
            pbs_driver.handle_draining(unmatched_for_5_mins) or [])

    nodes_to_delete = []
    for node in timed_out_to_deleted + unmatched_nodes_to_delete:
        if node.assignments:
            logging.warning(
                "%s has jobs assigned to it so we will take no action.", node)
            continue
        nodes_to_delete.append(node)

    if nodes_to_delete:
        try:
            logging.info("Deleting %s", [str(n) for n in nodes_to_delete])
            delete_result = demand_calculator.delete(nodes_to_delete)

            if delete_result:
                # in case it has anything to do after a node is deleted (usually just remove it from the cluster)
                pbs_driver.handle_post_delete(delete_result.nodes)
        except Exception as e:
            _exit_code = 1
            logging.warning(
                "Deletion failed, will retry on next iteration: %s", e)
            logging.exception(str(e))

    print_demand(config, demand_result, log=not dry_run)

    return demand_result