Example #1
def test_config_based_default_resources(bindings) -> None:
    config = {"_mock_bindings": bindings, "default_resources": []}
    node_mgr = new_node_manager(config)
    for b in node_mgr.get_buckets():
        assert "blah" not in b.resources

    config["default_resources"].append({
        "select": {},
        "name": "blah",
        "value": "node.pcpu_count"
    })

    node_mgr = new_node_manager(config)
    for b in node_mgr.get_buckets():
        assert b.resources["blah"] == b.pcpu_count
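The same default resource can also be registered programmatically instead of via the config list; a minimal sketch, assuming the add_default_resource call shown in examples #6 and #11 behaves the same way as the config-based form above:

node_mgr = new_node_manager({"_mock_bindings": bindings})
# an empty selection applies to every bucket; "node.pcpu_count" is resolved per node
node_mgr.add_default_resource({}, "blah", "node.pcpu_count")
for b in node_mgr.get_buckets():
    assert b.resources["blah"] == b.pcpu_count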
Example #2
def onprem_burst_node_mgr() -> None:
    # Unlike the SchedulerNode above, here we can define the vcpu_count and memory
    # for the onprem nodes.
    onprem_res = {"onprem": True, "nodetype": "A"}
    onprem001 = UnmanagedNode(
        "onprem001", vcpu_count=16, memory=Memory(128, "g"), resources=onprem_res
    )
    onprem002 = UnmanagedNode(
        "onprem002", vcpu_count=32, memory=Memory(256, "g"), resources=onprem_res
    )

    node_mgr = new_node_manager(CONFIG, existing_nodes=[onprem001, onprem002])
    node_mgr.add_default_resource({"node.nodearray": "htc"}, "nodetype", "A")

    result = node_mgr.allocate({"nodetype": "A"}, node_count=5)
    if result:
        print(
            "Allocated {} nodes, {} are new".format(
                len(result.nodes), len(node_mgr.new_nodes)
            )
        )
    else:
        print("Failed! {}".format(result))

    if not DRY_RUN:
        node_mgr.bootup()
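Because this allocation can be satisfied by a mix of the onprem nodes and newly requested CycleCloud nodes, the result could be split with the node.exists flag used in example #8. A small sketch continuing the example above, assuming the UnmanagedNode instances report exists=True like other pre-existing nodes:

onprem_allocated = [n for n in result.nodes if n.exists]
new_cloud_nodes = [n for n in result.nodes if not n.exists]
print("{} onprem, {} new cloud nodes".format(len(onprem_allocated), len(new_cloud_nodes)))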
Example #3
def target_counts_node_mgr() -> None:
    """
    Handle a mixture of 'target count' style allocation of ncpus and nodes via the
    NodeMgr.
    """
    node_mgr = new_node_manager(CONFIG)

    result = node_mgr.allocate({"node.nodearray": "htc"}, node_count=2)

    if result:
        print("Allocated {} nodes.".format(len(result.nodes)))
    else:
        print("Failed! {}".format(result))

    result = node_mgr.allocate({"node.nodearray": "htc", "memgb": 1}, slot_count=128)

    if result:
        print("Allocated {} nodes".format(len(result.nodes)))
    else:
        print("Failed! {}".format(result))

    # you can also do Memory.value_of("100g")
    # or even (Memory.value_of("1g") * 100), as the memory object is supposed
    # to be used as a number

    print("Allocated {} nodes in total".format(len(node_mgr.new_nodes)))

    if not DRY_RUN:
        node_mgr.bootup()
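As the Memory comment above suggests, the memgb value can be given as a Memory object rather than a bare number; a brief sketch, assuming the Memory helper imported as in example #2 and that the memgb resource is defined in gigabytes:

# same allocation as above, but with the memory constraint spelled out via Memory
result = node_mgr.allocate(
    {"node.nodearray": "htc", "memgb": Memory.value_of("1g")}, slot_count=128
)
if result:
    print("Allocated {} nodes".format(len(result.nodes)))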
Example #4
def new_demand_calculator(
    config: Union[str, dict],
    existing_nodes: Optional[List[SchedulerNode]] = None,
    node_mgr: Optional[NodeManager] = None,
    node_history: Optional[NodeHistory] = None,
    disable_default_resources: bool = False,
    node_queue: Optional[NodeQueue] = None,
    singleton_lock: Optional[SingletonLock] = NullSingletonLock(),
) -> DemandCalculator:
    config_dict = load_config(config)

    existing_nodes = existing_nodes or []

    if node_mgr is None:
        node_mgr = new_node_manager(
            config_dict,
            disable_default_resources=disable_default_resources,
        )
    else:
        logging.initialize_logging(config_dict)

        if not disable_default_resources:
            node_mgr.set_system_default_resources()

    node_history = node_history or SQLiteNodeHistory()

    if singleton_lock is None:
        singleton_lock = new_singleton_lock(config_dict)

    dc = DemandCalculator(node_mgr, node_history, node_queue, singleton_lock)

    dc.update_scheduler_nodes(existing_nodes)

    return dc
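A minimal usage sketch of this factory. The hostnames and resources are illustrative, the CONFIG placeholder is reused from the other examples, and it assumes the SchedulerNode(hostname, resources) constructor referenced in example #2 and the finish() call shown in example #15:

scheduler_nodes = [
    SchedulerNode("onprem-0", {"ncpus": 4}),
    SchedulerNode("onprem-1", {"ncpus": 4}),
]
dcalc = new_demand_calculator(CONFIG, existing_nodes=scheduler_nodes)
# ... jobs / constraints would be added here ...
demand_result = dcalc.finish()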
Example #5
    def _setup_shell_locals(self, config: Dict) -> Dict:
        """
        Provides read only interactive shell. type hpcpackhelp()
        in the shell for more information
        """
        ctx = DefaultContextHandler("[interactive-readonly]")

        def hpcpackhelp() -> None:
            print("config               - dict representing autoscale configuration.")
            print("cli                  - object representing the CLI commands")
            print(
                "node_mgr             - ScaleLib NodeManager - interacts with CycleCloud"
                " for all node related activities - creation, deletion, limits, buckets etc."
            )
            print("hpcpackhelp          - This help function")

        shell_locals = {
            "config": config,
            "cli": self,
            "ctx": ctx,
            "node_mgr": new_node_manager(config),
            "hpcpackhelp": hpcpackhelp,
        }

        return shell_locals
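One hypothetical way to drop into the read-only shell with these locals is the standard-library code module; the cli variable and banner text below are illustrative:

import code

shell_locals = cli._setup_shell_locals(config)
code.interact(banner="type hpcpackhelp() for help", local=shell_locals)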
Example #6
def default_resources() -> None:
    """
        have printer print out every resource for ever bucket. (and get_columns)
        add gpus by default (node.gpu_count, node.gpu_sku, node.gpu_vendor)
    """
    # now we will disable the default resources, ncpus/pcpus/gpus etc
    # and define them ourselves.
    node_mgr = new_node_manager(CONFIG, disable_default_resources=True)

    # let's define gpus for every node
    # then, for nodes that actually have a gpu, let's set the pcpus
    # to equal the number of gpus * 2

    # define ngpus
    node_mgr.add_default_resource({}, "ngpus", "node.gpu_count")
    # also could have just passed in a lambda/function
    # node_mgr.add_default_resource({}, "gpus", lambda node: node.gpu_count)

    # now that ngpus is defined, we can use ngpus: 1 here to select only the nodes
    # that have at least one gpu. Let's set pcpus to 2 * ngpus
    node_mgr.add_default_resource({"ngpus": 1}, "pcpus",
                                  lambda node: node.resources["ngpus"] * 2)

    # and lastly, for all other nodes, we will apply the system defaults
    node_mgr.set_system_default_resources()

    has_gpu = node_mgr.example_node("southcentralus", "Standard_NV24")
    no_gpu = node_mgr.example_node("southcentralus", "Standard_F16s")

    print(has_gpu.vm_size,
          " -> %(ngpus)s ngpus %(pcpus)s pcpus" % has_gpu.resources)
    print(no_gpu.vm_size,
          " -> %(ngpus)s ngpus %(pcpus)s pcpus" % no_gpu.resources)
Example #7
def target_counts_node_mgr() -> None:
    """
        break allocate to add_nodes / scale_to

    """
    node_mgr = new_node_manager(CONFIG)

    result = node_mgr.allocate({"node.nodearray": "htc"}, node_count=2)

    if result:
        print("Allocated {} nodes".format(len(result.nodes)))
    else:
        print("Failed! {}".format(result))

    result = node_mgr.allocate({
        "node.nodearray": "htc",
        "memgb": 1
    },
                               slot_count=128)

    if result:
        print("Allocated {} nodes".format(len(result.nodes)))
    else:
        print("Failed! {}".format(result))

    # you can also do Memory.value_of("100g")
    # or even (Memory.value_of("1g") * 100), as the memory object is supposed
    # to be used as a number

    print("Allocated {} nodes in total".format(len(node_mgr.new_nodes)))

    if not DRY_RUN:
        node_mgr.bootup()
Example #8
def test_custom_node_attrs_and_node_config() -> None:
    b = MockClusterBinding()
    b.add_nodearray("htc", {},
                    software_configuration={"myscheduler": {
                        "A": 1
                    }})
    b.add_bucket("htc", "Standard_F2", 10, 10)
    b.add_node("htc-1", "htc")
    node_mgr = new_node_manager({"_mock_bindings": b})
    (existing_node, ) = node_mgr.get_nodes()

    try:
        existing_node.node_attribute_overrides["willfail"] = 123
        assert False
    except TypeError:
        pass

    result = node_mgr.allocate({"exclusive": True}, node_count=2)
    assert result
    (node, ) = [n for n in result.nodes if not n.exists]

    assert node.software_configuration.get("test_thing") is None
    node.node_attribute_overrides["Configuration"] = {"test_thing": "is set"}
    assert node.software_configuration.get("test_thing") == "is set"
    try:
        node.software_configuration["willfail"] = 123
        assert not node.software_configuration.get("willfail")
    except TypeError:
        pass

    # we won't handle dict merges here.
    assert node.software_configuration.get("myscheduler") == {"A": 1}

    node.node_attribute_overrides["Configuration"] = {"myscheduler": {"B": 2}}
    assert node.software_configuration.get("myscheduler") == {"B": 2}

    # if you want to add to the existing software_configuration, use
    # the node.software_configuration
    node.node_attribute_overrides["Configuration"][
        "myscsheduler"] = node.software_configuration.get("myscheduler", {})
    node.node_attribute_overrides["Configuration"]["myscheduler"]["B"] = 2

    node.node_attribute_overrides["Configuration"] = {
        "myscheduler": {
            "A": 1,
            "B": 2
        }
    }

    node.software_configuration["willsucceed"] = 123
    node.exists = True
    try:
        node.software_configuration["willfail"] = 123
        assert False
    except TypeError:
        pass
Example #9
def resources(config: Dict, constraint_expr: str) -> None:
    ge_env = environment.from_qconf(config)
    ge_driver = autoscaler.new_driver(config, ge_env)
    node_mgr = new_node_manager(config, existing_nodes=ge_driver)

    filtered = _query_with_constraints(config, constraint_expr, node_mgr.get_buckets())

    columns = set()
    for node in filtered:
        columns.update(set(node.resources.keys()))

    # the original snippet is truncated here; presumably the collected resource
    # names become the output columns, e.g.:
    config["output_columns"] = sorted(columns)
Example #10
def manual_node_mgmt() -> None:
    node_mgr = new_node_manager(CONFIG)

    assert node_mgr.allocate({}, node_count=2)
    if node_mgr.new_nodes:
        node_mgr.bootup()

    node1, node2 = node_mgr.get_nodes()

    assert node1 in node_mgr.get_nodes()
    res = node_mgr.delete([node1])
    print(res)
    assert node1 not in node_mgr.get_nodes(), node_mgr.get_nodes()
Example #11
def default_resources() -> None:
    """
    An example of how to programmatically create default resources.
    """
    # now we will disable the default resources, ncpus/pcpus/gpus etc
    # and define them ourselves.
    CONFIG["disable_default_resources"] = True
    node_mgr = new_node_manager(CONFIG)

    # let's define gpus for every node
    # then, for nodes that actually have a gpu, let's set the pcpus
    # to equal the number of gpus * 2

    # define ngpus
    node_mgr.add_default_resource({}, "ngpus", "node.gpu_count")
    # also could have just passed in a lambda/function
    # node_mgr.add_default_resource({}, "gpus", lambda node: node.gpu_count)

    # now that ngpus is defined, we can use ngpus: 1 here to select only the nodes
    # that have at least one gpu. Let's set pcpus to 2 * ngpus
    node_mgr.add_default_resource(
        selection={"ngpus": 1},
        resource_name="pcpus",
        default_value=lambda node: node.resources["ngpus"] * 2,
    )
    # and for nodes without GPUs, let's just use the actual pcpu_count
    node_mgr.add_default_resource(
        selection={"ngpus": 0},
        resource_name="pcpus",
        default_value=lambda node: node.pcpu_count,
    )
    # note that a blank selection would have accomplished the same thing, as default resources
    # are processed in order.
    # node_mgr.add_default_resource({}, "pcpus", lambda node: node.pcpu_count)

    has_gpu = node_mgr.example_node("southcentralus", "Standard_NV24")
    no_gpu = node_mgr.example_node("southcentralus", "Standard_F16s")

    print(
        has_gpu.vm_size,
        "(pcpu_count=%s)" % has_gpu.pcpu_count,
        "-> %(ngpus)s ngpus %(pcpus)s pcpus" % has_gpu.resources,
    )
    print(
        no_gpu.vm_size,
        "(pcpu_count=%s)" % has_gpu.pcpu_count,
        "-> %(ngpus)s ngpus %(pcpus)s pcpus" % no_gpu.resources,
    )
Example #12
def buckets(
    config: Dict,
    constraint_expr: str,
    output_columns: Optional[List[str]] = None,
    output_format: Optional[str] = None,
) -> None:
    """Prints out autoscale bucket information, like limits etc"""
    ge_env = environment.from_qconf(config)
    ge_driver = autoscaler.new_driver(config, ge_env)
    config = ge_driver.preprocess_config(config)
    node_mgr = new_node_manager(config)
    specified_output_columns = output_columns
    output_columns = output_columns or [
        "nodearray",
        "placement_group",
        "vm_size",
        "vcpu_count",
        "pcpu_count",
        "memory",
        "available_count",
    ]

    if specified_output_columns is None:
        for bucket in node_mgr.get_buckets():
            for resource_name in bucket.resources:
                if resource_name not in output_columns:
                    output_columns.append(resource_name)

            for attr in dir(bucket.limits):
                if attr[0].isalpha() and "count" in attr:
                    value = getattr(bucket.limits, attr)
                    if isinstance(value, int):
                        bucket.resources[attr] = value
                        bucket.example_node._resources[attr] = value

    filtered = _query_with_constraints(config, constraint_expr,
                                       node_mgr.get_buckets())

    demand_result = DemandResult([], [f.example_node for f in filtered], [],
                                 [])

    if "all" in output_columns:
        output_columns = ["all"]
    config["output_columns"] = output_columns

    autoscaler.print_demand(config, demand_result, output_columns,
                            output_format)
Example #13
def new_demand_calculator(
    config: Dict,
    pbs_env: Optional[PBSProEnvironment] = None,
    pbs_driver: Optional["PBSProDriver"] = None,
    ctx_handler: Optional[DefaultContextHandler] = None,
    node_history: Optional[NodeHistory] = None,
    singleton_lock: Optional[SingletonLock] = None,
) -> DemandCalculator:
    if pbs_driver is None:
        pbs_driver = PBSProDriver(config)

    if pbs_env is None:
        pbs_env = envlib.from_driver(config, pbs_driver)

    if node_history is None:
        node_history = pbs_driver.new_node_history(config)

    # keep it as a config
    node_mgr = new_node_manager(config, existing_nodes=pbs_env.scheduler_nodes)
    pbs_driver.preprocess_node_mgr(config, node_mgr)
    singleton_lock = singleton_lock or pbs_driver.new_singleton_lock(config)
    assert singleton_lock

    demand_calculator = dcalclib.new_demand_calculator(
        config,
        node_mgr=node_mgr,
        node_history=node_history,
        node_queue=pbs_driver.new_node_queue(config),
        singleton_lock=singleton_lock,  # it will handle the none case,
        existing_nodes=pbs_env.scheduler_nodes,
    )

    ccnode_id_added = False

    for bucket in demand_calculator.node_mgr.get_buckets():

        # ccnodeid will almost certainly not be defined. It just needs
        # to be defined once, so we will add a default for all nodes
        # the first time we see it is missing
        if "ccnodeid" not in bucket.resources and not ccnode_id_added:
            hpc.autoscale.job.driver.add_ccnodeid_default_resource(
                demand_calculator.node_mgr)
            ccnode_id_added = True

    return demand_calculator
Example #14
def test_basic() -> None:
    binding = MockClusterBinding()
    binding.add_nodearray("hpc", {"ncpus": "node.vcpu_count"})
    binding.add_bucket("hpc", "Standard_F4", max_count=100, available_count=100)
    node_mgr = new_node_manager({"_mock_bindings": binding})
    bucket = node_mgr.get_buckets()[0]

    assert 100 == bucket.available_count
    bucket.decrement(5)
    assert 95 == bucket.available_count
    bucket.rollback()
    assert 100 == bucket.available_count
    bucket.decrement(5)
    assert 95 == bucket.available_count
    bucket.commit()
    assert 95 == bucket.available_count
    bucket.decrement(5)
    assert 90 == bucket.available_count
    bucket.rollback()
    assert 95 == bucket.available_count
Example #15
def autoscale_grid_engine(
    config: Dict[str, Any],
    ge_env: Optional[GridEngineEnvironment] = None,
    ge_driver: Optional["GridEngineDriver"] = None,
    ctx_handler: Optional[DefaultContextHandler] = None,
    node_history: Optional[NodeHistory] = None,
    dry_run: bool = False,
) -> DemandResult:
    global _exit_code

    assert not config.get("read_only", False)
    if dry_run:
        logging.warning("Running gridengine autoscaler in dry run mode")
        # allow multiple instances
        config["lock_file"] = None
        # put in read only mode
        config["read_only"] = True

    if ge_env is None:
        ge_env = envlib.from_qconf(config)

    # interface to GE, generally by cli
    if ge_driver is None:
        # allow tests to pass in a mock
        ge_driver = new_driver(config, ge_env)

    ge_driver.initialize_environment()

    config = ge_driver.preprocess_config(config)

    logging.fine("Driver = %s", ge_driver)

    invalid_nodes = []

    # we need an instance without any scheduler nodes, so don't
    # pass in the existing nodes.
    tmp_node_mgr = new_node_manager(config)

    by_hostname = partition_single(tmp_node_mgr.get_nodes(),
                                   lambda n: n.hostname_or_uuid)

    for node in ge_env.nodes:
        # there are many combinations of 'a', 'u' and other states; however,
        # as long as both 'a' and 'u' are present, the node is down
        state = node.metadata.get("state", "")
        cc_node = by_hostname.get(node.hostname)
        ccnodeid = node.resources.get("ccnodeid")
        if cc_node:
            if not ccnodeid or ccnodeid == cc_node.delayed_node_id.node_id:
                if cc_node.state in ["Preparing", "Acquiring"]:
                    continue
        if "a" in state and "u" in state:
            invalid_nodes.append(node)

    # nodes in error state must also be deleted
    nodes_to_delete = ge_driver.clean_hosts(invalid_nodes)
    for node in nodes_to_delete:
        ge_env.delete_node(node)

    demand_calculator = calculate_demand(config, ge_env, ge_driver,
                                         ctx_handler, node_history)

    ge_driver.handle_failed_nodes(
        demand_calculator.node_mgr.get_failed_nodes())

    demand_result = demand_calculator.finish()

    if ctx_handler:
        ctx_handler.set_context("[joining]")

    # details here are that we pass in nodes that matter (matched) and the driver figures out
    # which ones are new and need to be added via qconf
    joined = ge_driver.handle_join_cluster(
        [x for x in demand_result.compute_nodes if x.exists])

    ge_driver.handle_post_join_cluster(joined)

    if ctx_handler:
        ctx_handler.set_context("[scaling]")

    # bootup all nodes. Optionally pass in a filtered list
    if demand_result.new_nodes:
        if not dry_run:
            demand_calculator.bootup()

    if not dry_run:
        demand_calculator.update_history()

    # we also tell the driver about nodes that are unmatched. It filters them out
    # and returns a list of ones we can delete.
    idle_timeout = int(config.get("idle_timeout", 300))
    boot_timeout = int(config.get("boot_timeout", 3600))
    logging.fine("Idle timeout is %s", idle_timeout)

    unmatched_for_5_mins = demand_calculator.find_unmatched_for(
        at_least=idle_timeout)
    timed_out_booting = demand_calculator.find_booting(at_least=boot_timeout)

    # I don't care about nodes that have keep_alive=true
    timed_out_booting = [n for n in timed_out_booting if not n.keep_alive]

    timed_out_to_deleted = []
    unmatched_nodes_to_delete = []

    if timed_out_booting:
        logging.info("The following nodes have timed out while booting: %s",
                     timed_out_booting)
        timed_out_to_deleted = ge_driver.handle_boot_timeout(
            timed_out_booting) or []

    if unmatched_for_5_mins:
        node_expr = ", ".join([str(x) for x in unmatched_for_5_mins])
        logging.info("Unmatched for at least %s seconds: %s", idle_timeout,
                     node_expr)
        unmatched_nodes_to_delete = (
            ge_driver.handle_draining(unmatched_for_5_mins) or [])

    nodes_to_delete = []
    for node in timed_out_to_deleted + unmatched_nodes_to_delete:
        if node.assignments:
            logging.warning(
                "%s has jobs assigned to it so we will take no action.", node)
            continue
        nodes_to_delete.append(node)

    if nodes_to_delete:
        try:
            logging.info("Deleting %s", [str(n) for n in nodes_to_delete])
            delete_result = demand_calculator.delete(nodes_to_delete)

            if delete_result:
                # in case it has anything to do after a node is deleted (usually just remove it from the cluster)
                ge_driver.handle_post_delete(delete_result.nodes)
        except Exception as e:
            _exit_code = 1
            logging.warning(
                "Deletion failed, will retry on next iteration: %s", e)
            logging.exception(str(e))

    print_demand(config, demand_result, log=not dry_run)

    return demand_result
Example #16
def test_family_and_spots() -> None:
    bindings = MockClusterBinding("clusty")
    bindings.add_nodearray("htc", {},
                           spot=False,
                           max_count=10,
                           max_core_count=400)
    bindings.add_nodearray("hpc", {}, spot=False, max_placement_group_size=7)
    bindings.add_bucket(
        "htc",
        "Standard_F4s",
        max_count=20,
        available_count=10,
        family_consumed_core_count=40,
        family_quota_core_count=80,
        family_quota_count=20,
        regional_consumed_core_count=45,
        regional_quota_core_count=100,
        regional_quota_count=25,
    )

    bindings.add_bucket(
        "htc",
        "Standard_D4s_v3",
        max_count=20,
        available_count=10,
        family_consumed_core_count=40,
        family_quota_core_count=80,
        family_quota_count=20,
        regional_consumed_core_count=45,
        regional_quota_core_count=100,
        regional_quota_count=25,
    )

    bindings.add_bucket(
        "hpc",
        "Standard_D4s_v3",
        max_count=20,
        available_count=10,
        family_consumed_core_count=40,
        family_quota_core_count=80,
        family_quota_count=20,
        regional_consumed_core_count=45,
        regional_quota_core_count=100,
        regional_quota_count=25,
    )

    bindings.add_bucket(
        "hpc",
        "Standard_D4s_v3",
        max_count=20,
        available_count=10,
        family_consumed_core_count=40,
        family_quota_core_count=80,
        family_quota_count=20,
        regional_consumed_core_count=45,
        regional_quota_core_count=100,
        regional_quota_count=25,
        placement_groups=["123"],
    )

    bindings.add_nodearray("htcspot", {}, spot=True)
    bindings.add_bucket(
        "htcspot",
        "Standard_F4s",
        max_count=20,
        available_count=10,
        family_consumed_core_count=0,
        family_quota_core_count=0,
        family_quota_count=0,
        regional_consumed_core_count=45,
        regional_quota_core_count=100,
        regional_quota_count=25,
    )

    node_mgr = new_node_manager({"_mock_bindings": bindings})
    by_key: Dict[Tuple[str, str], List[NodeBucket]] = partition(
        node_mgr.get_buckets(), lambda b: (b.nodearray, b.vm_size)
    )
    htc = by_key[("htc", "Standard_F4s")][0]
    htc2 = by_key[("htc", "Standard_D4s_v3")][0]
    htcspot = by_key[("htcspot", "Standard_F4s")][0]
    hpcs = by_key[("hpc", "Standard_D4s_v3")]
    hpc_pg = [x for x in hpcs if x.placement_group][0]

    # ondemand instances use actual family quota
    assert htc.limits.family_max_count == 20
    assert htc2.limits.family_max_count == 20
    assert htc.limits.family_available_count == 10
    assert htc2.limits.family_available_count == 10

    # spot instances replace family with regional
    assert htcspot.limits.family_max_count == 25
    assert htcspot.limits.family_available_count == 13

    assert node_mgr.allocate(
        {
            "node.nodearray": "htc",
            "node.vm_size": "Standard_F4s"
        },
        node_count=1)
    # ondemand instances use actual family quota
    assert htc.limits.family_max_count == 20
    assert htc2.limits.family_max_count == 20
    assert htc.limits.family_available_count == 9
    assert htc2.limits.family_available_count == 10
    assert htc.limits.nodearray_available_count == 9
    assert htc2.limits.nodearray_available_count == 9
    assert htc.available_count == 9
    # nodearray limit affects htc2 since max_count=10
    assert htc2.available_count == 9

    # now the regional is affected by our allocation
    assert htcspot.limits.family_max_count == 25
    assert htcspot.limits.family_available_count == 13 - 1

    assert hpc_pg.available_count == 7
Example #17
def autoscale_hpcpack(
    config: Dict[str, Any],
    ctx_handler: Optional[DefaultContextHandler] = None,
    hpcpack_rest_client: Optional[HpcRestClient] = None,
    dry_run: bool = False,
) -> None:

    if not hpcpack_rest_client:
        hpcpack_rest_client = new_rest_client(config)

    if ctx_handler:
        ctx_handler.set_context("[Sync-Status]")
    autoscale_config = config.get("autoscale") or {}
    # Load history info
    idle_timeout_seconds: int = autoscale_config.get("idle_timeout") or 600
    provisioning_timeout_seconds = autoscale_config.get("boot_timeout") or 1500
    statefile = autoscale_config.get(
        "statefile") or "C:\\cycle\\jetpack\\config\\autoscaler_state.txt"
    archivefile = autoscale_config.get(
        "archivefile") or "C:\\cycle\\jetpack\\config\\autoscaler_archive.txt"
    node_history = HpcNodeHistory(
        statefile=statefile,
        archivefile=archivefile,
        provisioning_timeout=provisioning_timeout_seconds,
        idle_timeout=idle_timeout_seconds)

    logging.info("Synchronizing the nodes between Cycle cloud and HPC Pack")

    # Initialize data of History info, cc nodes, HPC Pack nodes, HPC grow decisions
    # Get node list from Cycle Cloud
    def nodes_state_key(n: Node) -> Tuple[int, str, int]:
        try:
            state_pri = 1
            if n.state == 'Deallocated':
                state_pri = 2
            elif n.state == 'Stopping':
                state_pri = 3
            elif n.state == 'Terminating':
                state_pri = 4
            name, index = n.name.rsplit("-", 1)
            return (state_pri, name, int(index))
        except Exception:
            return (state_pri, n.name, 0)

    node_mgr: NodeManager = new_node_manager(config)
    for b in node_mgr.get_buckets():
        b.nodes.sort(key=nodes_state_key)
    cc_nodes: List[Node] = node_mgr.get_nodes()
    cc_nodes_by_id = partition_single(cc_nodes,
                                      func=lambda n: n.delayed_node_id.node_id)
    # Get compute node list and grow decision from HPC Pack
    hpc_node_groups = hpcpack_rest_client.list_node_groups()
    grow_decisions = hpcpack_rest_client.get_grow_decision()
    logging.info("grow decision: {}".format(grow_decisions))
    hpc_cn_nodes: List[HpcNode] = hpcpack_rest_client.list_computenodes()
    hpc_cn_nodes = [n for n in hpc_cn_nodes if n.active]

    # This function will link node history items, cc nodes and hpc nodes
    node_history.synchronize(cc_nodes, hpc_cn_nodes)

    cc_nodearrays = set([b.nodearray for b in node_mgr.get_buckets()])
    logging.info("Current node arrays in cyclecloud: {}".format(cc_nodearrays))

    # Create HPC node groups for CC node arrays
    cc_map_hpc_groups = ["CycleCloudNodes"] + list(cc_nodearrays)
    for cc_grp in cc_map_hpc_groups:
        if ci_notin(cc_grp, hpc_node_groups):
            logging.info("Create HPC node group: {}".format(cc_grp))
            hpcpack_rest_client.add_node_group(cc_grp,
                                               "Cycle Cloud Node group")

    # Add HPC nodes into corresponding node groups
    add_cc_tag_nodes = [
        n.name for n in hpc_cn_nodes if n.shall_addcyclecloudtag
    ]
    if len(add_cc_tag_nodes) > 0:
        logging.info(
            "Adding HPC nodes to node group CycleCloudNodes: {}".format(
                add_cc_tag_nodes))
        hpcpack_rest_client.add_node_to_node_group("CycleCloudNodes",
                                                   add_cc_tag_nodes)
    for cc_grp in list(cc_nodearrays):
        add_array_tag_nodes = [
            n.name for n in hpc_cn_nodes
            if n.shall_addnodearraytag and ci_equals(n.cc_nodearray, cc_grp)
        ]
        if len(add_array_tag_nodes) > 0:
            logging.info("Adding HPC nodes to node group {}: {}".format(
                cc_grp, add_array_tag_nodes))
            hpcpack_rest_client.add_node_to_node_group(cc_grp,
                                                       add_array_tag_nodes)

    # Possible values for HPC NodeState (states marked with * shall not occur for CC nodes):
    #   Unknown, Provisioning, Offline, Starting, Online, Draining, Rejected(*), Removing, NotDeployed(*), Stopping(*)
    # Remove the following HPC Pack nodes:
    #   1. The corresponding CC node already removed
    #   2. The corresponding CC node is stopped and HPC node is not assigned a node template
    # Take offline the following HPC Pack nodes:
    #   1. The corresponding CC node is stopped or is going to stop
    hpc_nodes_to_remove = [
        n.name for n in hpc_cn_nodes
        if n.removed_cc_node or (n.stopped_cc_node and not n.template_assigned)
    ]
    hpc_nodes_to_take_offline = [
        n.name for n in hpc_cn_nodes
        if n.stopped_cc_node and ci_equals(n.state, "Online")
    ]
    if len(hpc_nodes_to_remove) > 0:
        logging.info("Removing the HPC nodes: {}".format(hpc_nodes_to_remove))
        if dry_run:
            logging.info("Dry-run: no real action")
        else:
            hpcpack_rest_client.remove_nodes(hpc_nodes_to_remove)
    hpc_cn_nodes = [
        n for n in hpc_cn_nodes if not (n.stopped_cc_node or n.removed_cc_node)
    ]

    # Assign default node template for unapproved CC node
    hpc_nodes_to_assign_template = [
        n.name for n in hpc_cn_nodes
        if n.bound_cc_node and not n.template_assigned
    ]
    if len(hpc_nodes_to_assign_template) > 0:
        logging.info(
            "Assigning default node template for the HPC nodes: {}".format(
                hpc_nodes_to_assign_template))
        if dry_run:
            logging.info("Dry-run: no real action")
        else:
            hpcpack_rest_client.assign_default_compute_node_template(
                hpc_nodes_to_assign_template)

    ### Start scale up checking:
    logging.info("Start scale up checking ...")
    if ctx_handler:
        ctx_handler.set_context("[scale-up]")

    hpc_nodes_with_active_cc = [
        n for n in hpc_cn_nodes if n.template_assigned and n.bound_cc_node
    ]
    # Exclude the already online healthy HPC nodes before calling node_mgr.allocate
    for hpc_node in hpc_nodes_with_active_cc:
        if hpc_node.ready_for_job:
            hpc_node.bound_cc_node.closed = True

    # Terminate the provisioning timeout CC nodes
    cc_node_to_terminate: List[Node] = []
    for cc_node in cc_nodes:
        if ci_equals(cc_node.target_state, 'Deallocated') or ci_equals(
                cc_node.target_state,
                'Terminated') or cc_node.create_time_remaining:
            continue
        nhi = node_history.find(cc_id=cc_node.delayed_node_id.node_id)
        if not nhi.hpc_id:
            cc_node.closed = True
            cc_node_to_terminate.append(cc_node)
        else:
            hpc_node = ci_find_one(hpc_nodes_with_active_cc, nhi.hpc_id,
                                   lambda n: n.id)
            if hpc_node and hpc_node.error:
                cc_node.closed = True
                cc_node_to_terminate.append(cc_node)

    # "ComputeNodes", "CycleCloudNodes", "AzureIaaSNodes" are all treated as default
    # grow_by_socket not supported yet, treat as grow_by_node
    defaultGroups = [
        "Default", "ComputeNodes", "AzureIaaSNodes", "CycleCloudNodes"
    ]
    default_cores_to_grow = default_nodes_to_grow = 0.0

    # If the current CC nodes in the node array cannot satisfy the grow decision, the group is hungry
    # For a hungry group, no idle check is required if the node health is OK
    group_hungry: Dict[str, bool] = {}
    nbrNewNodes: int = 0
    grow_groups = list(grow_decisions.keys())
    for grp in grow_groups:
        tmp = grow_decisions.pop(grp)
        if not (tmp.cores_to_grow + tmp.nodes_to_grow + tmp.sockets_to_grow):
            continue
        if ci_in(grp, defaultGroups):
            default_cores_to_grow += tmp.cores_to_grow
            default_nodes_to_grow += tmp.nodes_to_grow + tmp.sockets_to_grow
            continue
        if ci_notin(grp, cc_nodearrays):
            # grp was already popped into tmp above, so log tmp rather than
            # re-indexing (or re-popping) grow_decisions, which would raise KeyError
            logging.warning(
                "No mapping node array for the grow requirement {}:{}".format(
                    grp, tmp))
            continue
        group_hungry[grp] = False
        array = ci_lookup(grp, cc_nodearrays)
        selector = {'ncpus': 1, 'node.nodearray': [array]}
        target_cores = math.ceil(tmp.cores_to_grow)
        target_nodes = math.ceil(tmp.nodes_to_grow + tmp.sockets_to_grow)
        if target_nodes:
            logging.info("Allocate: {}  Target Nodes: {}".format(
                selector, target_nodes))
            result = node_mgr.allocate(selector, node_count=target_nodes)
            logging.info(result)
            if not result or result.total_slots < target_nodes:
                group_hungry[grp] = True
        if target_cores:
            logging.info("Allocate: {}  Target Cores: {}".format(
                selector, target_cores))
            result = node_mgr.allocate(selector, slot_count=target_cores)
            logging.info(result)
            if not result or result.total_slots < target_cores:
                group_hungry[grp] = True
        if len(node_mgr.new_nodes) > nbrNewNodes:
            group_hungry[grp] = True
        nbrNewNodes = len(node_mgr.new_nodes)

    # We then check the grow decision for the default node groups:
    checkShrinkNeeded = True
    growForDefaultGroup = True if default_nodes_to_grow or default_cores_to_grow else False
    if growForDefaultGroup:
        selector = {'ncpus': 1}
        if default_nodes_to_grow:
            target_nodes = math.ceil(default_nodes_to_grow)
            logging.info("Allocate: {}  Target Nodes: {}".format(
                selector, target_nodes))
            result = node_mgr.allocate({'ncpus': 1}, node_count=target_nodes)
            if not result or result.total_slots < target_nodes:
                checkShrinkNeeded = False
        if default_cores_to_grow:
            target_cores = math.ceil(default_cores_to_grow)
            logging.info("Allocate: {}  Target Cores: {}".format(
                selector, target_cores))
            result = node_mgr.allocate({'ncpus': 1}, slot_count=target_cores)
            if not result or result.total_slots < target_cores:
                checkShrinkNeeded = False
        if len(node_mgr.new_nodes) > nbrNewNodes:
            checkShrinkNeeded = False
        nbrNewNodes = len(node_mgr.new_nodes)

    if nbrNewNodes > 0:
        logging.info("Need to Allocate {} nodes in total".format(nbrNewNodes))
        if dry_run:
            logging.info("Dry-run: skipping node bootup...")
        else:
            logging.info("Allocating {} nodes in total".format(
                len(node_mgr.new_nodes)))
            bootup_result: BootupResult = node_mgr.bootup()
            logging.info(bootup_result)
            if bootup_result and bootup_result.nodes:
                for cc_node in bootup_result.nodes:
                    nhi = node_history.find(
                        cc_id=cc_node.delayed_node_id.node_id)
                    if nhi is None:
                        nhi = node_history.insert(
                            NodeHistoryItem(cc_node.delayed_node_id.node_id))
                    else:
                        nhi.restart()
    else:
        logging.info("No need to allocate new nodes ...")

    ### Start the shrink checking
    if ctx_handler:
        ctx_handler.set_context("[scale-down]")

    cc_node_to_shutdown: List[Node] = []
    if not checkShrinkNeeded:
        logging.info("No shrink check at this round ...")
        if not dry_run:
            for nhi in node_history.items:
                if not nhi.stopped and nhi.hpc_id:
                    nhi.idle_from = None
    else:
        logging.info("Start scale down checking ...")
        # By default, we check idle for active CC nodes in HPC Pack with 'Offline', 'Starting', 'Online', 'Draining' state
        candidate_idle_check_nodes = [
            n for n in hpc_nodes_with_active_cc
            if (not n.bound_cc_node.keep_alive)
            and ci_in(n.state, ["Offline", "Starting", "Online", "Draining"])
        ]

        # We can exclude some nodes from idle checking:
        # 1. If HPC Pack ask for grow in default node group(s), all healthy ONLINE nodes are considered as busy
        # 2. If HPC Pack ask for grow in certain node group, all healthy ONLINE nodes in that node group are considered as busy
        # 3. If a node group is hungry (new CC required or grow request not satisfied), no idle check needed for all nodes in that node array
        if growForDefaultGroup:
            candidate_idle_check_nodes = [
                n for n in candidate_idle_check_nodes if not n.ready_for_job
            ]
        for grp, hungry in group_hungry.items():
            if hungry:
                candidate_idle_check_nodes = [
                    n for n in candidate_idle_check_nodes
                    if not ci_equals(grp, n.cc_nodearray)
                ]
            elif not growForDefaultGroup:
                candidate_idle_check_nodes = [
                    n for n in candidate_idle_check_nodes
                    if not (ci_equals(grp, n.cc_nodearray) and n.ready_for_job)
                ]

        curtime = datetime.utcnow()
        # Offline node must be idle
        idle_node_names = [
            n.name for n in candidate_idle_check_nodes
            if ci_equals(n.state, 'Offline')
        ]
        if len(candidate_idle_check_nodes) > len(idle_node_names):
            idle_nodes = hpcpack_rest_client.check_nodes_idle([
                n.name for n in candidate_idle_check_nodes
                if not ci_equals(n.state, 'Offline')
            ])
            if len(idle_nodes) > 0:
                idle_node_names.extend([n.node_name for n in idle_nodes])

        if len(idle_node_names) > 0:
            logging.info(
                "The following nodes are idle: {}".format(idle_node_names))
        else:
            logging.info("No idle node found in this round.")

        retention_days = autoscale_config.get("vm_retention_days") or 7
        for nhi in node_history.items:
            if nhi.stopped:
                if nhi.stop_time + timedelta(
                        days=retention_days) < datetime.utcnow():
                    cc_node = cc_nodes_by_id.get(nhi.cc_id)
                    if cc_node is not None:
                        cc_node_to_terminate.append(cc_node)
                continue
            if ci_in(nhi.hostname, idle_node_names):
                if nhi.idle_from is None:
                    nhi.idle_from = curtime
                elif nhi.idle_timeout(idle_timeout_seconds):
                    nhi.stop_time = curtime
                    cc_node = cc_nodes_by_id.get(nhi.cc_id)
                    if cc_node is not None:
                        cc_node_to_shutdown.append(cc_node)
            else:
                nhi.idle_from = None

    shrinking_cc_node_ids = [
        n.delayed_node_id.node_id for n in cc_node_to_terminate
    ]
    shrinking_cc_node_ids.extend(
        [n.delayed_node_id.node_id for n in cc_node_to_shutdown])
    hpc_nodes_to_bring_online = [
        n.name for n in hpc_nodes_with_active_cc
        if ci_equals(n.state, 'Offline') and not n.error
        and ci_notin(n.cc_node_id, shrinking_cc_node_ids)
    ]
    hpc_nodes_to_take_offline.extend([
        n.name for n in hpc_nodes_with_active_cc
        if ci_equals(n.state, 'Online')
        and ci_in(n.cc_node_id, shrinking_cc_node_ids)
    ])
    if len(hpc_nodes_to_bring_online) > 0:
        logging.info("Bringing the HPC nodes online: {}".format(
            hpc_nodes_to_bring_online))
        if dry_run:
            logging.info("Dry-run: no real action")
        else:
            hpcpack_rest_client.bring_nodes_online(hpc_nodes_to_bring_online)

    if len(hpc_nodes_to_take_offline) > 0:
        logging.info("Taking the HPC nodes offline: {}".format(
            hpc_nodes_to_take_offline))
        if dry_run:
            logging.info("Dry-run: no real action")
        else:
            hpcpack_rest_client.take_nodes_offline(hpc_nodes_to_take_offline)

    if len(cc_node_to_shutdown) > 0:
        logging.info("Shut down the following Cycle cloud node: {}".format(
            [cn.name for cn in cc_node_to_shutdown]))
        if dry_run:
            logging.info("Dry-run: skip ...")
        else:
            node_mgr.shutdown_nodes(cc_node_to_shutdown)

    if len(cc_node_to_terminate) > 0:
        logging.info(
            "Terminating the following provisioning-timeout Cycle cloud nodes: {}"
            .format([cn.name for cn in cc_node_to_terminate]))
        if dry_run:
            logging.info("Dry-run: skip ...")
        else:
            node_mgr.terminate_nodes(cc_node_to_terminate)

    if not dry_run:
        logging.info("Save node history: {}".format(node_history))
        node_history.save()
Example #18
def _node_mgr(bindings: MockClusterBinding) -> NodeManager:
    return new_node_manager({"_mock_bindings": bindings})
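A sketch of how a helper like this might be used in a test, reusing the MockClusterBinding calls from examples #8 and #14 (the nodearray, VM size, and bucket counts here are arbitrary):

def test_allocate_with_mock_bindings() -> None:
    bindings = MockClusterBinding()
    bindings.add_nodearray("htc", {"ncpus": "node.vcpu_count"})
    bindings.add_bucket("htc", "Standard_F4", max_count=10, available_count=10)

    node_mgr = _node_mgr(bindings)
    result = node_mgr.allocate({"node.nodearray": "htc"}, node_count=2)
    assert result and len(node_mgr.new_nodes) == 2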
Example #19
def shutdown_nodes_node_mgr() -> None:
    node_names = ["htc-1"]
    node_mgr = new_node_manager(CONFIG)
    to_shutdown = [x for x in node_mgr.get_nodes() if x.name in node_names]
    if to_shutdown:
        node_mgr.delete(to_shutdown)
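If the intent is to shut the nodes down rather than remove them from the node manager entirely, the shutdown_nodes call used in example #17 could be substituted; a brief sketch under the same assumptions as the example above:

node_names = ["htc-1"]
node_mgr = new_node_manager(CONFIG)
to_shutdown = [x for x in node_mgr.get_nodes() if x.name in node_names]
if to_shutdown:
    # shutdown_nodes, as used in example #17, shuts the nodes down instead of deleting them
    node_mgr.shutdown_nodes(to_shutdown)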