Example #1
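A `do_allocate` method that forwards a job's constraints to `NodeManager.allocate`: whole-node requests go through `node_count`, while iteration-based requests use `slot_count` and decrement `iterations_remaining` by the slots actually allocated; colocated jobs force all-or-nothing allocation.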
    def do_allocate(
        self,
        node_mgr: nodemanager.NodeManager,
        allow_existing: bool,
        all_or_nothing: bool,
    ) -> AllocationResult:
        if self.__node_count > 0:
            return node_mgr.allocate(
                self._constraints,
                node_count=self.__node_count,
                allow_existing=allow_existing,
                all_or_nothing=self.__colocated,
                assignment_id=self.name,
            )

        assert self.iterations_remaining > 0

        result = node_mgr.allocate(
            self._constraints,
            slot_count=self.iterations_remaining,
            allow_existing=allow_existing,
            # colocated is always all or nothing
            all_or_nothing=all_or_nothing or self.colocated,
            assignment_id=self.name,
        )
        if result:
            self.iterations_remaining -= result.total_slots
        return result
Example #2
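Registers a default `ccnodeid` resource on all buckets by passing a callable that reads each node's delayed node id.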
def add_ccnodeid_default_resource(node_mgr: NodeManager) -> None:
    """
    In order for this to function properly, ccnodeid must be defined
    """
    def get_node_id(n: Node) -> Optional[str]:
        return n.delayed_node_id.node_id

    node_mgr.add_default_resource({}, "ccnodeid", get_node_id)
Example #3
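Checks cluster-wide and per-region core-count accounting: consumed core counts start at zero and increase by the four vcpus of the node allocated at the end of the test.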
def test_top_level_limits(node_mgr: NodeManager) -> None:
    assert node_mgr.cluster_max_core_count == 10_000
    assert node_mgr.cluster_consumed_core_count == 0
    assert ["westus2"] == node_mgr.get_locations()
    assert node_mgr.get_regional_consumed_core_count("westus2") == 0
    assert node_mgr.get_regional_max_core_count("westus2") == 80

    assert node_mgr.allocate({"node.vcpu_count": 4}, node_count=1)
    assert node_mgr.cluster_consumed_core_count == 4
    assert node_mgr.get_regional_consumed_core_count("westus2") == 4
Example #4
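Demonstrates over-allocation behavior: with only 9 nodes left in the `htc` nodearray, an all-or-nothing request for 10 fails, while the same request without `all_or_nothing` returns the 9 nodes that fit.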
def test_over_allocate(node_mgr: NodeManager) -> None:
    assert node_mgr.allocate({"node.nodearray": "htc"}, node_count=1)
    # can't allocate 10, because there are only 9 left
    assert not node_mgr.allocate(
        {"node.nodearray": "htc"}, node_count=10, all_or_nothing=True)

    result = node_mgr.allocate({"node.nodearray": "htc"},
                               node_count=10,
                               all_or_nothing=False)
    assert result and len(result.nodes) == 9
    assert result.nodes[0].nodearray == "htc"
Example #5
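A helper, apparently a closure over per-job test data defined in the enclosing scope (`magnitudes`, `slots_or_nodes`, `ncpus_per_job`, `exclusivity`), that issues a mix of node-count and slot-count allocation requests.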
def make_requests(node_mgr: NodeManager) -> None:
    for n, mag in enumerate(magnitudes):
        node_count = None if slots_or_nodes[n] else mag
        slot_count = None if node_count else mag
        node_mgr.allocate(
            {
                "ncpus": ncpus_per_job[n],
                "exclusive": exclusivity[n]
            },
            node_count=node_count,
            slot_count=slot_count,
        )
Example #6
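With no buckets defined at all, adding a job to the `DemandCalculator` fails and the result's status is `NoBucketsDefined`.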
def test_no_buckets():
    node_mgr = NodeManager(MockClusterBinding(), [])
    dc = DemandCalculator(
        node_mgr, NullNodeHistory(), singleton_lock=util.NullSingletonLock()
    )
    result = dc._add_job(Job("1", {"ncpus": 2}))
    assert not result
    assert "NoBucketsDefined" == result.status
Example #7
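A `preprocess_node_mgr` override that adds two default resources: `group_id`, derived from each node's placement group (falling back to `_none_`), and `ungrouped`, a lowercase boolean string indicating whether the node has no placement group.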
    def preprocess_node_mgr(self, config: Dict, node_mgr: NodeManager) -> None:
        """
        We add a default resource to map group_id to node.placement_group
        """
        super().preprocess_node_mgr(config, node_mgr)

        def group_id(node: Node) -> str:
            return node.placement_group if node.placement_group else "_none_"

        node_mgr.add_default_resource({},
                                      "group_id",
                                      group_id,
                                      allow_none=False)

        def ungrouped(node: Node) -> str:
            return str(not bool(node.placement_group)).lower()

        node_mgr.add_default_resource({}, "ungrouped", ungrouped)
Example #8
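Validation that cross-references Grid Engine hostgroup constraints with node buckets and warns when a bucket is matched by no hostgroup, or by more than one hostgroup when the scheduler does not sort by seq_no.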
def validate_hg_intersections(ge_env: GridEngineEnvironment,
                              node_mgr: NodeManager,
                              warn_function: WarnFunction) -> bool:
    bucket_to_hgs: Dict[str, Set[str]] = {}
    for bucket in node_mgr.get_buckets():
        if str(bucket) not in bucket_to_hgs:
            bucket_to_hgs[str(bucket)] = set()

    by_str = partition_single(node_mgr.get_buckets(), str)

    for queue in ge_env.queues.values():
        if not queue.autoscale_enabled:
            continue

        for hostgroup in queue.bound_hostgroups.values():
            for bucket in node_mgr.get_buckets():
                is_satisfied = True
                for constraint in hostgroup.constraints:
                    result = constraint.satisfied_by_bucket(bucket)
                    if not result:
                        is_satisfied = False
                        break
                if is_satisfied:
                    bucket_to_hgs[str(bucket)].add(hostgroup.name)

    failure = False
    for bkey, matches in bucket_to_hgs.items():
        bucket = by_str[bkey]
        if not matches:
            warn_function(
                "%s is not matched by any hostgroup. This is not an error.",
                bucket,
            )
        elif len(matches) > 1:
            # seq_no will be used to determine ties
            if not ge_env.scheduler.sort_by_seqno:
                warn_function(
                    "%s is matched by more than one hostgroup %s. This is not an error.",
                    bucket,
                    ",".join(matches),
                )
    return failure
Example #9
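Generates default placement groups per nodearray/VM size, skipping nodearrays that already define placement groups or whose buckets do not support colocation, and adding up to `generated_placement_group_buffer` (default 2) placement groups whose names are not already in use.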
def add_default_placement_groups(config: Dict, node_mgr: NodeManager) -> None:
    nas = config.get("nodearrays", {})
    for name, child in nas.items():
        if child.get("placement_groups"):
            return

    by_pg = partition(node_mgr.get_buckets(), lambda b:
                      (b.nodearray, b.placement_group))
    by_na_vm = partition(node_mgr.get_buckets(), lambda b:
                         (b.nodearray, b.vm_size))

    for key, buckets in by_na_vm.items():
        nodearray, vm_size = key
        non_pg_buckets = [b for b in buckets if not b.placement_group]
        if not non_pg_buckets:
            # hardcoded PlacementGroupId
            logging.debug(
                "Nodearray %s defines PlacementGroupId, so no additional " +
                "placement groups will be created automatically.",
                nodearray,
            )
            continue
        bucket = non_pg_buckets[0]
        if not bucket.supports_colocation:
            continue

        buf_size = int(
            nas.get(nodearray, {}).get("generated_placement_group_buffer", 2))
        buf_remaining = buf_size
        pgi = 0
        while buf_remaining > 0:
            pg_name = ht.PlacementGroup("{}_pg{}".format(vm_size, pgi))
            pg_key = (nodearray, pg_name)
            if pg_key not in by_pg:
                logging.fine("Adding placement group %s", pg_name)
                node_mgr.add_placement_group(pg_name, bucket)
                buf_remaining -= 1
            pgi += 1
Example #10
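A packing test: slot requests are packed onto already-allocated new nodes before additional nodes are created, so three requests for 2, 4, and 2 slots against 4-cpu `htc` nodes end up on just two nodes.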
def test_packing(node_mgr: NodeManager) -> None:
    # htc node can fit 4 ncpus, so only allocate one node
    result = node_mgr.allocate(
        {"node.nodearray": "htc", "ncpus": 1}, slot_count=2
    )
    assert result, str(result)
    assert len(result.nodes) == 1, result.nodes
    assert result.nodes[0].name == "htc-1"
    assert result.nodes[0].resources["ncpus"] == 4
    assert result.nodes[0].available["ncpus"] == 2, result.nodes[0].available[
        "ncpus"]
    assert len(node_mgr.new_nodes) == 1, len(node_mgr.new_nodes)

    # htc node can fit 4 ncpus, but 2 are used up on the first node, so allocate a second
    result = node_mgr.allocate(
        {"node.nodearray": "htc", "ncpus": 1}, slot_count=4
    )
    assert result
    assert len(result.nodes) == 2, result.nodes
    assert result.nodes[0].name == "htc-1"
    assert result.nodes[1].name == "htc-2"
    assert len(node_mgr.new_nodes) == 2, [n.name for n in node_mgr.new_nodes]
    assert len(set([n.name for n in node_mgr.new_nodes])) == 2
    result = node_mgr.allocate(
        {"node.nodearray": "htc", "ncpus": 1}, slot_count=2
    )
    assert len(result.nodes) == 1
    assert result.nodes[0].name == "htc-2"

    assert len(node_mgr.new_nodes) == 2
Example #11
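A minimal single-node allocation constrained to the `htc` nodearray.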
def test_single_alloc(node_mgr: NodeManager) -> None:
    result = node_mgr.allocate({"node.nodearray": "htc"}, node_count=1)
    assert result and len(result.nodes) == 1
    assert result.nodes[0].nodearray == "htc"
Example #12
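Adding unmanaged scheduler nodes creates a new bucket keyed by their `bucket_id`; further unmanaged nodes with the same bucket id join that bucket, and re-adding the same nodes does not duplicate them.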
def test_unmanaged_nodes(node_mgr: NodeManager) -> None:
    assert len(node_mgr.get_buckets()) == 2
    tux = SchedulerNode("tux", bucket_id=ht.BucketId("tuxid"))
    node_mgr.add_unmanaged_nodes([tux])
    assert len(node_mgr.get_buckets()) == 3
    assert node_mgr.get_buckets_by_id()[tux.bucket_id].nodes == [tux]

    tux2 = SchedulerNode("tux2", bucket_id=tux.bucket_id)
    node_mgr.add_unmanaged_nodes([tux2])
    assert len(node_mgr.get_buckets()) == 3
    assert node_mgr.get_buckets_by_id()[tux.bucket_id].nodes == [tux, tux2]

    node_mgr.add_unmanaged_nodes([tux, tux2])
    assert len(node_mgr.get_buckets()) == 3
    assert node_mgr.get_buckets_by_id()[tux.bucket_id].nodes == [tux, tux2]
Example #13
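Aliases a value from `node.software_configuration` as a bucket resource by passing a string attribute path instead of a callable.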
def test_node_software_configuration_alias(node_mgr: NodeManager) -> None:
    node_mgr.add_default_resource({}, "int_alias",
                                  "node.software_configuration.custom_int")
    b = node_mgr.get_buckets()[0]
    assert b.resources["int_alias"] == b.software_configuration["custom_int"]
Example #14
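Same pattern as above, aliasing an existing `node.resources` value (`memgb`) under a new resource name.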
def test_node_resources_alias(node_mgr: NodeManager) -> None:
    node_mgr.add_default_resource({}, "memgb_alias", "node.resources.memgb")
    b = node_mgr.get_buckets()[0]
    assert b.resources["memgb_alias"] == b.resources["memgb"]