def test_users() -> None:
    ge_env = common_ge_env()
    hpc_q = ge_env.queues["hpc.q"]

    # make sure common_ge_env didn't add users
    assert not hpc_q.user_lists
    assert not hpc_q.xuser_lists

    # this may seem odd, but this is how these become expressed
    # i.e. this hostgroup can run users ryan and ben, but not ben
    hpc_q.user_lists["@hpc.q_rr0"] = ["ryan", "ben"]
    hpc_q.xuser_lists["@hpc.q_rr0"] = ["ben"]

    hg = ge_env.hostgroups["@hpc.q_rr0"]
    bh = BoundHostgroup(hpc_q, hg, 0)

    # no user, ben and random should never succeed
    assert isinstance(bh.make_constraint(ge_env, user=None), Never)
    assert isinstance(bh.make_constraint(ge_env, user="ben"), Never)
    assert isinstance(bh.make_constraint(ge_env, user="random"), Never)

    # ok, the real constraint - user==ryan
    user_cons = bh.make_constraint(ge_env, user="ryan")
    assert isinstance(user_cons, And)

    node = SchedulerNode("tux")
    node._Node__nodearray = "hpc"
    assert user_cons.satisfied_by_node(node)
def test_clone() -> None:
    orig = SchedulerNode("lnx0", {"ncpus": 4})
    orig.metadata["exists_in_both"] = True
    new = orig.clone()
    assert new.available["ncpus"] == 4
    assert new.resources["ncpus"] == 4

    new.available["ncpus"] -= 1
    assert new.available["ncpus"] == 3
    assert orig.available["ncpus"] == 4

    job = Job("1", {"ncpus": 2})
    new.decrement(job._constraints, assignment_id=job.name)
    assert new.available["ncpus"] == 1
    assert orig.available["ncpus"] == 4
    assert new.assignments == set(["1"])
    assert orig.assignments == set()

    orig.metadata["exists_in_orig"] = True
    new.metadata["exists_in_new"] = True

    assert orig.metadata["exists_in_both"] is True
    assert "exists_in_new" not in orig.metadata
    assert orig.metadata["exists_in_orig"] is True

    assert new.metadata["exists_in_both"] is True
    assert new.metadata["exists_in_new"] is True
    assert "exists_in_orig" not in new.metadata
def test_projects() -> None:
    ge_env = common_ge_env()
    hpc_q = ge_env.queues["hpc.q"]

    # make sure common_ge_env didn't add projects
    assert not hpc_q.projects
    assert not hpc_q.xprojects

    # this may seem odd, but this is how these become expressed
    # i.e. this hostgroup can run prj1 and prj2, but not prj2
    hpc_q.projects["@hpc.q_rr0"] = ["prj1", "prj2"]
    hpc_q.xprojects["@hpc.q_rr0"] = ["prj2"]

    hg = ge_env.hostgroups["@hpc.q_rr0"]
    bh = BoundHostgroup(hpc_q, hg, 0)

    # no project, prj2 and random should never succeed
    # yes - GridEngine will NOT schedule a job if a project is not defined
    assert isinstance(bh.make_constraint(ge_env, project=None), Never)
    assert isinstance(bh.make_constraint(ge_env, project="prj2"), Never)
    assert isinstance(bh.make_constraint(ge_env, project="random"), Never)

    # ok, the real constraint - project==prj1
    prj1_cons = bh.make_constraint(ge_env, project="prj1")
    assert isinstance(prj1_cons, And)

    node = SchedulerNode("tux")
    node._Node__nodearray = "hpc"
    assert prj1_cons.satisfied_by_node(node)
def test_node_resource_constraint() -> None:
    assert (
        NodeResourceConstraint("blah", "A").to_dict()
        == get_constraint({"blah": ["A"]}).to_dict()
    )
    c = get_constraint({"blah": ["A"]})
    assert isinstance(c, NodeResourceConstraint)
    assert -1 == c.minimum_space(SchedulerNode(""))
    assert c.do_decrement(SchedulerNode(""))

    assert not c.satisfied_by_node(SchedulerNode("no-blah-define"))
    assert not c.satisfied_by_node(SchedulerNode("wrong-blah-define", {"blah": "B"}))
    assert c.satisfied_by_node(SchedulerNode("wrong-blah-define", {"blah": "A"}))
def test_or() -> None:
    assert (
        Or(
            NodeResourceConstraint("blah", "A"), NodeResourceConstraint("blah", "B")
        ).to_dict()
        == get_constraint({"or": [{"blah": ["A"]}, {"blah": ["B"]}]}).to_dict()
    )
    or_expr = {"or": [{"blah": ["A"]}, {"blah": ["B"]}]}
    assert isinstance(get_constraint(or_expr), Or)

    c = get_constraint({"node.vcpu_count": 2})
    assert -1 == c.minimum_space(SchedulerNode(""))
    assert c.do_decrement(SchedulerNode(""))
def do_draw(self, data: Any) -> SchedulerNode:
    import hypothesis.internal.conjecture.utils as d

    idx = d.integer_range(data, 0, 1_000_000_000)
    r = random.Random(idx)

    def draw_value() -> Optional[Any]:
        rtype_draw = r.randint(0, 4)
        if rtype_draw == 0:
            return r.randint(0, 100)
        elif rtype_draw == 1:
            return r.random() * 100
        elif rtype_draw == 2:

            def draw_letter():
                return r.choice(string.ascii_letters)

            return "".join([draw_letter() for n in range(r.randint(0, 100))])
        elif rtype_draw == 3:
            return r.random() < 0.5
        else:
            return None

    hostname = "n-o-d-e_-{}".format(r.randint(1, 1000000))

    resources: Dict[str, Optional[Any]] = {}
    num_resources = r.randint(0, 10)
    for n in range(num_resources):
        rname = "res-{}".format(n)
        resources[rname] = draw_value()

    node = SchedulerNode(ht.Hostname(hostname), resources, "bucket-id-123")

    for job_id in range(r.randint(0, 10)):
        node.assign(str(job_id))

    num_meta = r.randint(0, 10)
    for n in range(num_meta):
        mname = "meta-{}".format(n)
        node.metadata[mname] = draw_value()

    for rname, rvalue in node.resources.items():
        if r.random() > 0.5:
            # check bool before int: bool is a subclass of int, so the int
            # branch would otherwise swallow boolean resources
            if isinstance(rvalue, bool):
                node.available[rname] = rvalue ^ (r.random() < 0.5)
            elif isinstance(rvalue, int):
                node.available[rname] = rvalue - r.randint(0, rvalue + 1)
            elif isinstance(rvalue, float):
                node.available[rname] = rvalue * r.random()
            elif rvalue is None:
                # in theory you can change a null resource
                # into an actual value as available
                if r.random() < 0.25:
                    node.available[rname] = draw_value()

    return node
def new_node(
    pg: Optional[str] = None,
    hostname: str = "tux1",
    hostgroup: Optional[Hostgroup] = None,
) -> SchedulerNode:
    node = SchedulerNode(hostname, {"slot_type": "highmem"})
    if pg:
        node.placement_group = pg
    if hostgroup:
        util.add_node_to_hostgroup(node, hostgroup)
    return node
def test_unmanaged_nodes(node_mgr: NodeManager) -> None:
    assert len(node_mgr.get_buckets()) == 2
    tux = SchedulerNode("tux", bucket_id=ht.BucketId("tuxid"))
    node_mgr.add_unmanaged_nodes([tux])
    assert len(node_mgr.get_buckets()) == 3
    assert node_mgr.get_buckets_by_id()[tux.bucket_id].nodes == [tux]

    tux2 = SchedulerNode("tux2", bucket_id=tux.bucket_id)
    node_mgr.add_unmanaged_nodes([tux2])
    assert len(node_mgr.get_buckets()) == 3
    assert node_mgr.get_buckets_by_id()[tux.bucket_id].nodes == [tux, tux2]

    node_mgr.add_unmanaged_nodes([tux, tux2])
    assert len(node_mgr.get_buckets()) == 3
    assert node_mgr.get_buckets_by_id()[tux.bucket_id].nodes == [tux, tux2]
def _print_demand(output_format: OutputFormat) -> str:
    stream = io.StringIO()
    node = SchedulerNode("tux", {"ncpus": 2, "mem": Memory.value_of("1.0g")})
    node.available["ncpus"] = 1
    node.assign("11")
    node.assign("12")
    result = DemandResult([], [node], [], [])
    print_demand(
        ["hostname", "job_ids", "ncpus", "*ncpus", "mem"],
        result,
        stream=stream,
        output_format=output_format,
    )
    return stream.getvalue()
def run_test(
    ctype: str,
    node_pcpu: Optional[N],
    hg_pcpu: N,
    q_default_pcpu: N,
    complex_default: Optional[N],
) -> SchedulerNode:
    cast = float if ctype == "DOUBLE" else int

    node_res = {}
    if node_pcpu is not None:
        node_res["pcpu"] = cast(node_pcpu)
        node_res["p"] = cast(node_pcpu)

    node = SchedulerNode("tux", node_res)

    ge_env = common_ge_env()
    q = ge_env.queues["hpc.q"]

    complex_default_str = (
        "NONE" if complex_default is None else str(complex_default)
    )

    ge_env.complexes["pcpu"] = Complex(
        "pcpu", "p", ctype, "<=", True, True, complex_default_str, 0
    )

    q.complex_values[None] = {"pcpu": cast(q_default_pcpu)}
    q.complex_values["@hpc.q"] = {"pcpu": cast(hg_pcpu)}

    assert node.available.get("pcpu") == node_pcpu
    process_quotas(node, ge_env.complexes, ["@hpc.q"], [q])
    return node
def run_test(
    ctype: str,
    node_lic: Optional[bool],
    hg_lic: bool,
    q_default_lic: bool,
    complex_default: Optional[bool],
) -> SchedulerNode:
    node_res = {}
    if node_lic is not None:
        node_res["lic"] = node_lic
        node_res["l"] = node_lic

    node = SchedulerNode("tux", node_res)

    ge_env = common_ge_env()
    q = ge_env.queues["hpc.q"]

    complex_default_str = (
        "NONE" if complex_default is None else str(complex_default)
    )

    ge_env.complexes["lic"] = Complex(
        "lic", "l", ctype, "<=", True, True, complex_default_str, 0
    )

    q.complex_values[None] = {"lic": q_default_lic}
    q.complex_values["@hpc.q"] = {"lic": hg_lic}

    assert node.available.get("lic") == node_lic
    process_quotas(node, ge_env.complexes, ["@hpc.q"], [q])
    return node
def celery_status():
    from celery import Celery

    app = Celery()
    appc = app.control.inspect()
    celery_d = celery_driver()
    celery_d.jobs = []
    master_name = socket.gethostname()
    nodes = set()
    i = 0
    for arr in (arr for arr in [appc.active(), appc.reserved()] if arr is not None):
        i += 1
        for k, v in arr.items():
            on_master = False
            if c_strip(k) == master_name:
                on_master = True
            nodes.add(c_strip(k))
            for _job in v:
                print(_job)
                if i == 1 and not on_master:
                    job = Job(
                        name=_job["id"],
                        constraints={"ncpus": 1},
                        executing_hostnames=[c_strip(_job["hostname"])],
                    )
                else:
                    job = Job(name=_job["id"], constraints={"ncpus": 1})
                celery_d.jobs.append(job)

    celery_d.scheduler_nodes = [SchedulerNode(hostname=x) for x in list(nodes)]
    return celery_d
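# A hypothetical driver loop (not part of the original source) sketching how the
# object returned by celery_status() could feed a demand calculator. It reuses
# only calls demonstrated elsewhere in this section (new_demand_calculator,
# add_job, finish, print_demand); CONFIG is assumed to be supplied by the caller.
def celery_demand_sketch(CONFIG: dict) -> None:
    celery_d = celery_status()
    dcalc = new_demand_calculator(CONFIG, existing_nodes=celery_d.scheduler_nodes)
    for job in celery_d.jobs:
        dcalc.add_job(job)
    demand_result = dcalc.finish()
    # per-node view of which celery tasks landed where
    print_demand(["hostname", "job_ids", "ncpus"], demand_result)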
def test_node_property_constraint() -> None:
    assert (
        NodePropertyConstraint("vcpu_count", 2).to_dict()
        == get_constraint({"node.vcpu_count": 2}).to_dict()
    )
    assert isinstance(get_constraint({"node.vcpu_count": 2}), NodePropertyConstraint)
    for attr in dir(Node):
        # only inspect public, lowercase attributes (skip dunders/privates)
        if not attr[0].islower():
            continue
        try:
            get_constraint({"node.{}".format(attr): 2})
        except ValueError:
            assert attr not in QUERYABLE_PROPERTIES

    c = get_constraint({"node.vcpu_count": 2})
    assert -1 == c.minimum_space(SchedulerNode(""))
    assert c.do_decrement(SchedulerNode(""))
def test_xor() -> None:
    assert (
        XOr(
            NodeResourceConstraint("blah", "A"), NodeResourceConstraint("blah", "B")
        ).to_dict()
        == get_constraint({"xor": [{"blah": ["A"]}, {"blah": ["B"]}]}).to_dict()
    )
    xor_expr = {"xor": [{"blah": ["A"]}, {"blah": ["B"]}]}
    assert isinstance(get_constraint(xor_expr), XOr)

    c = XOr(NodeResourceConstraint("blah", "A"), NodeResourceConstraint("blah", "B"))
    assert not c.satisfied_by_node(SchedulerNode(""))
    assert not c.satisfied_by_node(SchedulerNode("", {"blah": ["A", "B"]}))
    assert c.satisfied_by_node(SchedulerNode("", {"blah": "A"}))
    assert c.satisfied_by_node(SchedulerNode("", {"blah": "B"}))
    assert c.do_decrement(SchedulerNode("", {"blah": "A"}))
def test_job_excl() -> None:
    s = SchedulerNode("")

    # typical exclusive behavior - one task per job per node
    job_excl = get_constraint({"exclusive": True})
    assert job_excl.job_exclusive
    assert isinstance(job_excl, ExclusiveNode)

    assert job_excl.satisfied_by_node(s)
    assert -1 == job_excl.minimum_space(s)
    assert job_excl.do_decrement(s)

    s.assign("1")
    job_excl.assignment_id = "1"

    # can't put the same jobid on the same node twice
    assert not job_excl.satisfied_by_node(s)
    assert not job_excl.do_decrement(s)

    assert s.closed
    assert 0 == job_excl.minimum_space(s)
def _test(select: Dict, hostgroups: Union[str, List[str]]) -> Dict:
    node = SchedulerNode("localhost")
    config = {
        "gridengine": {
            "default_hostgroups": [{"select": select, "hostgroups": hostgroups}]
        }
    }
    return get_node_hostgroups(config, node)
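# Illustrative only: two hypothetical invocations of the helper above. The
# "node.nodearray" selector syntax mirrors the selectors used elsewhere in this
# section (e.g. add_default_resource) and is an assumption here, not a verified
# part of the default_hostgroups schema.
def _example_usage() -> None:
    # a catch-all mapping: every node lands in a single hostgroup
    print(_test({}, "@hpc.q"))
    # a conditional mapping that allows a list of hostgroups
    print(_test({"node.nodearray": "hpc"}, ["@hpc.q", "@hpc.q_rr0"]))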
def handle_join_cluster(self, matched_nodes: List[Node]) -> List[Node]:
    ret = []
    for node in matched_nodes:
        if node.hostname and node.hostname not in self.ge_env.current_hostnames:
            self.ge_env.add_node(SchedulerNode(node.hostname, node.resources))
            ret.append(node)
    return ret
def clone_dcalc(dcalc: DemandCalculator) -> DemandCalculator:
    scheduler_nodes = [
        SchedulerNode(str(n.hostname), dict(n.resources))
        for n in dcalc.get_compute_nodes()
    ]
    return new_demand_calculator(
        {}, node_mgr=dcalc.node_mgr, existing_nodes=scheduler_nodes
    )
def test_add_remove_nodes() -> None:
    scheduler = GridEngineScheduler({})
    qbin = QBinImpl(is_uge=True)
    ge_env = GridEngineEnvironment(scheduler, qbin=qbin)

    # should have set like semantics for adding/removing
    ge_env.add_node(SchedulerNode("tux"))
    assert len(ge_env.nodes) == 1
    assert ge_env.current_hostnames == ["tux"]

    ge_env.add_node(SchedulerNode("tux"))
    assert len(ge_env.nodes) == 1
    assert ge_env.current_hostnames == ["tux"]

    ge_env.delete_node(SchedulerNode("tux"))
    assert len(ge_env.nodes) == 0
    assert ge_env.current_hostnames == []

    # add/remove two nodes
    ge_env.add_node(SchedulerNode("tux1"))
    ge_env.add_node(SchedulerNode("tux2"))
    assert len(ge_env.nodes) == 2
    assert sorted(ge_env.current_hostnames) == sorted(["tux1", "tux2"])

    ge_env.delete_node(SchedulerNode("tux1"))
    ge_env.delete_node(SchedulerNode("tux2"))
    assert len(ge_env.nodes) == 0
    assert sorted(ge_env.current_hostnames) == sorted([])
def test_non_schedulable_shared_resources() -> None:
    # what if, say, qres is created but is not used for scheduling -
    # what happens if it hits the limit anyway?
    test_queue = PBSProQueue(
        name="testq",
        queue_type="execution",
        total_jobs=0,
        state_count={},
        resources_default={},
        default_chunk={},
        node_group_enable=True,
        node_group_key="group_id",
        resource_state=ResourceState(
            resources_available={},
            resources_assigned={},
            shared_resources={
                "qres": [
                    SharedConsumableResource(
                        resource_name="qres",
                        source="queue",
                        current_value=4,
                        initial_value=4,
                    )
                ]
            },
        ),
        resource_definitions={
            "qres": PBSProResourceDefinition("qres", LongType(), flag="q")
        },
        enabled=True,
        started=True,
    )

    non_host_cons = test_queue.get_non_host_constraints({"qres": 1, "other": 2})
    assert len(non_host_cons) == 1
    assert len(non_host_cons[0].shared_resources) == 1
    assert non_host_cons[0].shared_resources[0].resource_name == "qres"
    assert non_host_cons[0].shared_resources[0].initial_value == 4
    assert non_host_cons[0].shared_resources[0].current_value == 4
    assert test_queue.resource_state.shared_resources["qres"][0].current_value == 4

    snode = SchedulerNode("localhost", {})
    assert snode.decrement(non_host_cons)
    assert test_queue.resource_state.shared_resources["qres"][0].current_value == 3
def test_min_resource_per_node() -> None:
    assert (
        MinResourcePerNode("pcpus", 2).to_dict()
        == get_constraint({"pcpus": 2}).to_dict()
    )
    c = get_constraint({"pcpus": 2})
    assert isinstance(c, MinResourcePerNode)
    assert 0 == c.minimum_space(SchedulerNode(""))

    try:
        assert not c.do_decrement(SchedulerNode(""))
        assert False
    except RuntimeError:
        pass

    s = SchedulerNode("has-pcpus", {"pcpus": 4})
    assert s.available["pcpus"] == 4
    assert c.do_decrement(s)
    assert s.available["pcpus"] == 2
    assert s.resources["pcpus"] == 4

    assert not c.satisfied_by_node(SchedulerNode("no-blah-define"))
    assert not c.satisfied_by_node(SchedulerNode("wrong-blah-define", {"pcpus": 1}))
    assert c.satisfied_by_node(SchedulerNode("min-blah-define", {"pcpus": 2}))
    assert c.satisfied_by_node(SchedulerNode("more-blah-define", {"pcpus": 100}))
def test_never() -> None:
    c = Never("my message")
    node = SchedulerNode("test", {"memgb": 4.0})
    assert not c.satisfied_by_node(node)
    assert c.satisfied_by_node(node).reasons == ["my message"]

    c = get_constraint({"never": "my other message"})
    assert isinstance(c, Never)
    assert c.message == "my other message"
def test_down_long_enough() -> None:
    node = SchedulerNode("localhost", {})
    now = datetime.datetime.now()

    # False: missing last_state_change_time
    driver = PBSProDriver(down_timeout=300)
    assert not driver._down_long_enough(now, node)

    # False: last_state_change_time < 300 seconds ago
    last_state_change_time = now - datetime.timedelta(seconds=1)
    node.metadata["last_state_change_time"] = last_state_change_time.ctime()
    assert not driver._down_long_enough(now, node)

    # True: last_state_change_time > 300 seconds ago
    last_state_change_time = now - datetime.timedelta(seconds=301)
    node.metadata["last_state_change_time"] = last_state_change_time.ctime()
    assert driver._down_long_enough(now, node)
def test_task_excl() -> None:
    s = SchedulerNode("")

    # now to test task exclusive, where multiple tasks from the same
    # job can run on the same machine
    task_excl = get_constraint({"exclusive_task": True})
    assert not task_excl.job_exclusive
    assert isinstance(task_excl, ExclusiveNode)

    assert task_excl.satisfied_by_node(s)
    assert -1 == task_excl.minimum_space(s)
    assert task_excl.do_decrement(s)

    s.assign("1")
    task_excl.assignment_id = "1"

    assert task_excl.satisfied_by_node(s)
    assert task_excl.do_decrement(s)

    assert s.closed
    assert -1 == task_excl.minimum_space(s)
def clone_dcalc(dcalc: DemandCalculator) -> DemandCalculator:
    scheduler_nodes = [
        SchedulerNode(str(n.name), dict(n.resources))
        for n in dcalc.get_compute_nodes()
    ]
    if hasattr(dcalc.node_history, "conn"):
        conn = getattr(dcalc.node_history, "conn")
        conn.close()
    return new_demand_calculator(
        {}, node_mgr=dcalc.node_mgr, existing_nodes=scheduler_nodes
    )
def preprocess_nodes_stdin() -> None:
    # load the json from stdin
    node_dicts = json.load(sys.stdin)

    # parse the node dictionaries into SchedulerNode objects
    nodes = [SchedulerNode.from_dict(n) for n in node_dicts]

    # run our preprocessing
    modified_nodes = preprocess_nodes(nodes)

    # finally dump the modified nodes out to stdout
    json.dump(modified_nodes, sys.stdout, default=lambda x: x.to_dict())
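# The preprocess_nodes hook used above is defined elsewhere; this is a minimal,
# purely illustrative sketch of what such a hook might look like. The "ncpus"
# adjustment is an invented example, not behavior from the original source.
def preprocess_nodes_example(nodes: List[SchedulerNode]) -> List[SchedulerNode]:
    for node in nodes:
        # e.g. reserve one core per node for the OS / node daemons
        if node.available.get("ncpus"):
            node.available["ncpus"] = node.available["ncpus"] - 1
    return nodes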
def test_quota_bound_resource_number() -> None:
    ge_env = common_ge_env()
    hpcq = ge_env.queues["hpc.q"]
    htcq = ge_env.queues["htc.q"]

    hpcq.complex_values[None] = {"pcpu": 6}
    htcq.complex_values[None] = {"pcpu": 4}

    node = SchedulerNode("tux", resources={"pcpu": 8})
    node.available["hpc.q@pcpu"] = 6
    node.available["htc.q@pcpu"] = 4

    c1 = make_quota_bound_consumable_constraint("pcpu", 1, hpcq, ge_env, ["@hpc.q"])
    c2 = make_quota_bound_consumable_constraint("pcpu", 2, htcq, ge_env, ["@htc.q"])

    # imagine the node has 8 pcpus, but hpc.q limits it to 6, and htc.q to 4
    assert node.available["pcpu"] == 8
    assert node.available["hpc.q@pcpu"] == 6
    assert node.available["htc.q@pcpu"] == 4

    # the total amount and hpc.q are decremented, htc.q untouched
    assert c1.satisfied_by_node(node)
    assert c1.do_decrement(node)
    assert node.available["pcpu"] == 7
    assert node.available["hpc.q@pcpu"] == 5
    assert node.available["htc.q@pcpu"] == 4

    # the total amount and htc.q are decremented, hpc.q untouched
    assert c2.satisfied_by_node(node)
    assert c2.do_decrement(node)
    assert node.available["pcpu"] == 5
    assert node.available["hpc.q@pcpu"] == 5
    assert node.available["htc.q@pcpu"] == 2

    # the total amount and htc.q are decremented, hpc.q is floored
    # to the total amount
    assert c2.satisfied_by_node(node)
    assert c2.do_decrement(node)
    assert node.available["pcpu"] == 3
    assert node.available["hpc.q@pcpu"] == 3
    assert node.available["htc.q@pcpu"] == 0

    # take out the remaining amount
    assert not c2.satisfied_by_node(node)
    for _ in range(3):
        assert c1.satisfied_by_node(node)
        assert c1.do_decrement(node)

    assert not c1.satisfied_by_node(node)
    assert node.available["pcpu"] == 0
    assert node.available["hpc.q@pcpu"] == 0
    assert node.available["htc.q@pcpu"] == 0
def test_minimum_space() -> None:
    c = MinResourcePerNode("pcpus", 1)
    assert 1 == c.minimum_space(SchedulerNode("", {"pcpus": 1}))
    assert 2 == c.minimum_space(SchedulerNode("", {"pcpus": 2}))

    snode = SchedulerNode("", {"pcpus": 2})
    assert -1 == ExclusiveNode(assignment_id="1").minimum_space(snode)
    snode.assign("1")
    assert 0 == ExclusiveNode(assignment_id="1").minimum_space(snode)
def onprem_burst_demand() -> None:
    onprem001 = SchedulerNode(
        "onprem001", resources={"onprem": True, "nodetype": "A", "ncpus": 16}
    )
    onprem002 = SchedulerNode(
        "onprem002", resources={"onprem": True, "nodetype": "A", "ncpus": 32}
    )

    # onprem002 already has 10 cores occupied
    onprem002.available["ncpus"] -= 10

    dcalc = new_demand_calculator(CONFIG, existing_nodes=[onprem001, onprem002])
    dcalc.node_mgr.add_default_resource(
        {"node.nodearray": ["htc", "htcspot"]}, "nodetype", "A"
    )
    assert [b for b in dcalc.node_mgr.get_buckets() if b.nodearray == "htc"][
        0
    ].resources["nodetype"] == "A"

    dcalc.node_mgr.add_default_resource({}, "nodetype", "B")
    assert [b for b in dcalc.node_mgr.get_buckets() if b.nodearray == "htc"][
        0
    ].resources["nodetype"] == "A"

    # we want 50 ncpus, but there are only 38 onpremise, so we need to burst
    # 12 more cores.
    dcalc.add_job(Job("tc-100", {"nodetype": "A", "ncpus": 1}, iterations=50))

    demand_result = dcalc.finish()

    if not DRY_RUN:
        dcalc.bootup()

    # also note we can add defaults to the column by adding a :, like
    # onprem:False, as this is only defined on the onprem nodes and not
    # on the Azure nodes.
    print_demand(
        ["name", "job_ids", "nodetype", "onprem:False", "/ncpus"], demand_result
    )
def test_shared_constraint() -> None:
    qres = SharedConsumableResource("qres", "queue", 100, 100)
    cons = SharedConsumableConstraint([qres], 40)
    node = SchedulerNode("tux", {})

    assert cons.satisfied_by_node(node)
    assert cons.do_decrement(node)
    assert qres.current_value == 60

    assert cons.satisfied_by_node(node)
    assert cons.do_decrement(node)
    assert qres.current_value == 20

    assert not cons.satisfied_by_node(node)
    assert not cons.do_decrement(node)
    assert qres.current_value == 20

    cons = SharedConsumableConstraint([qres], 20)
    assert cons.satisfied_by_node(node)
    assert cons.do_decrement(node)
    assert qres.current_value == 0

    qres = SharedNonConsumableResource("qres", "queue", "abc")
    cons = SharedNonConsumableConstraint(qres, "abc")
    assert cons.satisfied_by_node(node)
    assert cons.do_decrement(node)
    assert qres.current_value == "abc"

    cons = SharedNonConsumableConstraint(qres, "xyz")
    assert not cons.satisfied_by_node(node)
    assert not cons.do_decrement(node)
    assert qres.current_value == "abc"

    global_qres = SharedConsumableResource("qres", "queue", 100, 100)
    queue_qres = SharedConsumableResource("qres", "queue", 50, 50)
    qcons = SharedConsumableConstraint([global_qres, queue_qres], 30)

    assert qcons.satisfied_by_node(node)
    assert qcons.do_decrement(node)
    assert global_qres.current_value == 70
    assert queue_qres.current_value == 20

    assert not qcons.satisfied_by_node(node)
    assert not qcons.do_decrement(node)
    assert global_qres.current_value == 70
    assert queue_qres.current_value == 20