def _process_quota( c: Complex, resource_name: str, node: Node, ge_queue: "GridEngineQueue", hostgroup: str, ) -> None: if c.is_excl: return namespaced_key = "{}@{}".format(ge_queue.qname, resource_name) if namespaced_key in node.available: return host_value = node.available.get(resource_name) # if no quota is defined, then use the host_value (which may also be undefined) quota = ge_queue.get_quota(c, hostgroup, node) if quota is None: quota = host_value if quota is None: return # host value isn't defined, then set it to the quota if host_value is None: host_value = quota if c.is_numeric: quota = min(quota, host_value) # type: ignore elif host_value: quota = host_value quotas_dict = node.metadata.get("quotas") if quotas_dict is None: quotas_dict = {} node.metadata["quotas"] = quotas_dict qname_dict = quotas_dict.get(ge_queue.qname) if qname_dict is None: qname_dict = {} quotas_dict[ge_queue.qname] = qname_dict hg_dict = qname_dict.get(hostgroup) if hg_dict is None: hg_dict = {} qname_dict[hostgroup] = hg_dict hg_dict[resource_name] = quota node.available[namespaced_key] = quota node._resources[namespaced_key] = quota
def do_decrement(self, node: Node) -> bool:
    """Decrement this constraint's resource on ``node``.

    Queue-namespaced variants of the resource are clamped so they never
    exceed the new host-level floor::

        pcpu = 10 q1@pcpu=4 q2@pcpu=8
        if target is pcpu: pcpu=9, q1@pcpu=4, q2@pcpu=8
        if target is q1@ : pcpu=9, q1@pcpu=3, q2@pcpu=8
        if target is q2@ : pcpu=9, q1@pcpu=4, q2@pcpu=7
    """
    self.bucket_preprocessor(node)

    # String / boolean resources are not decremented numerically.
    if isinstance(self.value, (str, bool)):
        return True

    # All resources (host-level and namespaced) related to this constraint.
    related = [
        (name, amount)
        for name, amount in node.available.items()
        if self.pattern.match(name)
    ]

    # The host-level value once this decrement has been applied.
    new_floor = node.available[self.resource_name] - self.value

    # Clamp every *other* namespaced variant down to the new floor; the
    # target resource itself is handled by the child constraint below.
    for name, amount in related:
        if "@" in name and name != self.target_resource:
            node.available[name] = min(new_floor, amount)

    return self.child_constraint.do_decrement(node)
def __init__(
    self,
    hostname: str,
    resources: typing.Optional[dict] = None,
    bucket_id: typing.Optional[ht.BucketId] = None,
) -> None:
    """Construct a scheduler-reported node known only by its hostname.

    Everything except the hostname, resources and bucket id is filled in
    with placeholder values; the private ip is resolved via DNS unless
    hostname resolution is disabled class-wide.
    """
    resources = resources or ht.ResourceDict({})

    private_ip: typing.Optional[ht.IpAddress] = None
    if not SchedulerNode.ignore_hostnames:
        try:
            private_ip = ht.IpAddress(socket.gethostbyname(hostname))
        except Exception as e:
            # Best-effort: a node without a resolvable ip is still usable.
            logging.warning("Could not find private ip for %s: %s", hostname, e)
            private_ip = None

    init_kwargs = dict(
        node_id=DelayedNodeId(ht.NodeName(hostname)),
        name=ht.NodeName(hostname),
        nodearray=ht.NodeArrayName("unknown"),
        bucket_id=bucket_id or ht.BucketId(str(uuid4())),
        hostname=ht.Hostname(hostname),
        private_ip=private_ip,
        instance_id=None,
        vm_size=ht.VMSize("unknown"),
        location=ht.Location("unknown"),
        spot=False,
        vcpu_count=1,
        memory=ht.Memory(0, "b"),
        infiniband=False,
        state=ht.NodeStatus("running"),
        target_state=ht.NodeStatus("running"),
        power_state=ht.NodeStatus("running"),
        exists=True,
        placement_group=None,
        managed=False,
        resources=ht.ResourceDict(resources),
        software_configuration=ImmutableOrderedDict({}),
        keep_alive=False,
    )
    Node.__init__(self, **init_kwargs)
def __init__(
    self,
    hostname: str,
    resources: typing.Optional[dict] = None,
    bucket_id: typing.Optional[ht.BucketId] = None,
    **overrides: typing.Any,
) -> None:
    """Construct a placeholder node known only by its hostname.

    Any keyword in ``overrides`` replaces the corresponding default that
    would otherwise be passed to ``Node.__init__``.
    """
    resources = resources or ht.ResourceDict({})

    defaults = dict(
        self=self,
        node_id=DelayedNodeId(ht.NodeName(hostname)),
        name=ht.NodeName(hostname),
        nodearray=ht.NodeArrayName("unknown"),
        bucket_id=bucket_id or ht.BucketId(str(uuid4())),
        hostname=ht.Hostname(hostname),
        private_ip=None,
        instance_id=None,
        vm_size=ht.VMSize("unknown"),
        location=ht.Location("unknown"),
        spot=False,
        vcpu_count=1,
        memory=ht.Memory(0, "b"),
        infiniband=False,
        state=ht.NodeStatus("running"),
        target_state=ht.NodeStatus("running"),
        power_state=ht.NodeStatus("running"),
        exists=True,
        placement_group=None,
        managed=False,
        resources=ht.ResourceDict(resources),
        software_configuration=ImmutableOrderedDict({}),
        keep_alive=False,
    )
    # Later keys win in a dict merge, so overrides replace the defaults.
    Node.__init__(**{**defaults, **overrides})
def __init__(
    self,
    definition: NodeDefinition,
    limits: BucketLimits,
    max_placement_group_size: int,
    nodes: List["Node"],
    artificial: bool = False,
) -> None:
    """A bucket of identically-configured (actual or potential) nodes.

    Also constructs an example node that can be used to test whether a
    job would match this bucket.
    """
    self.__definition = definition

    assert limits
    self.limits = limits
    self.max_placement_group_size = max_placement_group_size
    self.priority = 0
    # list of nodes cyclecloud currently says are in this bucket
    self.nodes = nodes
    self.__decrement_counter = 0
    self._artificial = artificial

    # local import to avoid a circular dependency
    from hpc.autoscale.node.node import Node

    seed_name = ht.NodeName("{}-0".format(definition.nodearray))

    # TODO infiniband
    self.__example = Node(
        node_id=DelayedNodeId(seed_name),
        name=seed_name,
        nodearray=definition.nodearray,
        bucket_id=definition.bucket_id,
        hostname=None,
        private_ip=None,
        instance_id=None,
        vm_size=definition.vm_size,
        location=self.location,
        spot=definition.spot,
        vcpu_count=self.vcpu_count,
        memory=self.memory,
        infiniband=False,
        state=ht.NodeStatus("Off"),
        target_state=ht.NodeStatus("Off"),
        power_state=ht.NodeStatus("Off"),
        exists=False,
        placement_group=definition.placement_group,
        managed=False,
        resources=self.resources,
        software_configuration=definition.software_configuration,
        keep_alive=False,
    )
def get_node_hostgroups(config: Dict, node: Node) -> List[str]:
    """Return the list of hostgroups a node belongs to.

    Resolution order: ``node.metadata["gridengine_hostgroups"]``, then the
    node's software configuration, then the first matching entry of
    ``config["gridengine"]["default_hostgroups"]`` (each entry needs a
    ``select`` constraint expression and a ``hostgroups`` value).

    Returns an empty list when no hostgroups can be determined.
    """
    hostgroups_expr = node.metadata.get("gridengine_hostgroups")

    if not hostgroups_expr:
        hostgroups_expr = node.software_configuration.get("gridengine_hostgroups")

    if not hostgroups_expr:
        default_hostgroups = config.get("gridengine", {}).get(
            "default_hostgroups", []
        )
        for dh in default_hostgroups:
            # Malformed entries are skipped (best-effort), not fatal.
            if "select" not in dh:
                logging.warning(
                    "Missing key 'select' in gridengine.default_hostgroups %s", dh
                )
                continue
            if "hostgroups" not in dh:
                logging.warning(
                    "Missing key 'hostgroups' in gridengine.default_hostgroups %s",
                    dh,
                )
                continue

            constraint_list = constraints.get_constraints(dh["select"])
            satisfied = all(c.satisfied_by_node(node) for c in constraint_list)

            if satisfied:
                hostgroups = dh["hostgroups"]
                if isinstance(hostgroups, str):
                    hostgroups = [hostgroups]
                hostgroups_expr = " ".join(hostgroups)
                # set it in metadata so we can output it in the cli
                node.metadata["gridengine_hostgroups"] = hostgroups_expr

    if hostgroups_expr:
        # BUG FIX: the split pattern ",| +" produces empty-string entries for
        # inputs like "@a, @b" (comma followed by space) or stray whitespace;
        # filter them out so callers never see "" as a hostgroup name.
        return [hg for hg in re.split(",| +", hostgroups_expr) if hg]
    return []
def node_from_bucket(
    bucket: "NodeBucket",
    new_node_name: ht.NodeName,
    state: ht.NodeStatus,
    target_state: ht.NodeStatus,
    power_state: ht.NodeStatus,
    hostname: Optional[ht.Hostname] = None,
    placement_group: Optional[ht.PlacementGroup] = None,
    exists: bool = True,
) -> "Node":
    """Create a managed Node whose sizing and resources come from ``bucket``."""
    # local import to avoid a circular dependency
    from hpc.autoscale.node.node import Node

    if hostname is None:
        # fabricate a unique placeholder hostname
        hostname = ht.Hostname(util.uuid("hostname"))

    return Node(
        node_id=DelayedNodeId(new_node_name),
        name=new_node_name,
        nodearray=bucket.nodearray,
        bucket_id=bucket.bucket_id,
        hostname=hostname,
        private_ip=None,
        instance_id=None,
        vm_size=bucket.vm_size,
        location=bucket.location,
        spot=bucket.spot,
        vcpu_count=bucket.vcpu_count,
        memory=bucket.memory,
        infiniband=False,  # TODO
        state=state,
        target_state=target_state,
        power_state=power_state,
        exists=exists,
        placement_group=placement_group,
        managed=True,
        resources=ht.ResourceDict(bucket.resources),
        software_configuration=bucket.software_configuration,
        keep_alive=False,
    )
def __repr__(self) -> str:
    """Base Node repr prefixed with "Scheduler" to identify the subclass."""
    return "Scheduler" + Node.__repr__(self)
def __repr__(self) -> str:
    """Base Node repr prefixed with "Temp" to identify the subclass."""
    return "Temp" + Node.__repr__(self)
def _node_override(node: Node) -> Dict: c = node.node_attribute_overrides.get("Configuration", {}) node.node_attribute_overrides["Configuration"] = c return c
def minimum_space(self, node: Node) -> int:
    """Delegate minimum-space calculation to the child constraint,
    preprocessing a clone so the example node itself is untouched."""
    preprocessed = node.clone()
    self.bucket_preprocessor(preprocessed)
    return self.child_constraint.minimum_space(preprocessed)
def satisfied_by_node(self, node: Node) -> Result:
    """Delegate the satisfaction check to the child constraint,
    preprocessing a clone so the original node is left unmodified."""
    preprocessed = node.clone()
    self.bucket_preprocessor(preprocessed)
    return self.child_constraint.satisfied_by_node(preprocessed)