def key_by_requested_slots(
    agent: AgentContext,
    requested_slots: ResourceSlot,
) -> Tuple[int, ResourceSlot]:
    """Build a sort key that de-prioritizes agents with extra slot types.

    A slot type is "extra" for this request when the request asks for zero
    of it but the agent still has a positive capacity for it (e.g., an
    accelerator the session does not need).  The key is a tuple of the
    negated extra-slot count and the agent's available slots, so that under
    ``max()`` agents matching exactly the required slot types win over
    agents that would waste accelerator-equipped capacity.
    """
    # Slot types the request does not actually need.
    zero_requested = {
        slot_name
        for slot_name, amount in requested_slots.items()
        if amount == Decimal(0)
    }
    # Count how many of those unneeded slot types this agent provides.
    extra_count = sum(
        1
        for slot_name, capacity in agent.available_slots.items()
        if slot_name in zero_requested and capacity > Decimal(0)
    )
    # Put back agents with more extra slot types
    # (e.g., accelerators)
    # Also put front agents with exactly required slot types
    return (-extra_count, agent.available_slots)
def _assign_agent(
    self,
    agents: Sequence[AgentContext],
    access_key: AccessKey,
    requested_slots: ResourceSlot,
) -> Optional[AgentId]:
    """Pick an agent that can host the requested slots and update shares.

    Returns the chosen agent's ID, or ``None`` when no agent has enough
    remaining capacity.  As a side effect, bumps the per-user dominant
    share for *access_key* when an agent is found.
    """
    # If some predicate checks for a picked session fail,
    # this method is NOT called at all for the picked session.
    # In such case, we just skip updating self.per_user_dominant_share state
    # and the scheduler dispatcher continues to pick another session
    # within the same scaling group.
    possible_agents = []
    for agent in agents:
        remaining_slots = agent.available_slots - agent.occupied_slots
        if remaining_slots >= requested_slots:
            possible_agents.append(agent)
    if not possible_agents:
        # No agent can host the picked session right now.
        return None
    # We have one or more agents that can host the picked session.
    # Update the dominant share.
    # This is required to use the latest dominant share information
    # when iterating over multiple pending sessions in a single scaling group.
    # FIX: sync_keys() does not depend on the per-slot loop variable;
    # hoisted out of the loop so it runs once instead of once per slot.
    self.total_capacity.sync_keys(requested_slots)
    dominant_share_from_request = Decimal(0)
    for slot, value in requested_slots.items():
        slot_cap = Decimal(self.total_capacity[slot])
        if slot_cap == 0:
            # Skip slot types with no cluster-wide capacity
            # to avoid division by zero.
            continue
        slot_share = Decimal(value) / slot_cap
        if dominant_share_from_request < slot_share:
            dominant_share_from_request = slot_share
    if self.per_user_dominant_share[access_key] < dominant_share_from_request:
        self.per_user_dominant_share[access_key] = dominant_share_from_request
    # Choose the agent with the largest available slots.
    chosen_agent = max(possible_agents, key=lambda a: a.available_slots)
    return chosen_agent.agent_id