def test_place_sampling(self, shuffle, client_class):
    """Place across three children with a fixed shuffle order.

    Only "bar" and "baz" have fake clients registered; the response handed
    back by the scheduler must be the very object returned by "baz".
    """
    client_class.side_effect = self.create_fake_client
    # fake_shuffle is fed the children in the order bar, baz, qux.
    shuffle.side_effect = self.fake_shuffle(
        [ChildInfo(id=name, address=name) for name in ("bar", "baz", "qux")])

    bar_response = PlaceResponse(PlaceResultCode.OK, agent_id="bar",
                                 score=Score(5, 90))
    bar_client = MagicMock()
    bar_client.place.return_value = bar_response
    self._clients["bar"] = bar_client

    baz_response = PlaceResponse(PlaceResultCode.OK, agent_id="baz",
                                 score=Score(30, 80))
    baz_client = MagicMock()
    baz_client.place.return_value = baz_response
    self._clients["baz"] = baz_client

    # The scheduler itself is configured in a different order: qux, bar, baz.
    children = [ChildInfo(id=name, address=name)
                for name in ("qux", "bar", "baz")]
    scheduler = BranchScheduler("foo", 9)
    scheduler.configure(children)

    placed = scheduler.place(self._place_request())
    assert_that(placed, is_(same_instance(baz_response)))
def place(self, request):
    """Place the specified resources.

    Fans the request out to the child schedulers that satisfy the request's
    constraints and returns the best scored OK response. When no child
    answers OK, RESOURCE_CONSTRAINT is reported if any child returned it,
    otherwise SYSTEM_ERROR.

    :type request: PlaceRequest
    :rtype: PlaceResponse
    """
    constraints = self._collect_constraints(request.resource)
    self._logger.info("Constraints: %s", constraints)

    candidates = self._placement_schedulers(request, constraints)
    if len(candidates) == 0:
        # Nothing matches the constraints; no point in fanning out.
        return PlaceResponse(PlaceResultCode.RESOURCE_CONSTRAINT)

    ok_responses = []
    saw_resource_constraint = False
    for future in self._execute_placement(candidates, request):
        outcome = future.result()
        if outcome.result == PlaceResultCode.OK:
            ok_responses.append(outcome)
            continue
        if outcome.result == PlaceResultCode.RESOURCE_CONSTRAINT:
            saw_resource_constraint = True

    winner = self._scorer.score(ok_responses)
    if winner is not None:
        return winner
    if saw_resource_constraint:
        return PlaceResponse(PlaceResultCode.RESOURCE_CONSTRAINT)
    return PlaceResponse(PlaceResultCode.SYSTEM_ERROR)
def test_place_with_resource_constraints(self, client_class):
    """A DATASTORE constraint of "1" must route the placement to "bar".

    "bar" is configured with datastore "1" and "baz" with datastore "2";
    both fake clients would answer OK if asked.
    """
    client_class.side_effect = self.create_fake_client

    bar_response = PlaceResponse(PlaceResultCode.OK, agent_id="bar",
                                 score=Score(5, 90))
    bar_client = MagicMock()
    bar_client.place.return_value = bar_response
    self._clients["bar"] = bar_client

    baz_response = PlaceResponse(PlaceResultCode.OK, agent_id="baz",
                                 score=Score(30, 80))
    baz_client = MagicMock()
    baz_client.place.return_value = baz_response
    self._clients["baz"] = baz_client

    bar_child = ChildInfo(
        id="bar", address="bar",
        constraints=[
            ResourceConstraint(ResourceConstraintType.DATASTORE, ["1"])])
    baz_child = ChildInfo(
        id="baz", address="baz",
        constraints=[
            ResourceConstraint(ResourceConstraintType.DATASTORE, ["2"])])

    scheduler = BranchScheduler("foo", 9)
    scheduler.configure([bar_child, baz_child])

    request = self._place_request()
    request.resource.vm.resource_constraints = [
        ResourceConstraint(ResourceConstraintType.DATASTORE, ["1"])]

    placed = scheduler.place(request)
    assert_that(placed.result, is_(PlaceResultCode.OK))
    assert_that(placed.agent_id, is_("bar"))
def test_new_resource_score(self):
    """Of two responses with scores (40, 0) and (50, 0), expect (50, 0)."""
    candidates = [
        PlaceResponse(score=Score(40, 0)),
        PlaceResponse(score=Score(50, 0)),
    ]
    picked = self.scorer.score(candidates)

    # The response carrying the higher utilization score should win.
    assert_that(picked, is_(PlaceResponse(score=Score(50, 0))))
def test_highest_score_random(self):
    """Run the highest-score check against ten randomly scored responses."""
    # Both score components are drawn independently from [0, 100].
    candidates = [
        PlaceResponse(
            score=Score(random.randint(0, 100), random.randint(0, 100)))
        for _ in range(10)
    ]
    self._test_highest_score(candidates)
def reserve(self, disk=None, vm_disks=None, place_response=None,
            expect=Host.ReserveResultCode.OK):
    """Send a reserve request to the host and check its result code.

    :param disk: forwarded to ``self.resource_request``
    :param vm_disks: forwarded to ``self.resource_request``
    :param place_response: response whose ``placementList`` and
        ``generation`` are copied into the reserve request. When omitted
        (or ``None``) a fresh, empty ``PlaceResponse`` is used.
    :type place_response: PlaceResponse
    :param expect: result code the reserve response must carry
    :return: the response returned by the reserve call
    """
    # Build the default per call. A ``PlaceResponse()`` default in the
    # signature would be created once at definition time and shared by
    # every caller that omits the argument.
    if place_response is None:
        place_response = PlaceResponse()

    resource = self.resource_request(disk, vm_disks)
    resource.placement_list = place_response.placementList

    request = Host.ReserveRequest(resource, place_response.generation)
    response = rpc_call(self.host_client.reserve, request)

    assert_that(response.result, equal_to(expect))
    return response
def test_same_score_pick_random(self):
    """Two responses with identical scores must each get picked sometimes."""
    first = PlaceResponse(score=Score(98, 100))
    second = PlaceResponse(score=Score(98, 100))

    # Thirty draws; assuming the scorer breaks ties uniformly, all thirty
    # landing on the same response has probability 2^-29, i.e. negligible.
    picks = [self.scorer.score([first, second]) for _ in range(0, 30)]

    # Identity, not equality: the two responses compare equal by value.
    first_hits = sum(1 for picked in picks if picked is first)
    second_hits = len(picks) - first_hits

    assert_that(first_hits, greater_than(0))
    assert_that(second_hits, greater_than(0))
def test_place_resource_not_found(self, client_class):
    """RESOURCE_CONSTRAINT from one child outranks SYSTEM_ERROR from another.

    "bar" answers RESOURCE_CONSTRAINT and "baz" answers SYSTEM_ERROR; the
    scheduler is expected to report RESOURCE_CONSTRAINT.
    """
    client_class.side_effect = self.create_fake_client

    canned = (
        ("bar", PlaceResponse(PlaceResultCode.RESOURCE_CONSTRAINT)),
        ("baz", PlaceResponse(PlaceResultCode.SYSTEM_ERROR)),
    )
    for child_id, canned_response in canned:
        fake_client = MagicMock()
        fake_client.place.return_value = canned_response
        self._clients[child_id] = fake_client

    scheduler = BranchScheduler("foo", 9)
    scheduler.configure([ChildInfo(id="bar", address="bar"),
                         ChildInfo(id="baz", address="baz")])

    placed = scheduler.place(self._place_request())
    assert_that(placed.result, is_(PlaceResultCode.RESOURCE_CONSTRAINT))
def test_place_response(self):
    """The handler must hand back the scheduler's response object as-is."""
    expected = PlaceResponse(PlaceResultCode.OK)

    fake_scheduler = MagicMock()
    fake_scheduler.place.return_value = expected

    handler = SchedulerHandler()
    # Register the fake under the id used by the request below.
    handler._schedulers["foo"] = fake_scheduler

    request = PlaceRequest(Resource(), "foo")
    actual = handler.place(request)

    assert_that(actual, is_(same_instance(expected)))
def place(self, request):
    """Place the specified resource(s).

    Looks up the scheduler named by ``request.scheduler_id`` and delegates
    to it. InvalidScheduler raised by either the lookup or the delegate is
    turned into an INVALID_SCHEDULER response.

    :type request: PlaceRequest
    :rtype: PlaceResponse
    """
    try:
        # Keep the delegate call inside the try: it is covered by the
        # same InvalidScheduler handler as the lookup.
        return self._get_scheduler(request.scheduler_id).place(request)
    except InvalidScheduler:
        return PlaceResponse(PlaceResultCode.INVALID_SCHEDULER)
def test_place_local_agent(self, client_class):
    """A child configured without an address is served by the local handler.

    The remote fake client registered under "local-id" must not be called;
    the response must be the object returned by the handler's host_place.
    """
    client_class.side_effect = self.create_fake_client

    remote_client = MagicMock()
    self._clients["local-id"] = remote_client

    local_response = PlaceResponse(PlaceResultCode.OK,
                                   agent_id="local-id",
                                   score=Score(5, 90))
    self._scheduler_handler.host_place.return_value = local_response

    scheduler = LeafScheduler("foo", 9, False)
    scheduler.configure([ChildInfo(id="local-id")])

    placed = scheduler.place(self._place_request())

    assert_that(remote_client.place.called, is_(False))
    assert_that(placed, is_(same_instance(local_response)))
def place(self, request):
    """Place the specified resources.

    NOTE(review): the body visible here ends after tallying the child
    responses; no best response is selected or returned past that point.
    Confirm against the full definition.

    :type request: PlaceRequest
    :rtype: PlaceResponse
    :raise: InvalidScheduler
    """
    if self._configured == ConfigStates.UNINITIALIZED:
        raise InvalidScheduler()

    request.scheduler_id = None

    constraints = self._collect_constraints(request.resource)
    selected = self._placement_hosts(request, constraints)
    if len(selected) == 0:
        return PlaceResponse(PlaceResultCode.NO_SUCH_RESOURCE)

    selected = self._filter_missing_hosts(selected)

    responses = []
    no_such_resource = False
    not_enough_memory_resource = False
    not_enough_cpu_resource = False
    not_enough_datastore_capacity = False

    for future in self._execute_placement(selected, request):
        # The whole tally stays inside the try so that any failure while
        # reading a result is logged and the remaining futures are still
        # processed.
        try:
            response = future.result()
            code = response.result
            if code == PlaceResultCode.OK:
                responses.append(response)
            elif code == PlaceResultCode.NOT_ENOUGH_CPU_RESOURCE:
                not_enough_cpu_resource = True
            elif code == PlaceResultCode.NOT_ENOUGH_MEMORY_RESOURCE:
                not_enough_memory_resource = True
            elif code == PlaceResultCode.NOT_ENOUGH_DATASTORE_CAPACITY:
                not_enough_datastore_capacity = True
            elif code == PlaceResultCode.NO_SUCH_RESOURCE:
                no_such_resource = True
        except Exception as e:
            self._logger.warning(
                "Caught exception while sending "
                "place request: %s", str(e))
def _create_scheduler_with_hosts(self, name, number_of_hosts):
    """Build a configured LeafScheduler with fake clients for its hosts.

    For every host index a ChildInfo is created with id
    ``id_<name>_host_<n>``, address ``address_<name>_host_<n>`` and port
    ``n``. A fake client whose host_place answers OK with Score(100, 90)
    is registered in ``self._clients`` under the address.

    :param name: scheduler id, also used as the host name prefix
    :param number_of_hosts: how many child hosts to create
    :return: the configured LeafScheduler
    """
    leaf = LeafScheduler(name, 1.0, False)

    children = []
    for index in range(number_of_hosts):
        suffix = name + "_host_" + str(index)
        host_id = "id_" + suffix
        host_address = "address_" + suffix
        children.append(
            ChildInfo(id=host_id, address=host_address, port=index))

        fake_client = MagicMock()
        fake_client.host_place.return_value = PlaceResponse(
            PlaceResultCode.OK, agent_id=host_id, score=Score(100, 90))
        self._clients[host_address] = fake_client

    leaf.configure(children)
    return leaf
def test_place_res_with_missing(self, health_checker, client_class):
    """With "bar" listed as missing, the placement must come from "baz".

    The patched health checker class returns a fake whose
    ``get_missing_hosts`` attribute is the list ["bar"]; only "baz" has a
    fake client registered.
    """
    client_class.side_effect = self.create_fake_client

    fake_checker = MagicMock()
    health_checker.return_value = fake_checker
    # Set as a plain attribute, not as a call return value.
    fake_checker.get_missing_hosts = ["bar"]

    baz_response = PlaceResponse(PlaceResultCode.OK, agent_id="baz",
                                 score=Score(30, 80))
    baz_client = MagicMock()
    baz_client.host_place.return_value = baz_response
    self._clients["baz"] = baz_client

    scheduler = LeafScheduler("foo", 1.0, True)
    scheduler.configure([ChildInfo(id="bar", address="bar"),
                         ChildInfo(id="baz", address="baz")])

    placed = scheduler.place(self._place_request())
    assert_that(placed, is_(same_instance(baz_response)))
def test_place_timeout(self, client_class, wait_fn):
    """When the patched wait reports nothing done, expect SYSTEM_ERROR."""
    client_class.side_effect = self.create_fake_client

    fake_client = MagicMock()
    fake_client.place.return_value = PlaceResponse(PlaceResultCode.OK)
    self._clients["scheduler/baz"] = fake_client

    # Both the "done" and "not done" sets come back empty.
    wait_fn.return_value = set(), set()

    scheduler = BranchScheduler("foo", 9)
    scheduler.configure([ChildInfo(id="baz")])

    placed = scheduler.place(self._place_request())
    assert_that(placed.result, is_(PlaceResultCode.SYSTEM_ERROR))

    # Join the futures before returning: the patches are only active
    # for the duration of this method.
    self._threadpool.shutdown()
class LeafScheduler(BaseScheduler):
    """Leaf scheduler manages child hosts."""

    def __init__(self, scheduler_id, ut_ratio, enable_health_checker=True):
        """Create a new leaf scheduler.

        :param scheduler_id: scheduler id
        :type scheduler_id: str
        :param ut_ratio: ratio handed to the DefaultScorer
        :param enable_health_checker: enables health checking of children.
        :type enable_health_checker: bool
        """
        self._logger = logging.getLogger(__name__)
        # Lazy logger arguments: formatting only happens if the record
        # is actually emitted.
        self._logger.info("Creating leaf scheduler: %s", scheduler_id)
        self.lock = threading.RLock()
        self._latch = CountUpDownLatch()
        self._place_strategy = RandomSubsetStrategy(PLACE_FAN_OUT_RATIO,
                                                    MIN_PLACE_FAN_OUT,
                                                    MAX_PLACE_FAN_OUT)
        self._scheduler_id = scheduler_id
        self._hosts = []
        self._scorer = DefaultScorer(ut_ratio)
        self._threadpool = None
        self._initialize_services(scheduler_id)
        self._health_checker = None
        self._enable_health_checker = enable_health_checker
        self._configured = ConfigStates.UNINITIALIZED

    def _initialize_services(self, scheduler_id):
        """Fetch the shared thread pool and create the scheduler client.

        :param scheduler_id: scheduler id (not used by this method)
        """
        self._threadpool = common.services.get(ThreadPoolExecutor)
        self._scheduler_client = SchedulerClient()

    @locked
    def configure(self, hosts):
        """Configure the leaf scheduler.

        Replaces the current host list, restarts the health checker (when
        enabled) for the new set of children and marks the scheduler as
        initialized.

        :param hosts: list of child hosts; each entry is converted with
            ChildInfo.from_thrift
        :type hosts: list
        """
        # Transfer children's constraints from list to set, so searching
        # elements are more efficient.
        self._hosts = []

        for host in hosts:
            self._hosts.append(ChildInfo.from_thrift(host))

        self._coalesce_resources(self._hosts)

        # Stop a checker left over from a previous configuration before
        # (possibly) starting a new one.
        if self._health_checker:
            self._health_checker.stop()
        if self._enable_health_checker:
            # initialize health checker with the new set of children.
            agent_config = common.services.get(ServiceName.AGENT_CONFIG)
            children = dict((host.id, ServerAddress(host.address, host.port))
                            for host in self._hosts)
            self._health_checker = HealthChecker(self._scheduler_id, children,
                                                 agent_config)
            self._health_checker.start()
        self._configured = ConfigStates.INITIALIZED

    @locked
    def _get_hosts(self):
        """
        Get the list of hosts for this scheduler.

        The returned list is a new list object (a shallow copy: the host
        entries themselves are shared), so a subsequent call to configure,
        which rebinds the host list, does not step on calls in flight.
        Assumes the host is configured as a leaf scheduler.

        :rtype: list of the host entries built in configure
        """
        return list(self._hosts)

    def mark_pending(func):
        """
        Decorator for bumping up the pending count for calls that are inflight.

        The latch is counted up before the wrapped call and counted down
        again in a ``finally`` block, so it is released even when the call
        raises.
        """
        @log_request(log_level=logging.debug)
        def nested(self, *args, **kwargs):
            self._latch.count_up()
            self._logger.debug("latch counted up to: {0}".format(
                self._latch.count))
            try:
                return func(self, *args, **kwargs)
            finally:
                self._latch.count_down()
                self._logger.debug("latch counted down to: {0}".format(
                    self._latch.count))
        return nested

    @mark_pending
    def find(self, request):
        """Find the specified resource.

        Submits one find worker per host to the thread pool, waits up to
        FIND_TIMEOUT and returns the first completed OK response, or
        NOT_FOUND when there is none.

        :type request: FindRequest
        :rtype: FindResponse
        :raise: InvalidScheduler
        """
        if self._configured == ConfigStates.UNINITIALIZED:
            raise InvalidScheduler()

        # Host service only has a single scheduler
        request.scheduler_id = None

        futures = []
        for agent in self._get_hosts():
            future = self._threadpool.submit(
                self._find_worker, agent.address, agent.port,
                agent.id, request)
            futures.append(future)

        done, not_done = concurrent.futures.wait(futures,
                                                 timeout=FIND_TIMEOUT)
        self._logger.info("Find responses received: %d, timed out: %d",
                          len(done), len(not_done))

        for future in done:
            response = future.result()
            if response.result == FindResultCode.OK:
                return response

        return FindResponse(FindResultCode.NOT_FOUND)

    @mark_pending
    def place(self, request):
        """Place the specified resources.

        Returns the best scored OK response from the selected hosts. When
        no host answers OK, the failure reported is chosen in this order:
        NOT_ENOUGH_CPU_RESOURCE, NOT_ENOUGH_MEMORY_RESOURCE,
        NOT_ENOUGH_DATASTORE_CAPACITY, NO_SUCH_RESOURCE, and finally
        SYSTEM_ERROR when none of those was seen.

        :type request: PlaceRequest
        :rtype: PlaceResponse
        :raise: InvalidScheduler
        """
        if self._configured == ConfigStates.UNINITIALIZED:
            raise InvalidScheduler()

        request.scheduler_id = None

        constraints = self._collect_constraints(request.resource)
        selected = self._placement_hosts(request, constraints)
        if len(selected) == 0:
            return PlaceResponse(PlaceResultCode.NO_SUCH_RESOURCE)

        selected = self._filter_missing_hosts(selected)
        done = self._execute_placement(selected, request)

        responses = []
        no_such_resource = False
        not_enough_memory_resource = False
        not_enough_cpu_resource = False
        not_enough_datastore_capacity = False

        for future in done:
            # Best effort: a failure while reading one result is logged
            # and must not prevent the other results from being used.
            try:
                response = future.result()
                if response.result == PlaceResultCode.OK:
                    responses.append(response)
                elif response.result == \
                        PlaceResultCode.NOT_ENOUGH_CPU_RESOURCE:
                    not_enough_cpu_resource = True
                elif response.result == \
                        PlaceResultCode.NOT_ENOUGH_MEMORY_RESOURCE:
                    not_enough_memory_resource = True
                elif response.result == \
                        PlaceResultCode.NOT_ENOUGH_DATASTORE_CAPACITY:
                    not_enough_datastore_capacity = True
                elif response.result == \
                        PlaceResultCode.NO_SUCH_RESOURCE:
                    no_such_resource = True
            except Exception as e:
                self._logger.warning(
                    "Caught exception while sending "
                    "place request: %s", str(e))

        best_response = self._scorer.score(responses)

        if best_response is not None:
            return best_response
        elif not_enough_cpu_resource:
            return PlaceResponse(PlaceResultCode.NOT_ENOUGH_CPU_RESOURCE)
        elif not_enough_memory_resource:
            return PlaceResponse(PlaceResultCode.NOT_ENOUGH_MEMORY_RESOURCE)
        elif not_enough_datastore_capacity:
            return PlaceResponse(
                PlaceResultCode.NOT_ENOUGH_DATASTORE_CAPACITY)
        elif no_such_resource:
            return PlaceResponse(PlaceResultCode.NO_SUCH_RESOURCE)
        else:
            return PlaceResponse(PlaceResultCode.SYSTEM_ERROR)
def test_place_with_resource_constraints(self, client_class):
    """Check OR-within-a-constraint and AND-across-constraints matching.

    Three fake hosts (baz_1, baz_2, baz_3) all answer OK; the scheduler is
    configured from ``self._build_scheduler_configure()``. Each case below
    sets a different constraint list on a fresh place request.
    """
    client_class.side_effect = self.create_fake_client

    canned_scores = (
        ("baz_1", Score(5, 90)),
        ("baz_2", Score(30, 80)),
        ("baz_3", Score(35, 85)),
    )
    for agent_id, score in canned_scores:
        fake_client = MagicMock()
        fake_client.host_place.return_value = PlaceResponse(
            PlaceResultCode.OK, agent_id=agent_id, score=score)
        self._clients[agent_id] = fake_client

    scheduler = LeafScheduler("foo", 8, False)
    scheduler.configure(self._build_scheduler_configure())

    def place_with(resource_constraints):
        # Fresh request per case so constraints never leak between cases.
        request = self._place_request()
        request.resource.vm.resource_constraints = resource_constraints
        return scheduler.place(request)

    # Check OR among values, should succeed
    placed = place_with([
        ResourceConstraint(ResourceConstraintType.NETWORK,
                           ["net_1", "net_17"])])
    assert_that(placed.result, is_(PlaceResultCode.OK))
    assert_that(placed.agent_id, is_("baz_1"))

    # Check AND among constraints, should fail
    placed = place_with([
        ResourceConstraint(ResourceConstraintType.NETWORK, ["net_1"]),
        ResourceConstraint(ResourceConstraintType.NETWORK, ["net_4"])])
    assert_that(placed.result, is_(PlaceResultCode.NO_SUCH_RESOURCE))

    # Check AND among constraints, should succeed
    placed = place_with([
        ResourceConstraint(ResourceConstraintType.DATASTORE, ["ds_1"]),
        ResourceConstraint(ResourceConstraintType.NETWORK, ["net_2"])])
    assert_that(placed.result, is_(PlaceResultCode.OK))
    assert_that(placed.agent_id, is_("baz_1"))

    # Check OR among values + AND among constraints, should succeed
    placed = place_with([
        ResourceConstraint(ResourceConstraintType.NETWORK,
                           ["net_1", "net_17"]),
        ResourceConstraint(ResourceConstraintType.DATASTORE_TAG,
                           ["ds_tag_18", "ds_tag_1"])])
    assert_that(placed.result, is_(PlaceResultCode.OK))
    assert_that(placed.agent_id, is_("baz_1"))

    # Check OR among values + AND among constraints, should fail
    placed = place_with([
        ResourceConstraint(ResourceConstraintType.NETWORK,
                           ["net_1", "net_17"]),
        ResourceConstraint(ResourceConstraintType.DATASTORE_TAG,
                           ["ds_tag_18", "ds_tag_19"])])
    assert_that(placed.result, is_(PlaceResultCode.NO_SUCH_RESOURCE))
def test_random_scorer_close_scores(self, scores, ut_ratio, expected):
    """The RandomScorer's pick must carry one of the ``expected`` scores.

    :param scores: scores to wrap into place responses
    :param ut_ratio: ratio passed to the RandomScorer
    :param expected: collection of acceptable winning scores
    """
    scorer = RandomScorer(ut_ratio=ut_ratio)

    candidates = []
    for score in scores:
        candidates.append(PlaceResponse(score=score))

    picked = scorer.score(candidates)
    assert_that(expected, has_item(picked.score))
def test_ratio(self, scores, ratio, expected):
    """The DefaultScorer built with ``ratio`` must pick ``expected``.

    :param scores: scores to wrap into place responses
    :param ratio: ratio passed to the DefaultScorer
    :param expected: the score the winning response must carry
    """
    scorer = DefaultScorer(ut_ratio=ratio)

    candidates = []
    for score in scores:
        candidates.append(PlaceResponse(score=score))

    picked = scorer.score(candidates)
    assert_that(picked.score, is_(expected))
def place(self, request):
    """Record the request in ``self.place_reqs`` and answer OK.

    :param request: the place request to remember
    :return: a new PlaceResponse with result code OK
    """
    recorded = self.place_reqs
    recorded.append(request)
    return PlaceResponse(PlaceResultCode.OK)