def test_stop(self):
    """Make sure start() starts threads and stop() stops them"""
    health_checker = HealthChecker("id", {}, self.conf)
    self.assertFalse(health_checker._heartbeater.is_alive())
    self.assertFalse(health_checker._reporter.is_alive())

    health_checker.start()
    self.assertTrue(health_checker._heartbeater.is_alive())
    self.assertTrue(health_checker._reporter.is_alive())

    health_checker.stop()
    self.assertFalse(health_checker._heartbeater.is_alive())
    self.assertFalse(health_checker._reporter.is_alive())
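# The test above pins down HealthChecker's start()/stop() contract: two
# worker threads (_heartbeater and _reporter) are dead before start(),
# alive after it, and dead again once stop() returns. Below is a minimal
# sketch of a class satisfying that contract, assuming one heartbeat
# thread and one reporter thread; the loop bodies and the 1-second poll
# interval are hypothetical placeholders, not the real implementation.
import threading


class _SketchHealthChecker(object):

    def __init__(self, scheduler_id, children, agent_config):
        self._stop_event = threading.Event()
        self._heartbeater = threading.Thread(target=self._heartbeat_loop)
        self._reporter = threading.Thread(target=self._report_loop)

    def start(self):
        # After start(), both worker threads report is_alive() == True.
        self._heartbeater.start()
        self._reporter.start()

    def stop(self):
        # Signal both loops to exit and join them, so is_alive() is
        # False for both threads once stop() returns.
        self._stop_event.set()
        self._heartbeater.join()
        self._reporter.join()

    def _heartbeat_loop(self):
        # Event.wait(timeout) returns True once the event is set, which
        # ends the loop promptly on stop().
        while not self._stop_event.wait(1.0):
            pass  # send a heartbeat to the parent (placeholder)

    def _report_loop(self):
        while not self._stop_event.wait(1.0):
            pass  # report child health changes (placeholder)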
class LeafScheduler(BaseScheduler):
    """Leaf scheduler manages child hosts."""

    def __init__(self, scheduler_id, ut_ratio, enable_health_checker=True):
        """Create a new leaf scheduler.

        :param scheduler_id: scheduler id
        :type scheduler_id: str
        :param enable_health_checker: enables health checking of children
        :type enable_health_checker: bool
        """
        self._logger = logging.getLogger(__name__)
        self._logger.info("Creating leaf scheduler: %s" % scheduler_id)
        self.lock = threading.RLock()
        self._latch = CountUpDownLatch()
        self._place_strategy = RandomSubsetStrategy(PLACE_FAN_OUT_RATIO,
                                                    MIN_PLACE_FAN_OUT,
                                                    MAX_PLACE_FAN_OUT)
        self._scheduler_id = scheduler_id
        self._hosts = []
        self._scorer = DefaultScorer(ut_ratio)
        self._threadpool = None
        self._initialize_services(scheduler_id)
        self._health_checker = None
        self._enable_health_checker = enable_health_checker
        self._configured = ConfigStates.UNINITIALIZED

    def _initialize_services(self, scheduler_id):
        self._threadpool = common.services.get(ThreadPoolExecutor)
        self._scheduler_client = SchedulerClient()

    @locked
    def configure(self, hosts):
        """Configure the leaf scheduler.

        :param hosts: list of child hosts
        :type hosts: list of str
        """
        # ChildInfo.from_thrift converts each child's constraint list to
        # a set, so constraint lookups during placement are efficient.
        self._hosts = []
        for host in hosts:
            self._hosts.append(ChildInfo.from_thrift(host))
        self._coalesce_resources(self._hosts)

        if self._health_checker:
            self._health_checker.stop()
        if self._enable_health_checker:
            # Initialize the health checker with the new set of children.
            agent_config = common.services.get(ServiceName.AGENT_CONFIG)
            children = dict((host.id, ServerAddress(host.address, host.port))
                            for host in self._hosts)
            self._health_checker = HealthChecker(self._scheduler_id,
                                                 children, agent_config)
            self._health_checker.start()
        self._configured = ConfigStates.INITIALIZED

    @locked
    def _get_hosts(self):
        """Get the list of hosts for this scheduler.

        The returned list is a shallow copy of the internal host list, so
        a concurrent call to configure() does not step on calls already
        in flight. Assumes the host is configured as a leaf scheduler.

        :rtype: list of ChildInfo
        """
        return list(self._hosts)

    def mark_pending(func):
        """Decorator that bumps the pending count for in-flight calls."""
        @log_request(log_level=logging.debug)
        def nested(self, *args, **kwargs):
            self._latch.count_up()
            self._logger.debug("latch counted up to: {0}".format(
                self._latch.count))
            try:
                return func(self, *args, **kwargs)
            finally:
                self._latch.count_down()
                self._logger.debug("latch counted down to: {0}".format(
                    self._latch.count))
        return nested

    @mark_pending
    def find(self, request):
        """Find the specified resource.

        :type request: FindRequest
        :rtype: FindResponse
        :raise: InvalidScheduler
        """
        if self._configured == ConfigStates.UNINITIALIZED:
            raise InvalidScheduler()

        # Host service only has a single scheduler.
        request.scheduler_id = None

        futures = []
        for agent in self._get_hosts():
            future = self._threadpool.submit(
                self._find_worker, agent.address, agent.port,
                agent.id, request)
            futures.append(future)

        done, not_done = concurrent.futures.wait(futures,
                                                 timeout=FIND_TIMEOUT)
        self._logger.info("Find responses received: %d, timed out: %d",
                          len(done), len(not_done))

        for future in done:
            response = future.result()
            if response.result == FindResultCode.OK:
                return response

        return FindResponse(FindResultCode.NOT_FOUND)

    @mark_pending
    def place(self, request):
        """Place the specified resources.

        :type request: PlaceRequest
        :rtype: PlaceResponse
        :raise: InvalidScheduler
        """
        if self._configured == ConfigStates.UNINITIALIZED:
            raise InvalidScheduler()

        request.scheduler_id = None

        constraints = self._collect_constraints(request.resource)
        selected = self._placement_hosts(request, constraints)
        if len(selected) == 0:
            return PlaceResponse(PlaceResultCode.NO_SUCH_RESOURCE)

        selected = self._filter_missing_hosts(selected)
        done = self._execute_placement(selected, request)

        responses = []
        no_such_resource = False
        not_enough_memory_resource = False
        not_enough_cpu_resource = False
        not_enough_datastore_capacity = False
        for future in done:
            try:
                response = future.result()
                if response.result == PlaceResultCode.OK:
                    responses.append(response)
                elif response.result == \
                        PlaceResultCode.NOT_ENOUGH_CPU_RESOURCE:
                    not_enough_cpu_resource = True
                elif response.result == \
                        PlaceResultCode.NOT_ENOUGH_MEMORY_RESOURCE:
                    not_enough_memory_resource = True
                elif response.result == \
                        PlaceResultCode.NOT_ENOUGH_DATASTORE_CAPACITY:
                    not_enough_datastore_capacity = True
                elif response.result == \
                        PlaceResultCode.NO_SUCH_RESOURCE:
                    no_such_resource = True
            except Exception as e:
                self._logger.warning("Caught exception while sending "
                                     "place request: %s", str(e))

        best_response = self._scorer.score(responses)

        if best_response is not None:
            return best_response
        elif not_enough_cpu_resource:
            return PlaceResponse(PlaceResultCode.NOT_ENOUGH_CPU_RESOURCE)
        elif not_enough_memory_resource:
            return PlaceResponse(PlaceResultCode.NOT_ENOUGH_MEMORY_RESOURCE)
        elif not_enough_datastore_capacity:
            return PlaceResponse(
                PlaceResultCode.NOT_ENOUGH_DATASTORE_CAPACITY)
        elif no_such_resource:
            return PlaceResponse(PlaceResultCode.NO_SUCH_RESOURCE)
        else:
            return PlaceResponse(PlaceResultCode.SYSTEM_ERROR)
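# For reference, the fan-out pattern shared by find() and place() reduces
# to the standalone sketch below: submit one request per child to a thread
# pool, wait a bounded time, then aggregate whatever completed. The worker
# callables, the scorer, and the PLACE_TIMEOUT constant are hypothetical
# stand-ins here, not the real SchedulerClient plumbing.
import concurrent.futures

PLACE_TIMEOUT = 4.0  # seconds; stand-in for the module's timeout constant


def _fan_out_and_score(pool, workers, request, scorer):
    """Fan `request` out to every worker, wait up to PLACE_TIMEOUT, and
    return the scorer's pick among the responses that arrived (or None).
    """
    futures = [pool.submit(worker, request) for worker in workers]
    done, not_done = concurrent.futures.wait(futures, timeout=PLACE_TIMEOUT)
    responses = []
    for future in done:
        try:
            responses.append(future.result())
        except Exception:
            pass  # one failing child must not fail the whole placement
    # Stragglers in `not_done` are simply ignored, as find() ignores its
    # timed-out children.
    return scorer.score(responses) if responses else None


# Example usage with trivial stand-ins:
#   pool = concurrent.futures.ThreadPoolExecutor(max_workers=4)
#   best = _fan_out_and_score(pool, child_rpcs, place_request, scorer)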